From 3ca68df6ee61e1a2034f3307b9edb9b3d87e5ca1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 20:11:25 -0400
Subject: [GFS2] split gfs2_dinode into on-disk and host variants

The latter is used as part of gfs2-private part of struct inode.
It actually stores a lot of fields differently; for now the
declaration is just cloned, inode field is swtiched and changes
propagated.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 4 ++--
 fs/gfs2/ondisk.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d470e5286e..191a3df93d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -48,7 +48,7 @@
 void gfs2_inode_attr_in(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode *di = &ip->i_di;
+	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
 
@@ -98,7 +98,7 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 void gfs2_inode_attr_out(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode *di = &ip->i_di;
+	struct gfs2_dinode_host *di = &ip->i_di;
 	gfs2_assert_withdraw(GFS2_SB(inode),
 		(di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
 	di->di_mode = inode->i_mode;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 1025960b0e..52cb9a2dc0 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -153,7 +153,7 @@ void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
 	qu->qu_value = be64_to_cpu(str->qu_value);
 }
 
-void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf)
+void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
 {
 	const struct gfs2_dinode *str = buf;
 
@@ -187,7 +187,7 @@ void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf)
 
 }
 
-void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf)
+void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf)
 {
 	struct gfs2_dinode *str = buf;
 
@@ -221,7 +221,7 @@ void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf)
 
 }
 
-void gfs2_dinode_print(const struct gfs2_dinode *di)
+void gfs2_dinode_print(const struct gfs2_dinode_host *di)
 {
 	gfs2_meta_header_print(&di->di_header);
 	gfs2_inum_print(&di->di_num);
-- 
cgit v1.2.2


From 5c6edb576f3800723bb65dbfaff82517089e32d0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 20:33:01 -0400
Subject: [GFS2] gfs2_dinode_host fields are host-endian

Annotated scalar fields, dropped unused ones.  Note that
it's not at all obvious that we want to convert all of them
to host-endian...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 118dc693d1..1c876e0fb4 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -229,7 +229,7 @@ struct gfs2_inode {
 	unsigned long i_flags;		/* GIF_... */
 
 	u64 i_vn;
-	struct gfs2_dinode i_di; /* To be replaced by ref to block */
+	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
 
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
 	struct gfs2_holder i_iopen_gh;
-- 
cgit v1.2.2


From f50dfaf78c01df3cc2d8819f07d6661915567bae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 20:45:02 -0400
Subject: [GFS2] split gfs2_sb

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 fs/gfs2/ondisk.c | 2 +-
 fs/gfs2/super.c  | 2 +-
 fs/gfs2/super.h  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1c876e0fb4..bd596ba74e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -450,7 +450,7 @@ struct gfs2_sbd {
 	struct super_block *sd_vfs_meta;
 	struct kobject sd_kobj;
 	unsigned long sd_flags;	/* SDF_... */
-	struct gfs2_sb sd_sb;
+	struct gfs2_sb_host sd_sb;
 
 	/* Constants computed on mount */
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 52cb9a2dc0..5b32f1a357 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -79,7 +79,7 @@ static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
 	pv(mh, mh_format, "%u");
 }
 
-void gfs2_sb_in(struct gfs2_sb *sb, const void *buf)
+void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 {
 	const struct gfs2_sb *str = buf;
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6a78b1b32e..52aa3221fc 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -97,7 +97,7 @@ void gfs2_tune_init(struct gfs2_tune *gt)
  * changed.
  */
 
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
 	unsigned int x;
 
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 5bb443ae0f..ac95064c1e 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -14,7 +14,7 @@
 
 void gfs2_tune_init(struct gfs2_tune *gt);
 
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
+int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
 int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
 struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
 
-- 
cgit v1.2.2


From 68826664d12827d7a732192e2f00ba46fb899414 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:07:22 -0400
Subject: [GFS2] split and annotate gfs2_rgrp

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 fs/gfs2/ondisk.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index bd596ba74e..8ca7a7f350 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -68,7 +68,7 @@ struct gfs2_rgrpd {
 	struct list_head rd_recent;	/* Recently used rgrps */
 	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
 	struct gfs2_rindex rd_ri;
-	struct gfs2_rgrp rd_rg;
+	struct gfs2_rgrp_host rd_rg;
 	u64 rd_rg_vn;
 	struct gfs2_bitmap *rd_bits;
 	unsigned int rd_bh_count;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 5b32f1a357..3b156a15cd 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -120,7 +120,7 @@ void gfs2_rindex_print(const struct gfs2_rindex *ri)
 	pv(ri, ri_bitbytes, "%u");
 }
 
-void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf)
+void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
 {
 	const struct gfs2_rgrp *str = buf;
 
@@ -131,7 +131,7 @@ void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf)
 	rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
 }
 
-void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf)
+void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
 {
 	struct gfs2_rgrp *str = buf;
 
-- 
cgit v1.2.2


From e697264709c86040271cdd7abee781d7adbb7f91 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:29:46 -0400
Subject: [GFS2] split and annotate gfs2_inum_range

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 4 ++--
 fs/gfs2/ondisk.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 191a3df93d..7eb6b440da 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -436,7 +436,7 @@ static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
 	struct buffer_head *bh;
-	struct gfs2_inum_range ir;
+	struct gfs2_inum_range_host ir;
 	int error;
 
 	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
@@ -479,7 +479,7 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
 	struct gfs2_holder gh;
 	struct buffer_head *bh;
-	struct gfs2_inum_range ir;
+	struct gfs2_inum_range_host ir;
 	int error;
 
 	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 3b156a15cd..64f5f0c604 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -263,7 +263,7 @@ void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
 	lh->lh_hash = be32_to_cpu(str->lh_hash);
 }
 
-void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
+void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
 {
 	const struct gfs2_inum_range *str = buf;
 
@@ -271,7 +271,7 @@ void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf)
 	ir->ir_length = be64_to_cpu(str->ir_length);
 }
 
-void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf)
+void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
 {
 	struct gfs2_inum_range *str = buf;
 
-- 
cgit v1.2.2


From 551676226163379c217e8ec54bd287eab9b8521e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:47:13 -0400
Subject: [GFS2] split and annotate gfs2_log_head

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glops.c    |  2 +-
 fs/gfs2/incore.h   |  2 +-
 fs/gfs2/lops.c     |  4 ++--
 fs/gfs2/lops.h     |  2 +-
 fs/gfs2/ondisk.c   |  2 +-
 fs/gfs2/recovery.c | 22 +++++++++++-----------
 fs/gfs2/recovery.h |  2 +-
 fs/gfs2/super.c    |  4 ++--
 8 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 41a6b6818a..5406b19322 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -491,7 +491,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
 	struct gfs2_glock *j_gl = ip->i_gl;
-	struct gfs2_log_header head;
+	struct gfs2_log_header_host head;
 	int error;
 
 	if (gl->gl_state != LM_ST_UNLOCKED &&
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 8ca7a7f350..e69f3394a2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -41,7 +41,7 @@ struct gfs2_log_operations {
 	void (*lo_before_commit) (struct gfs2_sbd *sdp);
 	void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
 	void (*lo_before_scan) (struct gfs2_jdesc *jd,
-				struct gfs2_log_header *head, int pass);
+				struct gfs2_log_header_host *head, int pass);
 	int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
 				 struct gfs2_log_descriptor *ld, __be64 *ptr,
 				 int pass);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ab6d1115f9..8a654cd253 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -182,7 +182,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 }
 
 static void buf_lo_before_scan(struct gfs2_jdesc *jd,
-			       struct gfs2_log_header *head, int pass)
+			       struct gfs2_log_header_host *head, int pass)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 
@@ -328,7 +328,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 }
 
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
-				  struct gfs2_log_header *head, int pass)
+				  struct gfs2_log_header_host *head, int pass)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 5839c05ae6..965bc65c7c 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -60,7 +60,7 @@ static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 }
 
 static inline void lops_before_scan(struct gfs2_jdesc *jd,
-				    struct gfs2_log_header *head,
+				    struct gfs2_log_header_host *head,
 				    unsigned int pass)
 {
 	int x;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 64f5f0c604..84b1ebc756 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -251,7 +251,7 @@ void gfs2_dinode_print(const struct gfs2_dinode_host *di)
 	printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
 }
 
-void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf)
+void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
 {
 	const struct gfs2_log_header *str = buf;
 
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 62cd223819..4478162416 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -132,10 +132,10 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
  */
 
 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
-			  struct gfs2_log_header *head)
+			  struct gfs2_log_header_host *head)
 {
 	struct buffer_head *bh;
-	struct gfs2_log_header lh;
+	struct gfs2_log_header_host lh;
 	u32 hash;
 	int error;
 
@@ -143,7 +143,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 	if (error)
 		return error;
 
-	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
+	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));	/* XXX */
 	lh.lh_hash = 0;
 	hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
 	gfs2_log_header_in(&lh, bh->b_data);
@@ -174,7 +174,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
  */
 
 static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
-			struct gfs2_log_header *head)
+			struct gfs2_log_header_host *head)
 {
 	unsigned int orig_blk = *blk;
 	int error;
@@ -205,10 +205,10 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
  * Returns: errno
  */
 
-static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
 	unsigned int blk = head->lh_blkno;
-	struct gfs2_log_header lh;
+	struct gfs2_log_header_host lh;
 	int error;
 
 	for (;;) {
@@ -245,9 +245,9 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
  * Returns: errno
  */
 
-int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
-	struct gfs2_log_header lh_1, lh_m;
+	struct gfs2_log_header_host lh_1, lh_m;
 	u32 blk_1, blk_2, blk_m;
 	int error;
 
@@ -320,7 +320,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
 		length = be32_to_cpu(ld->ld_length);
 
 		if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
-			struct gfs2_log_header lh;
+			struct gfs2_log_header_host lh;
 			error = get_log_header(jd, start, &lh);
 			if (!error) {
 				gfs2_replay_incr_blk(sdp, &start);
@@ -363,7 +363,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
  * Returns: errno
  */
 
-static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
+static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
 {
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
@@ -425,7 +425,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd)
 {
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
-	struct gfs2_log_header head;
+	struct gfs2_log_header_host head;
 	struct gfs2_holder j_gh, ji_gh, t_gh;
 	unsigned long t;
 	int ro = 0;
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 961feedf4d..f7235e61c7 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -26,7 +26,7 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
 void gfs2_revoke_clean(struct gfs2_sbd *sdp);
 
 int gfs2_find_jhead(struct gfs2_jdesc *jd,
-		    struct gfs2_log_header *head);
+		    struct gfs2_log_header_host *head);
 int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
 void gfs2_check_journals(struct gfs2_sbd *sdp);
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 52aa3221fc..0faf563c83 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -508,7 +508,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
 	struct gfs2_glock *j_gl = ip->i_gl;
 	struct gfs2_holder t_gh;
-	struct gfs2_log_header head;
+	struct gfs2_log_header_host head;
 	int error;
 
 	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
@@ -873,7 +873,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
 	struct gfs2_jdesc *jd;
 	struct lfcc *lfcc;
 	LIST_HEAD(list);
-	struct gfs2_log_header lh;
+	struct gfs2_log_header_host lh;
 	int error;
 
 	error = gfs2_jindex_hold(sdp, &ji_gh);
-- 
cgit v1.2.2


From 2a2c98247b822db8df037a56c27201f9d716ac66 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 14:44:50 -0500
Subject: [GFS2] Fix crc32 calculation in recovery.c

Commit "[GFS2] split and annotate gfs2_log_head" resulted in an incorrect
checksum calculation for log headers. This patch corrects the
problem without resorting to copying the whole log header as
the previous code used to.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/recovery.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 4478162416..4acf238e1d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -136,6 +136,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 {
 	struct buffer_head *bh;
 	struct gfs2_log_header_host lh;
+static const u32 nothing = 0;
 	u32 hash;
 	int error;
 
@@ -143,11 +144,11 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 	if (error)
 		return error;
 
-	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));	/* XXX */
-	lh.lh_hash = 0;
-	hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
+	hash = crc32_le((u32)~0, bh->b_data, sizeof(struct gfs2_log_header) -
+					     sizeof(u32));
+	hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
+	hash ^= (u32)~0;
 	gfs2_log_header_in(&lh, bh->b_data);
-
 	brelse(bh);
 
 	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
-- 
cgit v1.2.2


From e928a76f959e89884f6186bb6f846c533847d5df Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 21:57:23 -0400
Subject: [GFS2] split and annotate gfs2_meta_header

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 84b1ebc756..b5aa7ab97f 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -54,7 +54,7 @@ static void gfs2_inum_print(const struct gfs2_inum *no)
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
 }
 
-static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
+static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
 {
 	const struct gfs2_meta_header *str = buf;
 
@@ -63,7 +63,7 @@ static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf)
 	mh->mh_format = be32_to_cpu(str->mh_format);
 }
 
-static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf)
+static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *buf)
 {
 	struct gfs2_meta_header *str = buf;
 
@@ -72,7 +72,7 @@ static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf)
 	str->mh_format = cpu_to_be32(mh->mh_format);
 }
 
-static void gfs2_meta_header_print(const struct gfs2_meta_header *mh)
+static void gfs2_meta_header_print(const struct gfs2_meta_header_host *mh)
 {
 	pv(mh, mh_magic, "0x%.8X");
 	pv(mh, mh_type, "%u");
-- 
cgit v1.2.2


From 1e81c4c3e0f55c95b6278a827262b80debd0dc7e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 22:51:24 -0400
Subject: [GFS2] split and annotate gfs_rindex

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h | 2 +-
 fs/gfs2/ondisk.c | 4 ++--
 fs/gfs2/rgrp.c   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e69f3394a2..e4afc2c4d6 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -67,7 +67,7 @@ struct gfs2_rgrpd {
 	struct list_head rd_list_mru;
 	struct list_head rd_recent;	/* Recently used rgrps */
 	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
-	struct gfs2_rindex rd_ri;
+	struct gfs2_rindex_host rd_ri;
 	struct gfs2_rgrp_host rd_rg;
 	u64 rd_rg_vn;
 	struct gfs2_bitmap *rd_bits;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index b5aa7ab97f..c4e099d582 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -97,7 +97,7 @@ void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 	memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
 }
 
-void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
+void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
 {
 	const struct gfs2_rindex *str = buf;
 
@@ -109,7 +109,7 @@ void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf)
 
 }
 
-void gfs2_rindex_print(const struct gfs2_rindex *ri)
+void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
 {
 	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
 	pv(ri, ri_length, "%u");
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b261385c00..07dfd63050 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -253,7 +253,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 
 }
 
-static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
 {
 	u64 first = ri->ri_data0;
 	u64 last = first + ri->ri_data;
-- 
cgit v1.2.2


From 629a21e7ecedf779c68dcaa9a186069f57a7c652 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 22:51:24 -0400
Subject: [GFS2] split and annotate gfs2_inum

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c        |  8 ++++----
 fs/gfs2/dir.h        |  8 ++++----
 fs/gfs2/incore.h     |  2 +-
 fs/gfs2/inode.c      | 22 +++++++++++-----------
 fs/gfs2/inode.h      |  4 ++--
 fs/gfs2/ondisk.c     |  6 +++---
 fs/gfs2/ops_dentry.c |  2 +-
 fs/gfs2/ops_export.c |  8 ++++----
 fs/gfs2/ops_export.h |  2 +-
 fs/gfs2/ops_file.c   |  2 +-
 fs/gfs2/ops_fstype.c |  4 ++--
 11 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index e24af28b1a..d67a3760ca 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1194,7 +1194,7 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
 			   int *copied)
 {
 	const struct gfs2_dirent *dent, *dent_next;
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	u64 off, off_next;
 	unsigned int x, y;
 	int run = 0;
@@ -1456,7 +1456,7 @@ out:
  */
 
 int gfs2_dir_search(struct inode *dir, const struct qstr *name,
-		    struct gfs2_inum *inum, unsigned int *type)
+		    struct gfs2_inum_host *inum, unsigned int *type)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
@@ -1531,7 +1531,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
  */
 
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-		 const struct gfs2_inum *inum, unsigned type)
+		 const struct gfs2_inum_host *inum, unsigned type)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *bh;
@@ -1666,7 +1666,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
  */
 
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum *inum, unsigned int new_type)
+		   struct gfs2_inum_host *inum, unsigned int new_type)
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 371233419b..b21b33668a 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -31,17 +31,17 @@ struct gfs2_inum;
 typedef int (*gfs2_filldir_t) (void *opaque,
 			      const char *name, unsigned int length,
 			      u64 offset,
-			      struct gfs2_inum *inum, unsigned int type);
+			      struct gfs2_inum_host *inum, unsigned int type);
 
 int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
-		    struct gfs2_inum *inum, unsigned int *type);
+		    struct gfs2_inum_host *inum, unsigned int *type);
 int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-		 const struct gfs2_inum *inum, unsigned int type);
+		 const struct gfs2_inum_host *inum, unsigned int type);
 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
 int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
 		  gfs2_filldir_t filldir);
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-		   struct gfs2_inum *new_inum, unsigned int new_type);
+		   struct gfs2_inum_host *new_inum, unsigned int new_type);
 
 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e4afc2c4d6..20c9b4f0e5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -224,7 +224,7 @@ enum {
 
 struct gfs2_inode {
 	struct inode i_inode;
-	struct gfs2_inum i_num;
+	struct gfs2_inum_host i_num;
 
 	unsigned long i_flags;		/* GIF_... */
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7eb6b440da..dadd1f35c8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -112,7 +112,7 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip)
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum *inum = opaque;
+	struct gfs2_inum_host *inum = opaque;
 
 	if (ip && ip->i_num.no_addr == inum->no_addr)
 		return 1;
@@ -123,19 +123,19 @@ static int iget_test(struct inode *inode, void *opaque)
 static int iget_set(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_inum *inum = opaque;
+	struct gfs2_inum_host *inum = opaque;
 
 	ip->i_num = *inum;
 	return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
 {
 	return ilookup5(sb, (unsigned long)inum->no_formal_ino,
 			iget_test, inum);
 }
 
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
+static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
 {
 	return iget5_locked(sb, (unsigned long)inum->no_formal_ino,
 		     iget_test, iget_set, inum);
@@ -150,7 +150,7 @@ static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
  * Returns: A VFS inode, or an error
  */
 
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
 {
 	struct inode *inode = gfs2_iget(sb, inum);
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -394,7 +394,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 	struct super_block *sb = dir->i_sb;
 	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_holder d_gh;
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	unsigned int type;
 	int error = 0;
 	struct inode *inode = NULL;
@@ -610,7 +610,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 		*gid = current->fsgid;
 }
 
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum,
+static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
 			u64 *generation)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
@@ -650,7 +650,7 @@ out:
  */
 
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
-			const struct gfs2_inum *inum, unsigned int mode,
+			const struct gfs2_inum_host *inum, unsigned int mode,
 			unsigned int uid, unsigned int gid,
 			const u64 *generation)
 {
@@ -707,7 +707,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 }
 
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
-		       unsigned int mode, const struct gfs2_inum *inum,
+		       unsigned int mode, const struct gfs2_inum_host *inum,
 		       const u64 *generation)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
@@ -866,7 +866,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
 	struct inode *dir = &dip->i_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	int error;
 	u64 generation;
 
@@ -1018,7 +1018,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 		   struct gfs2_inode *ip)
 {
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	unsigned int type;
 	int error;
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index f5d8617605..d699b92097 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -27,8 +27,8 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip)
 
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
 void gfs2_inode_attr_out(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum);
+struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
+struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
 
 int gfs2_inode_refresh(struct gfs2_inode *ip);
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index c4e099d582..32f592f4a3 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -32,7 +32,7 @@
  * first arg: the cpu-order structure
  */
 
-void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
+void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
 {
 	const struct gfs2_inum *str = buf;
 
@@ -40,7 +40,7 @@ void gfs2_inum_in(struct gfs2_inum *no, const void *buf)
 	no->no_addr = be64_to_cpu(str->no_addr);
 }
 
-void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
+void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
 {
 	struct gfs2_inum *str = buf;
 
@@ -48,7 +48,7 @@ void gfs2_inum_out(const struct gfs2_inum *no, void *buf)
 	str->no_addr = cpu_to_be64(no->no_addr);
 }
 
-static void gfs2_inum_print(const struct gfs2_inum *no)
+static void gfs2_inum_print(const struct gfs2_inum_host *no)
 {
 	printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index 00041b1b80..c36f9e342e 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	struct inode *inode = dentry->d_inode;
 	struct gfs2_holder d_gh;
 	struct gfs2_inode *ip;
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	unsigned int type;
 	int error;
 
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 86127d93bd..33423a5c32 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -35,7 +35,7 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
 				     void *context)
 {
 	struct gfs2_fh_obj fh_obj;
-	struct gfs2_inum *this, parent;
+	struct gfs2_inum_host *this, parent;
 
 	if (fh_type != fh_len)
 		return NULL;
@@ -114,12 +114,12 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
 }
 
 struct get_name_filldir {
-	struct gfs2_inum inum;
+	struct gfs2_inum_host inum;
 	char *name;
 };
 
 static int get_name_filldir(void *opaque, const char *name, unsigned int length,
-			    u64 offset, struct gfs2_inum *inum,
+			    u64 offset, struct gfs2_inum_host *inum,
 			    unsigned int type)
 {
 	struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
@@ -202,7 +202,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
-	struct gfs2_inum *inum = &fh_obj->this;
+	struct gfs2_inum_host *inum = &fh_obj->this;
 	struct gfs2_holder i_gh, ri_gh, rgd_gh;
 	struct gfs2_rgrpd *rgd;
 	struct inode *inode;
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
index 09aca5046f..f925a955b3 100644
--- a/fs/gfs2/ops_export.h
+++ b/fs/gfs2/ops_export.h
@@ -15,7 +15,7 @@
 
 extern struct export_operations gfs2_export_ops;
 struct gfs2_fh_obj {
-	struct gfs2_inum this;
+	struct gfs2_inum_host this;
 	__u32            imode;
 };
 
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3064f133bf..359965c368 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -139,7 +139,7 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
  */
 
 static int filldir_func(void *opaque, const char *name, unsigned int length,
-			u64 offset, struct gfs2_inum *inum,
+			u64 offset, struct gfs2_inum_host *inum,
 			unsigned int type)
 {
 	struct filldir_reg *fdr = (struct filldir_reg *)opaque;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 882873a6bd..d14e139d26 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -237,7 +237,7 @@ fail:
 }
 
 static struct inode *gfs2_lookup_root(struct super_block *sb,
-				      struct gfs2_inum *inum)
+				      struct gfs2_inum_host *inum)
 {
 	return gfs2_inode_lookup(sb, inum, DT_DIR);
 }
@@ -246,7 +246,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 {
 	struct super_block *sb = sdp->sd_vfs;
 	struct gfs2_holder sb_gh;
-	struct gfs2_inum *inum;
+	struct gfs2_inum_host *inum;
 	struct inode *inode;
 	int error = 0;
 
-- 
cgit v1.2.2


From b5bc9e8b065dbcd4c675e8c158d6e524f221b8e1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:31:55 -0400
Subject: [GFS2] split and annotate gfs2_quota

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 2 +-
 fs/gfs2/quota.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 32f592f4a3..5db0737fcd 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -144,7 +144,7 @@ void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
 	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
 
-void gfs2_quota_in(struct gfs2_quota *qu, const void *buf)
+void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
 {
 	const struct gfs2_quota *str = buf;
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3deae7416..e3f5b8da48 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -743,7 +743,7 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
 	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
 	struct gfs2_holder i_gh;
-	struct gfs2_quota q;
+	struct gfs2_quota_host q;
 	char buf[sizeof(struct gfs2_quota)];
 	struct file_ra_state ra_state;
 	int error;
-- 
cgit v1.2.2


From bd209cc017f231e8536550bdab1bf5da93c32798 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:43:19 -0400
Subject: [GFS2] split and annotate gfs2_statfs_change

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/incore.h    |  4 ++--
 fs/gfs2/ondisk.c    |  4 ++--
 fs/gfs2/ops_super.c |  2 +-
 fs/gfs2/super.c     | 22 +++++++++++-----------
 fs/gfs2/super.h     |  4 ++--
 5 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 20c9b4f0e5..c0a8c3b6a8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -503,8 +503,8 @@ struct gfs2_sbd {
 
 	spinlock_t sd_statfs_spin;
 	struct mutex sd_statfs_mutex;
-	struct gfs2_statfs_change sd_statfs_master;
-	struct gfs2_statfs_change sd_statfs_local;
+	struct gfs2_statfs_change_host sd_statfs_master;
+	struct gfs2_statfs_change_host sd_statfs_local;
 	unsigned long sd_statfs_sync_time;
 
 	/* Resource group stuff */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 5db0737fcd..84c59b165b 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -279,7 +279,7 @@ void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
 	str->ir_length = cpu_to_be64(ir->ir_length);
 }
 
-void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
+void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
 {
 	const struct gfs2_statfs_change *str = buf;
 
@@ -288,7 +288,7 @@ void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf)
 	sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
 }
 
-void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf)
+void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
 {
 	struct gfs2_statfs_change *str = buf;
 
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index b47d9598c0..9c786d1e70 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -215,7 +215,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct super_block *sb = dentry->d_inode->i_sb;
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_statfs_change sc;
+	struct gfs2_statfs_change_host sc;
 	int error;
 
 	if (gfs2_tune_get(sdp, gt_statfs_slow))
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0faf563c83..0ef8317270 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -587,9 +587,9 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
-	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
 	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 	struct buffer_head *m_bh, *l_bh;
 	struct gfs2_holder gh;
 	int error;
@@ -634,7 +634,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
 			s64 dinodes)
 {
 	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 	struct buffer_head *l_bh;
 	int error;
 
@@ -660,8 +660,8 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
-	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 	struct gfs2_holder gh;
 	struct buffer_head *m_bh, *l_bh;
 	int error;
@@ -727,10 +727,10 @@ out:
  * Returns: errno
  */
 
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
 {
-	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
-	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
+	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 
 	spin_lock(&sdp->sd_statfs_spin);
 
@@ -760,7 +760,7 @@ int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
  */
 
 static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
-			    struct gfs2_statfs_change *sc)
+			    struct gfs2_statfs_change_host *sc)
 {
 	gfs2_rgrp_verify(rgd);
 	sc->sc_total += rgd->rd_ri.ri_data;
@@ -782,7 +782,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
  * Returns: errno
  */
 
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
 {
 	struct gfs2_holder ri_gh;
 	struct gfs2_rgrpd *rgd_next;
@@ -792,7 +792,7 @@ int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
 	int done;
 	int error = 0, err;
 
-	memset(sc, 0, sizeof(struct gfs2_statfs_change));
+	memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
 	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
 	if (!gha)
 		return -ENOMEM;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index ac95064c1e..e590b2df11 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -45,8 +45,8 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp);
 void gfs2_statfs_change(struct gfs2_sbd *sdp,
 			s64 total, s64 free, s64 dinodes);
 int gfs2_statfs_sync(struct gfs2_sbd *sdp);
-int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
-int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
+int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
+int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
 
 int gfs2_freeze_fs(struct gfs2_sbd *sdp);
 void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
-- 
cgit v1.2.2


From b62f963e1fdf838fed91faec21228d421a834f2d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:46:46 -0400
Subject: [GFS2] split and annotate gfs2_quota_change

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 2 +-
 fs/gfs2/quota.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 84c59b165b..2d1682ded4 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -297,7 +297,7 @@ void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
 	str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
 }
 
-void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf)
+void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
 {
 	const struct gfs2_quota_change *str = buf;
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e3f5b8da48..009d86c000 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1103,7 +1103,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
 
 		for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
 		     y++, slot++) {
-			struct gfs2_quota_change qc;
+			struct gfs2_quota_change_host qc;
 			struct gfs2_quota_data *qd;
 
 			gfs2_quota_change_in(&qc, bh->b_data +
-- 
cgit v1.2.2


From b44b84d765b02f813a67b96bf79e3b5d4d621631 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 14 Oct 2006 10:46:30 -0400
Subject: [GFS2] gfs2 misc endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c       | 32 ++++++++++++++++----------------
 fs/gfs2/dir.c        | 24 +++++++++++++-----------
 fs/gfs2/eattr.c      | 27 ++++++++++++++-------------
 fs/gfs2/eattr.h      |  6 +++---
 fs/gfs2/inode.c      |  9 +++++----
 fs/gfs2/ops_export.c | 30 ++++++++++++------------------
 fs/gfs2/quota.c      |  3 +--
 fs/gfs2/util.h       |  6 ++----
 8 files changed, 66 insertions(+), 71 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 06e9a8cb45..51f6356bdc 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -38,8 +38,8 @@ struct metapath {
 };
 
 typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
-			     struct buffer_head *bh, u64 *top,
-			     u64 *bottom, unsigned int height,
+			     struct buffer_head *bh, __be64 *top,
+			     __be64 *bottom, unsigned int height,
 			     void *data);
 
 struct strip_mine {
@@ -230,7 +230,7 @@ static int build_height(struct inode *inode, unsigned height)
 	struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
 	struct gfs2_dinode *di;
 	int error;
-	u64 *bp;
+	__be64 *bp;
 	u64 bn;
 	unsigned n;
 
@@ -255,7 +255,7 @@ static int build_height(struct inode *inode, unsigned height)
 					  GFS2_FORMAT_IN);
 			gfs2_buffer_clear_tail(blocks[n],
 					       sizeof(struct gfs2_meta_header));
-			bp = (u64 *)(blocks[n]->b_data +
+			bp = (__be64 *)(blocks[n]->b_data +
 				     sizeof(struct gfs2_meta_header));
 			*bp = cpu_to_be64(blocks[n+1]->b_blocknr);
 			brelse(blocks[n]);
@@ -360,15 +360,15 @@ static void find_metapath(struct gfs2_inode *ip, u64 block,
  * metadata tree.
  */
 
-static inline u64 *metapointer(struct buffer_head *bh, int *boundary,
+static inline __be64 *metapointer(struct buffer_head *bh, int *boundary,
 			       unsigned int height, const struct metapath *mp)
 {
 	unsigned int head_size = (height > 0) ?
 		sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
-	u64 *ptr;
+	__be64 *ptr;
 	*boundary = 0;
-	ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height];
-	if (ptr + 1 == (u64 *)(bh->b_data + bh->b_size))
+	ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
+	if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size))
 		*boundary = 1;
 	return ptr;
 }
@@ -394,7 +394,7 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
 			int *new, u64 *block)
 {
 	int boundary;
-	u64 *ptr = metapointer(bh, &boundary, height, mp);
+	__be64 *ptr = metapointer(bh, &boundary, height, mp);
 
 	if (*ptr) {
 		*block = be64_to_cpu(*ptr);
@@ -600,7 +600,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *bh = NULL;
-	u64 *top, *bottom;
+	__be64 *top, *bottom;
 	u64 bn;
 	int error;
 	int mh_size = sizeof(struct gfs2_meta_header);
@@ -611,17 +611,17 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
 			return error;
 		dibh = bh;
 
-		top = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
-		bottom = (u64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
+		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
+		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
 	} else {
 		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
 		if (error)
 			return error;
 
-		top = (u64 *)(bh->b_data + mh_size) +
+		top = (__be64 *)(bh->b_data + mh_size) +
 				  (first ? mp->mp_list[height] : 0);
 
-		bottom = (u64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
+		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
 	}
 
 	error = bc(ip, dibh, bh, top, bottom, height, data);
@@ -660,7 +660,7 @@ out:
  */
 
 static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
-		    struct buffer_head *bh, u64 *top, u64 *bottom,
+		    struct buffer_head *bh, __be64 *top, __be64 *bottom,
 		    unsigned int height, void *data)
 {
 	struct strip_mine *sm = data;
@@ -668,7 +668,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 	struct gfs2_rgrp_list rlist;
 	u64 bn, bstart;
 	u32 blen;
-	u64 *p;
+	__be64 *p;
 	unsigned int rg_blocks = 0;
 	int metadata;
 	unsigned int revokes = 0;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index d67a3760ca..59dc823c28 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -713,12 +713,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
 static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
 		       u64 *leaf_out)
 {
-	u64 leaf_no;
+	__be64 leaf_no;
 	int error;
 
 	error = gfs2_dir_read_data(dip, (char *)&leaf_no,
-				    index * sizeof(u64),
-				    sizeof(u64), 0);
+				    index * sizeof(__be64),
+				    sizeof(__be64), 0);
 	if (error != sizeof(u64))
 		return (error < 0) ? error : -EIO;
 
@@ -837,7 +837,8 @@ static int dir_make_exhash(struct inode *inode)
 	struct gfs2_leaf *leaf;
 	int y;
 	u32 x;
-	u64 *lp, bn;
+	__be64 *lp;
+	u64 bn;
 	int error;
 
 	error = gfs2_meta_inode_buffer(dip, &dibh);
@@ -893,7 +894,7 @@ static int dir_make_exhash(struct inode *inode)
 	gfs2_trans_add_bh(dip->i_gl, dibh, 1);
 	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 
-	lp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
+	lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
 
 	for (x = sdp->sd_hash_ptrs; x--; lp++)
 		*lp = cpu_to_be64(bn);
@@ -929,7 +930,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	struct gfs2_leaf *nleaf, *oleaf;
 	struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
 	u32 start, len, half_len, divider;
-	u64 bn, *lp, leaf_no;
+	u64 bn, leaf_no;
+	__be64 *lp;
 	u32 index;
 	int x, moved = 0;
 	int error;
@@ -974,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	/* Change the pointers.
 	   Don't bother distinguishing stuffed from non-stuffed.
 	   This code is complicated enough already. */
-	lp = kmalloc(half_len * sizeof(u64), GFP_NOFS | __GFP_NOFAIL);
+	lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS | __GFP_NOFAIL);
 	/*  Change the pointers  */
 	for (x = 0; x < half_len; x++)
 		lp[x] = cpu_to_be64(bn);
@@ -1341,7 +1343,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 	u32 hsize, len = 0;
 	u32 ht_offset, lp_offset, ht_offset_cur = -1;
 	u32 hash, index;
-	u64 *lp;
+	__be64 *lp;
 	int copied = 0;
 	int error = 0;
 	unsigned depth = 0;
@@ -1365,7 +1367,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 
 		if (ht_offset_cur != ht_offset) {
 			error = gfs2_dir_read_data(dip, (char *)lp,
-						ht_offset * sizeof(u64),
+						ht_offset * sizeof(__be64),
 						sdp->sd_hash_bsize, 1);
 			if (error != sdp->sd_hash_bsize) {
 				if (error >= 0)
@@ -1715,7 +1717,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
 	u32 hsize, len;
 	u32 ht_offset, lp_offset, ht_offset_cur = -1;
 	u32 index = 0;
-	u64 *lp;
+	__be64 *lp;
 	u64 leaf_no;
 	int error = 0;
 
@@ -1735,7 +1737,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
 
 		if (ht_offset_cur != ht_offset) {
 			error = gfs2_dir_read_data(dip, (char *)lp,
-						ht_offset * sizeof(u64),
+						ht_offset * sizeof(__be64),
 						sdp->sd_hash_bsize, 1);
 			if (error != sdp->sd_hash_bsize) {
 				if (error >= 0)
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index a65a4ccfd4..518f0c0786 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -112,7 +112,7 @@ fail:
 static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 {
 	struct buffer_head *bh, *eabh;
-	u64 *eablk, *end;
+	__be64 *eablk, *end;
 	int error;
 
 	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT, &bh);
@@ -129,7 +129,7 @@ static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
 		goto out;
 	}
 
-	eablk = (u64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
+	eablk = (__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header));
 	end = eablk + GFS2_SB(&ip->i_inode)->sd_inptrs;
 
 	for (; eablk < end; eablk++) {
@@ -224,7 +224,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	struct gfs2_rgrpd *rgd;
 	struct gfs2_holder rg_gh;
 	struct buffer_head *dibh;
-	u64 *dataptrs, bn = 0;
+	__be64 *dataptrs;
+	u64 bn = 0;
 	u64 bstart = 0;
 	unsigned int blen = 0;
 	unsigned int blks = 0;
@@ -444,7 +445,7 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 	struct buffer_head **bh;
 	unsigned int amount = GFS2_EA_DATA_LEN(ea);
 	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
-	u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
+	__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
 	unsigned int x;
 	int error = 0;
 
@@ -629,7 +630,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 		ea->ea_num_ptrs = 0;
 		memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
 	} else {
-		u64 *dataptr = GFS2_EA2DATAPTRS(ea);
+		__be64 *dataptr = GFS2_EA2DATAPTRS(ea);
 		const char *data = er->er_data;
 		unsigned int data_len = er->er_data_len;
 		unsigned int copy;
@@ -931,12 +932,12 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct buffer_head *indbh, *newbh;
-	u64 *eablk;
+	__be64 *eablk;
 	int error;
 	int mh_size = sizeof(struct gfs2_meta_header);
 
 	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
-		u64 *end;
+		__be64 *end;
 
 		error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, DIO_WAIT,
 				       &indbh);
@@ -948,7 +949,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 			goto out;
 		}
 
-		eablk = (u64 *)(indbh->b_data + mh_size);
+		eablk = (__be64 *)(indbh->b_data + mh_size);
 		end = eablk + sdp->sd_inptrs;
 
 		for (; eablk < end; eablk++)
@@ -971,7 +972,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 		gfs2_buffer_clear_tail(indbh, mh_size);
 
-		eablk = (u64 *)(indbh->b_data + mh_size);
+		eablk = (__be64 *)(indbh->b_data + mh_size);
 		*eablk = cpu_to_be64(ip->i_di.di_eattr);
 		ip->i_di.di_eattr = blk;
 		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
@@ -1202,7 +1203,7 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
 	struct buffer_head **bh;
 	unsigned int amount = GFS2_EA_DATA_LEN(ea);
 	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
-	u64 *dataptrs = GFS2_EA2DATAPTRS(ea);
+	__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
 	unsigned int x;
 	int error;
 
@@ -1300,7 +1301,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrp_list rlist;
 	struct buffer_head *indbh, *dibh;
-	u64 *eablk, *end;
+	__be64 *eablk, *end;
 	unsigned int rg_blocks = 0;
 	u64 bstart = 0;
 	unsigned int blen = 0;
@@ -1319,7 +1320,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 		goto out;
 	}
 
-	eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+	eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
 	end = eablk + sdp->sd_inptrs;
 
 	for (; eablk < end; eablk++) {
@@ -1363,7 +1364,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 
 	gfs2_trans_add_bh(ip->i_gl, indbh, 1);
 
-	eablk = (u64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
+	eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
 	bstart = 0;
 	blen = 0;
 
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
index ffa65947d6..c82dbe01d7 100644
--- a/fs/gfs2/eattr.h
+++ b/fs/gfs2/eattr.h
@@ -19,7 +19,7 @@ struct iattr;
 #define GFS2_EA_SIZE(ea) \
 ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
       ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
-                                  (sizeof(u64) * (ea)->ea_num_ptrs)), 8)
+                                  (sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
 
 #define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
 #define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
@@ -29,13 +29,13 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
 
 #define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
 ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
-      sizeof(u64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
+      sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
 
 #define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
 #define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
 
 #define GFS2_EA2DATAPTRS(ea) \
-((u64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
+((__be64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
 
 #define GFS2_EA2NEXT(ea) \
 ((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index dadd1f35c8..fb969302f1 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -500,21 +500,22 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
 	if (!ir.ir_length) {
 		struct buffer_head *m_bh;
 		u64 x, y;
+		__be64 z;
 
 		error = gfs2_meta_inode_buffer(m_ip, &m_bh);
 		if (error)
 			goto out_brelse;
 
-		x = *(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
-		x = y = be64_to_cpu(x);
+		z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
+		x = y = be64_to_cpu(z);
 		ir.ir_start = x;
 		ir.ir_length = GFS2_INUM_QUANTUM;
 		x += GFS2_INUM_QUANTUM;
 		if (x < y)
 			gfs2_consist_inode(m_ip);
-		x = cpu_to_be64(x);
+		z = cpu_to_be64(x);
 		gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
-		*(u64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
+		*(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z;
 
 		brelse(m_bh);
 	}
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 33423a5c32..b4e7b87753 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -27,13 +27,14 @@
 #include "util.h"
 
 static struct dentry *gfs2_decode_fh(struct super_block *sb,
-				     __u32 *fh,
+				     __u32 *p,
 				     int fh_len,
 				     int fh_type,
 				     int (*acceptable)(void *context,
 						       struct dentry *dentry),
 				     void *context)
 {
+	__be32 *fh = (__force __be32 *)p;
 	struct gfs2_fh_obj fh_obj;
 	struct gfs2_inum_host *this, parent;
 
@@ -65,9 +66,10 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
 						    acceptable, context);
 }
 
-static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
+static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 			  int connectable)
 {
+	__be32 *fh = (__force __be32 *)p;
 	struct inode *inode = dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -76,14 +78,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
 	    (connectable && *len < GFS2_LARGE_FH_SIZE))
 		return 255;
 
-	fh[0] = ip->i_num.no_formal_ino >> 32;
-	fh[0] = cpu_to_be32(fh[0]);
-	fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
-	fh[1] = cpu_to_be32(fh[1]);
-	fh[2] = ip->i_num.no_addr >> 32;
-	fh[2] = cpu_to_be32(fh[2]);
-	fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
-	fh[3] = cpu_to_be32(fh[3]);
+	fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+	fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+	fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
+	fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
 	*len = GFS2_SMALL_FH_SIZE;
 
 	if (!connectable || inode == sb->s_root->d_inode)
@@ -95,14 +93,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
 	igrab(inode);
 	spin_unlock(&dentry->d_lock);
 
-	fh[4] = ip->i_num.no_formal_ino >> 32;
-	fh[4] = cpu_to_be32(fh[4]);
-	fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
-	fh[5] = cpu_to_be32(fh[5]);
-	fh[6] = ip->i_num.no_addr >> 32;
-	fh[6] = cpu_to_be32(fh[6]);
-	fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
-	fh[7] = cpu_to_be32(fh[7]);
+	fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+	fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+	fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
+	fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
 
 	fh[8]  = cpu_to_be32(inode->i_mode);
 	fh[9]  = 0;	/* pad to double word */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 009d86c000..5d00e9b209 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -539,8 +539,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
 		qc->qc_id = cpu_to_be32(qd->qd_id);
 	}
 
-	x = qc->qc_change;
-	x = be64_to_cpu(x) + change;
+	x = be64_to_cpu(qc->qc_change) + change;
 	qc->qc_change = cpu_to_be64(x);
 
 	spin_lock(&sdp->sd_quota_spin);
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 76a50899fe..ca8667c3ed 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -83,8 +83,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
 				    char *file, unsigned int line)
 {
 	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
-	u32 magic = mh->mh_magic;
-	magic = be32_to_cpu(magic);
+	u32 magic = be32_to_cpu(mh->mh_magic);
 	if (unlikely(magic != GFS2_MAGIC))
 		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
 					  file, line);
@@ -107,9 +106,8 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
 					char *file, unsigned int line)
 {
 	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
-	u32 magic = mh->mh_magic;
+	u32 magic = be32_to_cpu(mh->mh_magic);
 	u16 t = be32_to_cpu(mh->mh_type);
-	magic = be32_to_cpu(magic);
 	if (unlikely(magic != GFS2_MAGIC))
 		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
 					  file, line);
-- 
cgit v1.2.2


From 9c9ab3d5414653bfe5e5b9f4dfdaab0c6ab17196 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Oct 2006 23:49:23 -0400
Subject: [GFS2] gfs2 __user misannotation fix

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 359965c368..2fc8868e06 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -71,7 +71,7 @@ static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
 		size = count;
 
 	kaddr = kmap(page);
-	memcpy(desc->arg.buf, kaddr + offset, size);
+	memcpy(desc->arg.data, kaddr + offset, size);
 	kunmap(page);
 
 	desc->count = count - size;
@@ -86,7 +86,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
 	struct inode *inode = &ip->i_inode;
 	read_descriptor_t desc;
 	desc.written = 0;
-	desc.arg.buf = buf;
+	desc.arg.data = buf;
 	desc.count = size;
 	desc.error = 0;
 	do_generic_mapping_read(inode->i_mapping, ra_state,
-- 
cgit v1.2.2


From 539e5d6b7ae8612c0393fe940d2da5b591318d3d Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 15:07:05 -0500
Subject: [GFS2] Change argument of gfs2_dinode_out

Everywhere this was called, a struct gfs2_inode was available,
but despite that, it was always called with a struct gfs2_dinode
as an argument. By making this change it paves the way to start
eliminating fields duplicated between the kernel's struct inode
and the struct gfs2_dinode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/acl.c       |  2 +-
 fs/gfs2/bmap.c      | 12 ++++++------
 fs/gfs2/dir.c       | 20 ++++++++++----------
 fs/gfs2/eattr.c     | 14 +++++++-------
 fs/gfs2/inode.c     |  6 +++---
 fs/gfs2/ondisk.c    |  5 ++++-
 fs/gfs2/ops_file.c  |  2 +-
 fs/gfs2/ops_inode.c | 10 +++++-----
 8 files changed, 37 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 5f959b8ce4..906e403b05 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -201,7 +201,7 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
 				(ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
 		ip->i_di.di_mode = mode;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 51f6356bdc..8c092ab2b4 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -498,7 +498,7 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 			error = gfs2_meta_inode_buffer(ip, &dibh);
 			if (!error) {
 				gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-				gfs2_dinode_out(&ip->i_di, dibh->b_data);
+				gfs2_dinode_out(ip, dibh->b_data);
 				brelse(dibh);
 			}
 			set_buffer_new(bh_map);
@@ -780,7 +780,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 
 	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 
 	up_write(&ip->i_rw_mutex);
 
@@ -860,7 +860,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 		goto out_end_trans;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 out_end_trans:
@@ -970,7 +970,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 		ip->i_di.di_size = size;
 		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
 		error = 1;
 
@@ -983,7 +983,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-			gfs2_dinode_out(&ip->i_di, dibh->b_data);
+			gfs2_dinode_out(ip, dibh->b_data);
 		}
 	}
 
@@ -1057,7 +1057,7 @@ static int trunc_end(struct gfs2_inode *ip)
 	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 out:
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 59dc823c28..0742761e1e 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -132,7 +132,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 	if (ip->i_di.di_size < offset + size)
 		ip->i_di.di_size = offset + size;
 	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 
 	brelse(dibh);
 
@@ -232,7 +232,7 @@ out:
 	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 	return copied;
@@ -907,7 +907,7 @@ static int dir_make_exhash(struct inode *inode)
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 	dip->i_di.di_depth = y;
 
-	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	gfs2_dinode_out(dip, dibh->b_data);
 
 	brelse(dibh);
 
@@ -1039,7 +1039,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	error = gfs2_meta_inode_buffer(dip, &dibh);
 	if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
 		dip->i_di.di_blocks++;
-		gfs2_dinode_out(&dip->i_di, dibh->b_data);
+		gfs2_dinode_out(dip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1119,7 +1119,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
 	error = gfs2_meta_inode_buffer(dip, &dibh);
 	if (!gfs2_assert_withdraw(sdp, !error)) {
 		dip->i_di.di_depth++;
-		gfs2_dinode_out(&dip->i_di, dibh->b_data);
+		gfs2_dinode_out(dip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1517,7 +1517,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 		return error;
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	ip->i_di.di_blocks++;
-	gfs2_dinode_out(&ip->i_di, bh->b_data);
+	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
 	return 0;
 }
@@ -1561,7 +1561,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
 			ip->i_di.di_entries++;
 			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-			gfs2_dinode_out(&ip->i_di, bh->b_data);
+			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
 			error = 0;
 			break;
@@ -1647,7 +1647,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_di.di_entries--;
 	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
-	gfs2_dinode_out(&dip->i_di, bh->b_data);
+	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
 
@@ -1695,7 +1695,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 	}
 
 	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
-	gfs2_dinode_out(&dip->i_di, bh->b_data);
+	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	return 0;
 }
@@ -1875,7 +1875,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 		goto out_end_trans;
 
 	gfs2_trans_add_bh(dip->i_gl, dibh, 1);
-	gfs2_dinode_out(&dip->i_di, dibh->b_data);
+	gfs2_dinode_out(dip, dibh->b_data);
 	brelse(dibh);
 
 out_end_trans:
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 518f0c0786..9b7bb565b5 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -302,7 +302,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 	if (!error) {
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		}
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 	}
 	ip->i_di.di_ctime = get_seconds();
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 out:
 	gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -1132,7 +1132,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 	if (!error) {
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1287,7 +1287,7 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
 		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
 		gfs2_inode_attr_out(ip);
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1397,7 +1397,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -1446,7 +1446,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index fb969302f1..b861ddba86 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -338,7 +338,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	ip->i_inode.i_nlink = nlink;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 	mark_inode_dirty(&ip->i_inode);
 
@@ -792,7 +792,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 		goto fail_end_trans;
 	ip->i_di.di_nlink = 1;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 	return 0;
 
@@ -1349,7 +1349,7 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 		gfs2_inode_attr_out(ip);
 
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 	return error;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 2d1682ded4..2c50fa0261 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -15,6 +15,8 @@
 
 #include "gfs2.h"
 #include <linux/gfs2_ondisk.h>
+#include <linux/lm_interface.h>
+#include "incore.h"
 
 #define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
 				       struct->member);
@@ -187,8 +189,9 @@ void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
 
 }
 
-void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf)
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
+	const struct gfs2_dinode_host *di = &ip->i_di;
 	struct gfs2_dinode *str = buf;
 
 	gfs2_meta_header_out(&di->di_header, buf);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 2fc8868e06..7ea4175739 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -336,7 +336,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 		goto out_trans_end;
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	ip->i_di.di_flags = new_flags;
-	gfs2_dinode_out(&ip->i_di, bh->b_data);
+	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
 out_trans_end:
 	gfs2_trans_end(sdp);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index ef6e5ed70e..bd268852c6 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -339,7 +339,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 
 	if (!gfs2_assert_withdraw(sdp, !error)) {
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
 		       size);
 		brelse(dibh);
@@ -414,7 +414,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 		gfs2_inum_out(&dip->i_num, &dent->de_inum);
 		dent->de_type = cpu_to_be16(DT_DIR);
 
-		gfs2_dinode_out(&ip->i_di, di);
+		gfs2_dinode_out(ip, di);
 
 		brelse(dibh);
 	}
@@ -541,7 +541,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 
 	if (!gfs2_assert_withdraw(sdp, !error)) {
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -762,7 +762,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 			goto out_end_trans;
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(&ip->i_di, dibh->b_data);
+		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
 	}
 
@@ -949,7 +949,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	gfs2_inode_attr_out(ip);
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	gfs2_dinode_out(&ip->i_di, dibh->b_data);
+	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 
 	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
-- 
cgit v1.2.2


From 891ea14712da68e282de8583e5fa14f0d3f3731e Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 15:22:10 -0500
Subject: [GFS2] Change argument to gfs2_dinode_in

This is a preliminary patch to enable the removal of fields
in gfs2_dinode_host which are duplicated in struct inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 2 +-
 fs/gfs2/ondisk.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b861ddba86..9875e9356c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -229,7 +229,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
-	gfs2_dinode_in(&ip->i_di, dibh->b_data);
+	gfs2_dinode_in(ip, dibh->b_data);
 
 	brelse(dibh);
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 2c50fa0261..edf87567bb 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -155,8 +155,9 @@ void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
 	qu->qu_value = be64_to_cpu(str->qu_value);
 }
 
-void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
+void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 {
+	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;
 
 	gfs2_meta_header_in(&di->di_header, buf);
@@ -186,7 +187,6 @@ void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf)
 	di->di_entries = be32_to_cpu(str->di_entries);
 
 	di->di_eattr = be64_to_cpu(str->di_eattr);
-
 }
 
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-- 
cgit v1.2.2


From ea744d01c6a5acf1f6171b4c6e1658a742063613 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 15:28:00 -0500
Subject: [GFS2] Move gfs2_dinode_in to inode.c

gfs2_dinode_in() is only ever called from one place, so move it
to that place (in inode.c) and make it static.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 34 ++++++++++++++++++++++++++++++++++
 fs/gfs2/ondisk.c | 36 +-----------------------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9875e9356c..b39cfcfe92 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -208,6 +208,40 @@ fail:
 	return ERR_PTR(error);
 }
 
+static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+{
+	struct gfs2_dinode_host *di = &ip->i_di;
+	const struct gfs2_dinode *str = buf;
+
+	gfs2_meta_header_in(&di->di_header, buf);
+	gfs2_inum_in(&di->di_num, &str->di_num);
+
+	di->di_mode = be32_to_cpu(str->di_mode);
+	di->di_uid = be32_to_cpu(str->di_uid);
+	di->di_gid = be32_to_cpu(str->di_gid);
+	di->di_nlink = be32_to_cpu(str->di_nlink);
+	di->di_size = be64_to_cpu(str->di_size);
+	di->di_blocks = be64_to_cpu(str->di_blocks);
+	di->di_atime = be64_to_cpu(str->di_atime);
+	di->di_mtime = be64_to_cpu(str->di_mtime);
+	di->di_ctime = be64_to_cpu(str->di_ctime);
+	di->di_major = be32_to_cpu(str->di_major);
+	di->di_minor = be32_to_cpu(str->di_minor);
+
+	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
+	di->di_goal_data = be64_to_cpu(str->di_goal_data);
+	di->di_generation = be64_to_cpu(str->di_generation);
+
+	di->di_flags = be32_to_cpu(str->di_flags);
+	di->di_payload_format = be32_to_cpu(str->di_payload_format);
+	di->di_height = be16_to_cpu(str->di_height);
+
+	di->di_depth = be16_to_cpu(str->di_depth);
+	di->di_entries = be32_to_cpu(str->di_entries);
+
+	di->di_eattr = be64_to_cpu(str->di_eattr);
+}
+
 /**
  * gfs2_inode_refresh - Refresh the incore copy of the dinode
  * @ip: The GFS2 inode
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index edf87567bb..062a44f87f 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -56,7 +56,7 @@ static void gfs2_inum_print(const struct gfs2_inum_host *no)
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
 }
 
-static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
+void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
 {
 	const struct gfs2_meta_header *str = buf;
 
@@ -155,40 +155,6 @@ void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
 	qu->qu_value = be64_to_cpu(str->qu_value);
 }
 
-void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
-{
-	struct gfs2_dinode_host *di = &ip->i_di;
-	const struct gfs2_dinode *str = buf;
-
-	gfs2_meta_header_in(&di->di_header, buf);
-	gfs2_inum_in(&di->di_num, &str->di_num);
-
-	di->di_mode = be32_to_cpu(str->di_mode);
-	di->di_uid = be32_to_cpu(str->di_uid);
-	di->di_gid = be32_to_cpu(str->di_gid);
-	di->di_nlink = be32_to_cpu(str->di_nlink);
-	di->di_size = be64_to_cpu(str->di_size);
-	di->di_blocks = be64_to_cpu(str->di_blocks);
-	di->di_atime = be64_to_cpu(str->di_atime);
-	di->di_mtime = be64_to_cpu(str->di_mtime);
-	di->di_ctime = be64_to_cpu(str->di_ctime);
-	di->di_major = be32_to_cpu(str->di_major);
-	di->di_minor = be32_to_cpu(str->di_minor);
-
-	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
-	di->di_goal_data = be64_to_cpu(str->di_goal_data);
-	di->di_generation = be64_to_cpu(str->di_generation);
-
-	di->di_flags = be32_to_cpu(str->di_flags);
-	di->di_payload_format = be32_to_cpu(str->di_payload_format);
-	di->di_height = be16_to_cpu(str->di_height);
-
-	di->di_depth = be16_to_cpu(str->di_depth);
-	di->di_entries = be32_to_cpu(str->di_entries);
-
-	di->di_eattr = be64_to_cpu(str->di_eattr);
-}
-
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
 	const struct gfs2_dinode_host *di = &ip->i_di;
-- 
cgit v1.2.2


From 4cc14f0b88bf3e0b508143e091eb5a8dff3e3b9c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 19:00:24 -0500
Subject: [GFS2] Change argument to gfs2_dinode_print

Change argument for gfs2_dinode_print in order to prepare
for removal of duplicate fields between struct inode and
struct gfs2_dinode_host.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c     | 8 ++++----
 fs/gfs2/ondisk.c    | 4 +++-
 fs/gfs2/ops_inode.c | 4 ++--
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b39cfcfe92..4c5d286fef 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -269,7 +269,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 
 	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
@@ -289,7 +289,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 
 	if (ip->i_di.di_blocks != 1) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 
@@ -359,7 +359,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	   bigger than the old one, we must have underflowed. */
 	if (diff < 0 && nlink > ip->i_di.di_nlink) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 
@@ -1010,7 +1010,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 
 	if (ip->i_di.di_entries != 2) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		return -EIO;
 	}
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 062a44f87f..77bed44083 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -190,8 +190,10 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 }
 
-void gfs2_dinode_print(const struct gfs2_dinode_host *di)
+void gfs2_dinode_print(const struct gfs2_inode *ip)
 {
+	const struct gfs2_dinode_host *di = &ip->i_di;
+
 	gfs2_meta_header_print(&di->di_header);
 	gfs2_inum_print(&di->di_num);
 
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index bd268852c6..b2c2fe613d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -467,7 +467,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
 
 	if (ip->i_di.di_entries < 2) {
 		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(&ip->i_di);
+			gfs2_dinode_print(ip);
 		error = -EIO;
 		goto out_gunlock;
 	}
@@ -640,7 +640,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (S_ISDIR(nip->i_di.di_mode)) {
 			if (nip->i_di.di_entries < 2) {
 				if (gfs2_consist_inode(nip))
-					gfs2_dinode_print(&nip->i_di);
+					gfs2_dinode_print(nip);
 				error = -EIO;
 				goto out_gunlock;
 			}
-- 
cgit v1.2.2


From af339c0241d0dd3b35f9097b4f4999bb22ffe502 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 10:34:15 -0500
Subject: [GFS2] Shrink gfs2_inode (1) - di_header/di_num

The metadata header doesn't need to be stored in the incore
struct gfs2_inode since its constant, and this patch removes it.
Also, there is already a field for the inode's number in the
struct gfs2_inode, so we don't need one in struct gfs2_dinode_host
as well.

This saves 28 bytes of space in the struct gfs2_inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c  | 25 +++++++++++--------------
 fs/gfs2/ondisk.c | 22 +++++++++-------------
 2 files changed, 20 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4c5d286fef..7ba05fc553 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -208,13 +208,18 @@ fail:
 	return ERR_PTR(error);
 }
 
-static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
+static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 {
 	struct gfs2_dinode_host *di = &ip->i_di;
 	const struct gfs2_dinode *str = buf;
 
-	gfs2_meta_header_in(&di->di_header, buf);
-	gfs2_inum_in(&di->di_num, &str->di_num);
+	if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+		if (gfs2_consist_inode(ip))
+			gfs2_dinode_print(ip);
+		return -EIO;
+	}
+	if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
+		return -ESTALE;
 
 	di->di_mode = be32_to_cpu(str->di_mode);
 	di->di_uid = be32_to_cpu(str->di_uid);
@@ -240,6 +245,7 @@ static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_entries = be32_to_cpu(str->di_entries);
 
 	di->di_eattr = be64_to_cpu(str->di_eattr);
+	return 0;
 }
 
 /**
@@ -263,21 +269,12 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 		return -EIO;
 	}
 
-	gfs2_dinode_in(ip, dibh->b_data);
+	error = gfs2_dinode_in(ip, dibh->b_data);
 
 	brelse(dibh);
-
-	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
-		if (gfs2_consist_inode(ip))
-			gfs2_dinode_print(ip);
-		return -EIO;
-	}
-	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
-		return -ESTALE;
-
 	ip->i_vn = ip->i_gl->gl_vn;
 
-	return 0;
+	return error;
 }
 
 int gfs2_dinode_dealloc(struct gfs2_inode *ip)
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 77bed44083..4bc590edb6 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -56,7 +56,7 @@ static void gfs2_inum_print(const struct gfs2_inum_host *no)
 	printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
 }
 
-void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
+static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
 {
 	const struct gfs2_meta_header *str = buf;
 
@@ -74,13 +74,6 @@ static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *b
 	str->mh_format = cpu_to_be32(mh->mh_format);
 }
 
-static void gfs2_meta_header_print(const struct gfs2_meta_header_host *mh)
-{
-	pv(mh, mh_magic, "0x%.8X");
-	pv(mh, mh_type, "%u");
-	pv(mh, mh_format, "%u");
-}
-
 void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 {
 	const struct gfs2_sb *str = buf;
@@ -160,8 +153,13 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	const struct gfs2_dinode_host *di = &ip->i_di;
 	struct gfs2_dinode *str = buf;
 
-	gfs2_meta_header_out(&di->di_header, buf);
-	gfs2_inum_out(&di->di_num, (char *)&str->di_num);
+	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+	str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+	str->di_header.__pad0 = 0;
+	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+	str->di_header.__pad1 = 0;
+
+	gfs2_inum_out(&ip->i_num, &str->di_num);
 
 	str->di_mode = cpu_to_be32(di->di_mode);
 	str->di_uid = cpu_to_be32(di->di_uid);
@@ -187,15 +185,13 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_entries = cpu_to_be32(di->di_entries);
 
 	str->di_eattr = cpu_to_be64(di->di_eattr);
-
 }
 
 void gfs2_dinode_print(const struct gfs2_inode *ip)
 {
 	const struct gfs2_dinode_host *di = &ip->i_di;
 
-	gfs2_meta_header_print(&di->di_header);
-	gfs2_inum_print(&di->di_num);
+	gfs2_inum_print(&ip->i_num);
 
 	pv(di, di_mode, "0%o");
 	pv(di, di_uid, "%u");
-- 
cgit v1.2.2


From e7f14f4d094ea1a9ce1953375f5bc1500c760c79 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 31 Oct 2006 21:45:08 -0500
Subject: [GFS2] Shrink gfs2_inode (2) - di_major/di_minor

This removes the device numbers from this structure by using
inode->i_rdev instead. It also cleans up the code in gfs2_mknod.
It results in shrinking the gfs2_inode by 8 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c     | 36 ++++++++++++++++--------------------
 fs/gfs2/inode.h     |  2 +-
 fs/gfs2/ondisk.c    |  4 ----
 fs/gfs2/ops_inode.c | 38 +++++---------------------------------
 4 files changed, 22 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7ba05fc553..a995919565 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -51,17 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
-
-	switch (di->di_mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		inode->i_rdev = MKDEV(di->di_major, di->di_minor);
-		break;
-	default:
-		inode->i_rdev = 0;
-		break;
-	};
-
 	inode->i_mode = di->di_mode;
 	inode->i_nlink = di->di_nlink;
 	inode->i_uid = di->di_uid;
@@ -222,6 +211,15 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 		return -ESTALE;
 
 	di->di_mode = be32_to_cpu(str->di_mode);
+	ip->i_inode.i_rdev = 0;
+	switch (di->di_mode & S_IFMT) {
+	case S_IFBLK:
+	case S_IFCHR:
+		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
+					   be32_to_cpu(str->di_minor));
+		break;
+	};
+
 	di->di_uid = be32_to_cpu(str->di_uid);
 	di->di_gid = be32_to_cpu(str->di_gid);
 	di->di_nlink = be32_to_cpu(str->di_nlink);
@@ -230,8 +228,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_atime = be64_to_cpu(str->di_atime);
 	di->di_mtime = be64_to_cpu(str->di_mtime);
 	di->di_ctime = be64_to_cpu(str->di_ctime);
-	di->di_major = be32_to_cpu(str->di_major);
-	di->di_minor = be32_to_cpu(str->di_minor);
 
 	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
 	di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -270,7 +266,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 	}
 
 	error = gfs2_dinode_in(ip, dibh->b_data);
-
 	brelse(dibh);
 	ip->i_vn = ip->i_gl->gl_vn;
 
@@ -684,7 +679,7 @@ out:
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 			const struct gfs2_inum_host *inum, unsigned int mode,
 			unsigned int uid, unsigned int gid,
-			const u64 *generation)
+			const u64 *generation, dev_t dev)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_dinode *di;
@@ -705,7 +700,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_size = cpu_to_be64(0);
 	di->di_blocks = cpu_to_be64(1);
 	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
-	di->di_major = di->di_minor = cpu_to_be32(0);
+	di->di_major = cpu_to_be32(MAJOR(dev));
+	di->di_minor = cpu_to_be32(MINOR(dev));
 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
 	di->di_generation = cpu_to_be64(*generation);
 	di->di_flags = cpu_to_be32(0);
@@ -740,7 +736,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 		       unsigned int mode, const struct gfs2_inum_host *inum,
-		       const u64 *generation)
+		       const u64 *generation, dev_t dev)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	unsigned int uid, gid;
@@ -761,7 +757,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	if (error)
 		goto out_quota;
 
-	init_dinode(dip, gl, inum, mode, uid, gid, generation);
+	init_dinode(dip, gl, inum, mode, uid, gid, generation, dev);
 	gfs2_quota_change(dip, +1, uid, gid);
 	gfs2_trans_end(sdp);
 
@@ -892,7 +888,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
  */
 
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
-			   unsigned int mode)
+			   unsigned int mode, dev_t dev)
 {
 	struct inode *inode;
 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
@@ -950,7 +946,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 			goto fail_gunlock;
 	}
 
-	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation);
+	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev);
 	if (error)
 		goto fail_gunlock2;
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index d699b92097..33c9ea68f7 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -37,7 +37,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
 struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 			   int is_root, struct nameidata *nd);
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
-			   unsigned int mode);
+			   unsigned int mode, dev_t dev);
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 		struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 4bc590edb6..60dd943761 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -170,8 +170,6 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_atime = cpu_to_be64(di->di_atime);
 	str->di_mtime = cpu_to_be64(di->di_mtime);
 	str->di_ctime = cpu_to_be64(di->di_ctime);
-	str->di_major = cpu_to_be32(di->di_major);
-	str->di_minor = cpu_to_be32(di->di_minor);
 
 	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
 	str->di_goal_data = cpu_to_be64(di->di_goal_data);
@@ -202,8 +200,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	printk(KERN_INFO "  di_atime = %lld\n", (long long)di->di_atime);
 	printk(KERN_INFO "  di_mtime = %lld\n", (long long)di->di_mtime);
 	printk(KERN_INFO "  di_ctime = %lld\n", (long long)di->di_ctime);
-	pv(di, di_major, "%u");
-	pv(di, di_minor, "%u");
 
 	printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
 	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index b2c2fe613d..c10b914bf8 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -59,7 +59,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
 	for (;;) {
-		inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
+		inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
 		if (!IS_ERR(inode)) {
 			gfs2_trans_end(sdp);
 			if (dip->i_alloc.al_rgd)
@@ -326,7 +326,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
-	inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
+	inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
 	if (IS_ERR(inode)) {
 		gfs2_holder_uninit(ghs);
 		return PTR_ERR(inode);
@@ -379,7 +379,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
-	inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
+	inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
 	if (IS_ERR(inode)) {
 		gfs2_holder_uninit(ghs);
 		return PTR_ERR(inode);
@@ -504,47 +504,19 @@ out:
 static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
 		      dev_t dev)
 {
-	struct gfs2_inode *dip = GFS2_I(dir), *ip;
+	struct gfs2_inode *dip = GFS2_I(dir);
 	struct gfs2_sbd *sdp = GFS2_SB(dir);
 	struct gfs2_holder ghs[2];
 	struct inode *inode;
-	struct buffer_head *dibh;
-	u32 major = 0, minor = 0;
-	int error;
-
-	switch (mode & S_IFMT) {
-	case S_IFBLK:
-	case S_IFCHR:
-		major = MAJOR(dev);
-		minor = MINOR(dev);
-		break;
-	case S_IFIFO:
-	case S_IFSOCK:
-		break;
-	default:
-		return -EOPNOTSUPP;
-	};
 
 	gfs2_holder_init(dip->i_gl, 0, 0, ghs);
 
-	inode = gfs2_createi(ghs, &dentry->d_name, mode);
+	inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
 	if (IS_ERR(inode)) {
 		gfs2_holder_uninit(ghs);
 		return PTR_ERR(inode);
 	}
 
-	ip = ghs[1].gh_gl->gl_object;
-
-	ip->i_di.di_major = major;
-	ip->i_di.di_minor = minor;
-
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-
-	if (!gfs2_assert_withdraw(sdp, !error)) {
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
-	}
-
 	gfs2_trans_end(sdp);
 	if (dip->i_alloc.al_rgd)
 		gfs2_inplace_release(dip);
-- 
cgit v1.2.2


From b60623c238b6a819bd04090139704e2cb57a751f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 12:22:46 -0500
Subject: [GFS2] Shrink gfs2_inode (3) - di_mode

This removes the duplicate di_mode field in favour of using the
inode->i_mode field. This saves 4 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/acl.c         | 16 ++++++++--------
 fs/gfs2/bmap.c        |  2 +-
 fs/gfs2/eaops.c       |  2 +-
 fs/gfs2/eattr.c       |  8 ++++----
 fs/gfs2/glock.c       |  2 +-
 fs/gfs2/glops.c       |  8 ++++----
 fs/gfs2/inode.c       | 18 +++++++-----------
 fs/gfs2/inode.h       |  2 +-
 fs/gfs2/ondisk.c      |  3 +--
 fs/gfs2/ops_address.c |  1 -
 fs/gfs2/ops_dentry.c  |  2 +-
 fs/gfs2/ops_file.c    |  6 +++---
 fs/gfs2/ops_inode.c   | 17 ++++++++---------
 fs/gfs2/ops_super.c   |  2 +-
 14 files changed, 41 insertions(+), 48 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 906e403b05..87f6304f2f 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -76,9 +76,9 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
 		return -EOPNOTSUPP;
 	if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
 		return -EPERM;
-	if (S_ISLNK(ip->i_di.di_mode))
+	if (S_ISLNK(ip->i_inode.i_mode))
 		return -EOPNOTSUPP;
-	if (!access && !S_ISDIR(ip->i_di.di_mode))
+	if (!access && !S_ISDIR(ip->i_inode.i_mode))
 		return -EACCES;
 
 	return 0;
@@ -198,8 +198,8 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
 		gfs2_assert_withdraw(sdp,
-				(ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
-		ip->i_di.di_mode = mode;
+				(ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT));
+		ip->i_inode.i_mode = mode;
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -215,12 +215,12 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct posix_acl *acl = NULL, *clone;
 	struct gfs2_ea_request er;
-	mode_t mode = ip->i_di.di_mode;
+	mode_t mode = ip->i_inode.i_mode;
 	int error;
 
 	if (!sdp->sd_args.ar_posix_acl)
 		return 0;
-	if (S_ISLNK(ip->i_di.di_mode))
+	if (S_ISLNK(ip->i_inode.i_mode))
 		return 0;
 
 	memset(&er, 0, sizeof(struct gfs2_ea_request));
@@ -232,7 +232,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
 		return error;
 	if (!acl) {
 		mode &= ~current->fs->umask;
-		if (mode != ip->i_di.di_mode)
+		if (mode != ip->i_inode.i_mode)
 			error = munge_mode(ip, mode);
 		return error;
 	}
@@ -244,7 +244,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
 	posix_acl_release(acl);
 	acl = clone;
 
-	if (S_ISDIR(ip->i_di.di_mode)) {
+	if (S_ISDIR(ip->i_inode.i_mode)) {
 		er.er_name = GFS2_POSIX_ACL_DEFAULT;
 		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
 		error = gfs2_system_eaops.eo_set(ip, &er);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8c092ab2b4..481a068825 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1109,7 +1109,7 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
 {
 	int error;
 
-	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode)))
+	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
 		return -EINVAL;
 
 	if (size > ip->i_di.di_size)
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index 92c54e9b0d..cd747c00f6 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -120,7 +120,7 @@ static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
 
 	if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
 		if (!(er->er_flags & GFS2_ERF_MODE)) {
-			er->er_mode = ip->i_di.di_mode;
+			er->er_mode = ip->i_inode.i_mode;
 			er->er_flags |= GFS2_ERF_MODE;
 		}
 		error = gfs2_acl_validate_set(ip, 1, er,
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 9b7bb565b5..5208fa94aa 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -711,9 +711,9 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (!error) {
 		if (er->er_flags & GFS2_ERF_MODE) {
 			gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
-					    (ip->i_di.di_mode & S_IFMT) ==
+					    (ip->i_inode.i_mode & S_IFMT) ==
 					    (er->er_mode & S_IFMT));
-			ip->i_di.di_mode = er->er_mode;
+			ip->i_inode.i_mode = er->er_mode;
 		}
 		ip->i_di.di_ctime = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -847,8 +847,8 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	if (er->er_flags & GFS2_ERF_MODE) {
 		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
-			(ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
-		ip->i_di.di_mode = er->er_mode;
+			(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
+		ip->i_inode.i_mode = er->er_mode;
 	}
 	ip->i_di.di_ctime = get_seconds();
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 78fe0fae23..44633c4671 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2078,7 +2078,7 @@ static int dump_inode(struct gfs2_inode *ip)
 	printk(KERN_INFO "    num = %llu %llu\n",
 		    (unsigned long long)ip->i_num.no_formal_ino,
 		    (unsigned long long)ip->i_num.no_addr);
-	printk(KERN_INFO "    type = %u\n", IF2DT(ip->i_di.di_mode));
+	printk(KERN_INFO "    type = %u\n", IF2DT(ip->i_inode.i_mode));
 	printk(KERN_INFO "    i_flags =");
 	for (x = 0; x < 32; x++)
 		if (test_bit(x, &ip->i_flags))
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5406b19322..aad45b7a92 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -92,7 +92,7 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
 
 	ip = gl->gl_object;
 	inode = &ip->i_inode;
-	if (!ip || !S_ISREG(ip->i_di.di_mode))
+	if (!ip || !S_ISREG(inode->i_mode))
 		return;
 
 	if (!test_bit(GIF_PAGED, &ip->i_flags))
@@ -119,7 +119,7 @@ static void gfs2_page_inval(struct gfs2_glock *gl)
 
 	ip = gl->gl_object;
 	inode = &ip->i_inode;
-	if (!ip || !S_ISREG(ip->i_di.di_mode))
+	if (!ip || !S_ISREG(inode->i_mode))
 		return;
 
 	truncate_inode_pages(inode->i_mapping, 0);
@@ -142,7 +142,7 @@ static void gfs2_page_wait(struct gfs2_glock *gl)
 	struct address_space *mapping = inode->i_mapping;
 	int error;
 
-	if (!S_ISREG(ip->i_di.di_mode))
+	if (!S_ISREG(inode->i_mode))
 		return;
 
 	error = filemap_fdatawait(mapping);
@@ -164,7 +164,7 @@ static void gfs2_page_writeback(struct gfs2_glock *gl)
 	struct inode *inode = &ip->i_inode;
 	struct address_space *mapping = inode->i_mapping;
 
-	if (!S_ISREG(ip->i_di.di_mode))
+	if (!S_ISREG(inode->i_mode))
 		return;
 
 	filemap_fdatawrite(mapping);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a995919565..de466043c9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -51,7 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
-	inode->i_mode = di->di_mode;
 	inode->i_nlink = di->di_nlink;
 	inode->i_uid = di->di_uid;
 	inode->i_gid = di->di_gid;
@@ -88,9 +87,6 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode_host *di = &ip->i_di;
-	gfs2_assert_withdraw(GFS2_SB(inode),
-		(di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
-	di->di_mode = inode->i_mode;
 	di->di_uid = inode->i_uid;
 	di->di_gid = inode->i_gid;
 	di->di_atime = inode->i_atime.tv_sec;
@@ -210,9 +206,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
 		return -ESTALE;
 
-	di->di_mode = be32_to_cpu(str->di_mode);
+	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
 	ip->i_inode.i_rdev = 0;
-	switch (di->di_mode & S_IFMT) {
+	switch (ip->i_inode.i_mode & S_IFMT) {
 	case S_IFBLK:
 	case S_IFCHR:
 		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
@@ -620,7 +616,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 			       unsigned int *uid, unsigned int *gid)
 {
 	if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
-	    (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) {
+	    (dip->i_inode.i_mode & S_ISUID) && dip->i_di.di_uid) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISUID;
 		else if (dip->i_di.di_uid != current->fsuid)
@@ -629,7 +625,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 	} else
 		*uid = current->fsuid;
 
-	if (dip->i_di.di_mode & S_ISGID) {
+	if (dip->i_inode.i_mode & S_ISGID) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISGID;
 		*gid = dip->i_di.di_gid;
@@ -810,7 +806,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 			goto fail_quota_locks;
 	}
 
-	error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
+	error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto fail_end_trans;
 
@@ -1053,7 +1049,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
 		return -EPERM;
 
-	if ((dip->i_di.di_mode & S_ISVTX) &&
+	if ((dip->i_inode.i_mode & S_ISVTX) &&
 	    dip->i_di.di_uid != current->fsuid &&
 	    ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
 		return -EPERM;
@@ -1072,7 +1068,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (!gfs2_inum_equal(&inum, &ip->i_num))
 		return -ENOENT;
 
-	if (IF2DT(ip->i_di.di_mode) != type) {
+	if (IF2DT(ip->i_inode.i_mode) != type) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 33c9ea68f7..69cbf98509 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -22,7 +22,7 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip)
 
 static inline int gfs2_is_dir(struct gfs2_inode *ip)
 {
-	return S_ISDIR(ip->i_di.di_mode);
+	return S_ISDIR(ip->i_inode.i_mode);
 }
 
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 60dd943761..6b50a5731a 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -161,7 +161,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 	gfs2_inum_out(&ip->i_num, &str->di_num);
 
-	str->di_mode = cpu_to_be32(di->di_mode);
+	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
 	str->di_uid = cpu_to_be32(di->di_uid);
 	str->di_gid = cpu_to_be32(di->di_gid);
 	str->di_nlink = cpu_to_be32(di->di_nlink);
@@ -191,7 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	gfs2_inum_print(&ip->i_num);
 
-	pv(di, di_mode, "0%o");
 	pv(di, di_uid, "%u");
 	pv(di, di_gid, "%u");
 	pv(di, di_nlink, "%u");
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 015640b3f1..45a3d85b1d 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -498,7 +498,6 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		di->di_size = cpu_to_be64(inode->i_size);
 	}
 
-	di->di_mode = cpu_to_be32(inode->i_mode);
 	di->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
 	di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
 	di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index c36f9e342e..d355899585 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -76,7 +76,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
 	if (!gfs2_inum_equal(&ip->i_num, &inum))
 		goto invalid_gunlock;
 
-	if (IF2DT(ip->i_di.di_mode) != type) {
+	if (IF2DT(ip->i_inode.i_mode) != type) {
 		gfs2_consist_inode(dip);
 		goto fail_gunlock;
 	}
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 7ea4175739..b52b9db1a2 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -425,7 +425,7 @@ static int gfs2_open(struct inode *inode, struct file *file)
 	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
 	file->private_data = fp;
 
-	if (S_ISREG(ip->i_di.di_mode)) {
+	if (S_ISREG(ip->i_inode.i_mode)) {
 		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
 					   &i_gh);
 		if (error)
@@ -515,7 +515,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
 
 	if (!(fl->fl_flags & FL_POSIX))
 		return -ENOLCK;
-	if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+	if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
 
 	if (sdp->sd_args.ar_localflocks) {
@@ -617,7 +617,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
 
 	if (!(fl->fl_flags & FL_FLOCK))
 		return -ENOLCK;
-	if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+	if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
 		return -ENOLCK;
 
 	if (sdp->sd_args.ar_localflocks)
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index c10b914bf8..cf7a5bae39 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -144,7 +144,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	int alloc_required;
 	int error;
 
-	if (S_ISDIR(ip->i_di.di_mode))
+	if (S_ISDIR(inode->i_mode))
 		return -EPERM;
 
 	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
@@ -220,7 +220,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
-			     IF2DT(ip->i_di.di_mode));
+			     IF2DT(inode->i_mode));
 	if (error)
 		goto out_end_trans;
 
@@ -564,11 +564,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 
 	/* Make sure we aren't trying to move a dirctory into it's subdir */
 
-	if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
+	if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) {
 		dir_rename = 1;
 
-		error = gfs2_glock_nq_init(sdp->sd_rename_gl,
-					   LM_ST_EXCLUSIVE, 0,
+		error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0,
 					   &r_gh);
 		if (error)
 			goto out;
@@ -609,7 +608,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_gunlock;
 
-		if (S_ISDIR(nip->i_di.di_mode)) {
+		if (S_ISDIR(nip->i_inode.i_mode)) {
 			if (nip->i_di.di_entries < 2) {
 				if (gfs2_consist_inode(nip))
 					gfs2_dinode_print(nip);
@@ -646,7 +645,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 				error = -EFBIG;
 				goto out_gunlock;
 			}
-			if (S_ISDIR(ip->i_di.di_mode) &&
+			if (S_ISDIR(ip->i_inode.i_mode) &&
 			    ndip->i_di.di_nlink == (u32)-1) {
 				error = -EMLINK;
 				goto out_gunlock;
@@ -701,7 +700,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 	/* Remove the target file, if it exists */
 
 	if (nip) {
-		if (S_ISDIR(nip->i_di.di_mode))
+		if (S_ISDIR(nip->i_inode.i_mode))
 			error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
 		else {
 			error = gfs2_dir_del(ndip, &ndentry->d_name);
@@ -743,7 +742,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		goto out_end_trans;
 
 	error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
-			     IF2DT(ip->i_di.di_mode));
+			     IF2DT(ip->i_inode.i_mode));
 	if (error)
 		goto out_end_trans;
 
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 9c786d1e70..8635175692 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -407,7 +407,7 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (error)
 		goto out_uninit;
 
-	if (S_ISDIR(ip->i_di.di_mode) &&
+	if (S_ISDIR(inode->i_mode) &&
 	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
 		error = gfs2_dir_exhash_dealloc(ip);
 		if (error)
-- 
cgit v1.2.2


From 2933f9254a6af33db25270778c998a42029da668 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 13:23:29 -0500
Subject: [GFS2] Shrink gfs2_inode (4) - di_uid/di_gid

Remove duplicate di_uid/di_gid fields in favour of using
inode->i_uid/inode->i_gid instead. This saves 8 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/acl.c         |  2 +-
 fs/gfs2/bmap.c        |  2 +-
 fs/gfs2/eattr.c       |  2 +-
 fs/gfs2/inode.c       | 23 +++++++++--------------
 fs/gfs2/ondisk.c      |  6 ++----
 fs/gfs2/ops_address.c |  2 +-
 fs/gfs2/ops_inode.c   | 10 ++++------
 fs/gfs2/ops_vm.c      |  2 +-
 fs/gfs2/quota.c       |  8 ++++----
 fs/gfs2/rgrp.c        | 11 +++++------
 10 files changed, 29 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 87f6304f2f..3908992b27 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -74,7 +74,7 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
 {
 	if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
 		return -EOPNOTSUPP;
-	if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
+	if (current->fsuid != ip->i_inode.i_uid && !capable(CAP_FOWNER))
 		return -EPERM;
 	if (S_ISLNK(ip->i_inode.i_mode))
 		return -EOPNOTSUPP;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 481a068825..0c913eecf8 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -819,7 +819,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 	if (error)
 		goto out;
 
-	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	if (error)
 		goto out_gunlock_q;
 
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 5208fa94aa..935cc9a571 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -687,7 +687,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (error)
 		goto out;
 
-	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	if (error)
 		goto out_gunlock_q;
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index de466043c9..0de9b22f45 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -52,8 +52,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 
 	inode->i_ino = ip->i_num.no_addr;
 	inode->i_nlink = di->di_nlink;
-	inode->i_uid = di->di_uid;
-	inode->i_gid = di->di_gid;
 	i_size_write(inode, di->di_size);
 	inode->i_atime.tv_sec = di->di_atime;
 	inode->i_mtime.tv_sec = di->di_mtime;
@@ -87,8 +85,6 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip)
 {
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode_host *di = &ip->i_di;
-	di->di_uid = inode->i_uid;
-	di->di_gid = inode->i_gid;
 	di->di_atime = inode->i_atime.tv_sec;
 	di->di_mtime = inode->i_mtime.tv_sec;
 	di->di_ctime = inode->i_ctime.tv_sec;
@@ -216,8 +212,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 		break;
 	};
 
-	di->di_uid = be32_to_cpu(str->di_uid);
-	di->di_gid = be32_to_cpu(str->di_gid);
+	ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
+	ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
 	di->di_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
@@ -616,19 +612,19 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
 			       unsigned int *uid, unsigned int *gid)
 {
 	if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
-	    (dip->i_inode.i_mode & S_ISUID) && dip->i_di.di_uid) {
+	    (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISUID;
-		else if (dip->i_di.di_uid != current->fsuid)
+		else if (dip->i_inode.i_uid != current->fsuid)
 			*mode &= ~07111;
-		*uid = dip->i_di.di_uid;
+		*uid = dip->i_inode.i_uid;
 	} else
 		*uid = current->fsuid;
 
 	if (dip->i_inode.i_mode & S_ISGID) {
 		if (S_ISDIR(*mode))
 			*mode |= S_ISGID;
-		*gid = dip->i_di.di_gid;
+		*gid = dip->i_inode.i_gid;
 	} else
 		*gid = current->fsgid;
 }
@@ -783,8 +779,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 	if (alloc_required < 0)
 		goto fail;
 	if (alloc_required) {
-		error = gfs2_quota_check(dip, dip->i_di.di_uid,
-					 dip->i_di.di_gid);
+		error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
 		if (error)
 			goto fail_quota_locks;
 
@@ -1050,8 +1045,8 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 		return -EPERM;
 
 	if ((dip->i_inode.i_mode & S_ISVTX) &&
-	    dip->i_di.di_uid != current->fsuid &&
-	    ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
+	    dip->i_inode.i_uid != current->fsuid &&
+	    ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER))
 		return -EPERM;
 
 	if (IS_APPEND(&dip->i_inode))
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 6b50a5731a..e224f6a226 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -162,8 +162,8 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	gfs2_inum_out(&ip->i_num, &str->di_num);
 
 	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-	str->di_uid = cpu_to_be32(di->di_uid);
-	str->di_gid = cpu_to_be32(di->di_gid);
+	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
 	str->di_nlink = cpu_to_be32(di->di_nlink);
 	str->di_size = cpu_to_be64(di->di_size);
 	str->di_blocks = cpu_to_be64(di->di_blocks);
@@ -191,8 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	gfs2_inum_print(&ip->i_num);
 
-	pv(di, di_uid, "%u");
-	pv(di, di_gid, "%u");
 	pv(di, di_nlink, "%u");
 	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
 	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 45a3d85b1d..38b702a182 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -386,7 +386,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 		if (error)
 			goto out_alloc_put;
 
-		error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+		error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 		if (error)
 			goto out_qunlock;
 
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index cf7a5bae39..efbcec3311 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -196,8 +196,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 		if (error)
 			goto out_alloc;
 
-		error = gfs2_quota_check(dip, dip->i_di.di_uid,
-					 dip->i_di.di_gid);
+		error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid);
 		if (error)
 			goto out_gunlock_q;
 
@@ -673,8 +672,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		if (error)
 			goto out_alloc;
 
-		error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
-					 ndip->i_di.di_gid);
+		error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid);
 		if (error)
 			goto out_gunlock_q;
 
@@ -885,8 +883,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	u32 ouid, ogid, nuid, ngid;
 	int error;
 
-	ouid = ip->i_di.di_uid;
-	ogid = ip->i_di.di_gid;
+	ouid = inode->i_uid;
+	ogid = inode->i_gid;
 	nuid = attr->ia_uid;
 	ngid = attr->ia_gid;
 
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index 5453d2947a..45a5f11fc3 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -76,7 +76,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
 	if (error)
 		goto out;
 
-	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	if (error)
 		goto out_gunlock_q;
 
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 5d00e9b209..d0db881b55 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -452,19 +452,19 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return 0;
 
-	error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
+	error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd);
 	if (error)
 		goto out;
 	al->al_qd_num++;
 	qd++;
 
-	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
+	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd);
 	if (error)
 		goto out;
 	al->al_qd_num++;
 	qd++;
 
-	if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
+	if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
 		error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
 		if (error)
 			goto out;
@@ -472,7 +472,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 		qd++;
 	}
 
-	if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
+	if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) {
 		error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
 		if (error)
 			goto out;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 07dfd63050..ff0846528d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1217,7 +1217,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
 	al->al_alloced++;
 
 	gfs2_statfs_change(sdp, 0, -1, 0);
-	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 
 	spin_lock(&sdp->sd_rindex_spin);
 	rgd->rd_free_clone--;
@@ -1261,7 +1261,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
 	al->al_alloced++;
 
 	gfs2_statfs_change(sdp, 0, -1, 0);
-	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_trans_add_unrevoke(sdp, block);
 
 	spin_lock(&sdp->sd_rindex_spin);
@@ -1337,8 +1337,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	gfs2_trans_add_rg(rgd);
 
 	gfs2_statfs_change(sdp, 0, +blen, 0);
-	gfs2_quota_change(ip, -(s64)blen,
-			 ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
 }
 
 /**
@@ -1366,7 +1365,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	gfs2_trans_add_rg(rgd);
 
 	gfs2_statfs_change(sdp, 0, +blen, 0);
-	gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, bstart, blen);
 }
 
@@ -1411,7 +1410,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
-	gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
+	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
 	gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
 }
 
-- 
cgit v1.2.2


From 4f56110a00af5fb2e22fbccfcaf944d62cae8fcf Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 14:04:17 -0500
Subject: [GFS2] Shrink gfs2_inode (5) - di_nlink

Remove the di_nlink field in favour of inode->i_nlink and
update the nlink handling to use the proper macros. This
saves 4 bytes.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c     | 37 ++++++++++++++++++++++++-------------
 fs/gfs2/ondisk.c    |  3 +--
 fs/gfs2/ops_inode.c | 12 ++++++------
 3 files changed, 31 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 0de9b22f45..7112039848 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -51,7 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct gfs2_dinode_host *di = &ip->i_di;
 
 	inode->i_ino = ip->i_num.no_addr;
-	inode->i_nlink = di->di_nlink;
 	i_size_write(inode, di->di_size);
 	inode->i_atime.tv_sec = di->di_atime;
 	inode->i_mtime.tv_sec = di->di_mtime;
@@ -214,7 +213,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 
 	ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
 	ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
-	di->di_nlink = be32_to_cpu(str->di_nlink);
+	/*
+	 * We will need to review setting the nlink count here in the
+	 * light of the forthcoming ro bind mount work. This is a reminder
+	 * to do that.
+	 */
+	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
 	di->di_atime = be64_to_cpu(str->di_atime);
@@ -336,12 +340,12 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	u32 nlink;
 	int error;
 
-	BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink);
-	nlink = ip->i_di.di_nlink + diff;
+	BUG_ON(diff != 1 && diff != -1);
+	nlink = ip->i_inode.i_nlink + diff;
 
 	/* If we are reducing the nlink count, but the new value ends up being
 	   bigger than the old one, we must have underflowed. */
-	if (diff < 0 && nlink > ip->i_di.di_nlink) {
+	if (diff < 0 && nlink > ip->i_inode.i_nlink) {
 		if (gfs2_consist_inode(ip))
 			gfs2_dinode_print(ip);
 		return -EIO;
@@ -351,16 +355,19 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	if (error)
 		return error;
 
-	ip->i_di.di_nlink = nlink;
+	if (diff > 0)
+		inc_nlink(&ip->i_inode);
+	else
+		drop_nlink(&ip->i_inode);
+
 	ip->i_di.di_ctime = get_seconds();
-	ip->i_inode.i_nlink = nlink;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
 	mark_inode_dirty(&ip->i_inode);
 
-	if (ip->i_di.di_nlink == 0) {
+	if (ip->i_inode.i_nlink == 0) {
 		struct gfs2_rgrpd *rgd;
 		struct gfs2_holder ri_gh, rg_gh;
 
@@ -375,7 +382,6 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 		if (error)
 			goto out_norgrp;
 
-		clear_nlink(&ip->i_inode);
 		gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
 		gfs2_glock_dq_uninit(&rg_gh);
 out_norgrp:
@@ -586,7 +592,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 		return error;
 
 	/*  Don't create entries in an unlinked directory  */
-	if (!dip->i_di.di_nlink)
+	if (!dip->i_inode.i_nlink)
 		return -EPERM;
 
 	error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
@@ -602,7 +608,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 
 	if (dip->i_di.di_entries == (u32)-1)
 		return -EFBIG;
-	if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1)
+	if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1)
 		return -EMLINK;
 
 	return 0;
@@ -808,7 +814,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
 		goto fail_end_trans;
-	ip->i_di.di_nlink = 1;
+	ip->i_inode.i_nlink = 1;
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
@@ -1016,7 +1022,12 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 	if (error)
 		return error;
 
-	error = gfs2_change_nlink(ip, -2);
+	/* It looks odd, but it really should be done twice */
+	error = gfs2_change_nlink(ip, -1);
+	if (error)
+		return error;
+
+	error = gfs2_change_nlink(ip, -1);
 	if (error)
 		return error;
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index e224f6a226..b4e354b188 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -164,7 +164,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
 	str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
 	str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-	str->di_nlink = cpu_to_be32(di->di_nlink);
+	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
 	str->di_size = cpu_to_be64(di->di_size);
 	str->di_blocks = cpu_to_be64(di->di_blocks);
 	str->di_atime = cpu_to_be64(di->di_atime);
@@ -191,7 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	gfs2_inum_print(&ip->i_num);
 
-	pv(di, di_nlink, "%u");
 	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
 	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
 	printk(KERN_INFO "  di_atime = %lld\n", (long long)di->di_atime);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index efbcec3311..06176dee15 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -169,7 +169,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	error = -EINVAL;
-	if (!dip->i_di.di_nlink)
+	if (!dip->i_inode.i_nlink)
 		goto out_gunlock;
 	error = -EFBIG;
 	if (dip->i_di.di_entries == (u32)-1)
@@ -178,10 +178,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
 		goto out_gunlock;
 	error = -EINVAL;
-	if (!ip->i_di.di_nlink)
+	if (!ip->i_inode.i_nlink)
 		goto out_gunlock;
 	error = -EMLINK;
-	if (ip->i_di.di_nlink == (u32)-1)
+	if (ip->i_inode.i_nlink == (u32)-1)
 		goto out_gunlock;
 
 	alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
@@ -386,7 +386,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
 	ip = ghs[1].gh_gl->gl_object;
 
-	ip->i_di.di_nlink = 2;
+	ip->i_inode.i_nlink = 2;
 	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
 	ip->i_di.di_flags |= GFS2_DIF_JDATA;
 	ip->i_di.di_payload_format = GFS2_FORMAT_DE;
@@ -636,7 +636,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		};
 
 		if (odip != ndip) {
-			if (!ndip->i_di.di_nlink) {
+			if (!ndip->i_inode.i_nlink) {
 				error = -EINVAL;
 				goto out_gunlock;
 			}
@@ -645,7 +645,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 				goto out_gunlock;
 			}
 			if (S_ISDIR(ip->i_inode.i_mode) &&
-			    ndip->i_di.di_nlink == (u32)-1) {
+			    ndip->i_inode.i_nlink == (u32)-1) {
 				error = -EMLINK;
 				goto out_gunlock;
 			}
-- 
cgit v1.2.2


From 1a7b1eed5802502fd649e04784becd58557fdcf1 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 14:35:17 -0500
Subject: [GFS2] Shrink gfs2_inode (6) - di_atime/di_mtime/di_ctime

Remove the di_[amc]time fields and use inode->i_[amc]time
fields instead. This saves 24 bytes from the gfs2_inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c        | 10 +++++-----
 fs/gfs2/dir.c         | 10 +++++-----
 fs/gfs2/eattr.c       |  9 ++++-----
 fs/gfs2/inode.c       | 44 +++++++++++---------------------------------
 fs/gfs2/inode.h       |  1 -
 fs/gfs2/ondisk.c      | 10 +++-------
 fs/gfs2/ops_address.c |  4 ----
 fs/gfs2/ops_inode.c   |  3 +--
 8 files changed, 29 insertions(+), 62 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0c913eecf8..692d4a3da1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -778,7 +778,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 			gfs2_free_data(ip, bstart, blen);
 	}
 
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	gfs2_dinode_out(ip, dibh->b_data);
 
@@ -853,7 +853,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
 	}
 
 	ip->i_di.di_size = size;
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
@@ -968,7 +968,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
 	if (gfs2_is_stuffed(ip)) {
 		ip->i_di.di_size = size;
-		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -980,7 +980,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
 		if (!error) {
 			ip->i_di.di_size = size;
-			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+			ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			gfs2_dinode_out(ip, dibh->b_data);
@@ -1053,7 +1053,7 @@ static int trunc_end(struct gfs2_inode *ip)
 			ip->i_num.no_addr;
 		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 	}
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 0742761e1e..ca23c8beb3 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
 	memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
 	if (ip->i_di.di_size < offset + size)
 		ip->i_di.di_size = offset + size;
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_dinode_out(ip, dibh->b_data);
 
 	brelse(dibh);
@@ -229,7 +229,7 @@ out:
 
 	if (ip->i_di.di_size < offset + copied)
 		ip->i_di.di_size = offset + copied;
-	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -1560,7 +1560,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
 				break;
 			gfs2_trans_add_bh(ip->i_gl, bh, 1);
 			ip->i_di.di_entries++;
-			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
+			ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
 			gfs2_dinode_out(ip, bh->b_data);
 			brelse(bh);
 			error = 0;
@@ -1646,7 +1646,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 		gfs2_consist_inode(dip);
 	gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	dip->i_di.di_entries--;
-	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+	dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	mark_inode_dirty(&dip->i_inode);
@@ -1694,7 +1694,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 		gfs2_trans_add_bh(dip->i_gl, bh, 1);
 	}
 
-	dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
+	dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_dinode_out(dip, bh->b_data);
 	brelse(bh);
 	return 0;
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 935cc9a571..7dde84775b 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -715,7 +715,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 					    (er->er_mode & S_IFMT));
 			ip->i_inode.i_mode = er->er_mode;
 		}
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -850,7 +850,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
 			(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
 		ip->i_inode.i_mode = er->er_mode;
 	}
-	ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_ctime.tv_sec = get_seconds();
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);
@@ -1130,7 +1130,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -1285,7 +1285,6 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
 	if (!error) {
 		error = inode_setattr(&ip->i_inode, attr);
 		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_inode_attr_out(ip);
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7112039848..c22ae3c3a4 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -52,12 +52,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 
 	inode->i_ino = ip->i_num.no_addr;
 	i_size_write(inode, di->di_size);
-	inode->i_atime.tv_sec = di->di_atime;
-	inode->i_mtime.tv_sec = di->di_mtime;
-	inode->i_ctime.tv_sec = di->di_ctime;
-	inode->i_atime.tv_nsec = 0;
-	inode->i_mtime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
 	inode->i_blocks = di->di_blocks <<
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
 
@@ -72,23 +66,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 		inode->i_flags &= ~S_APPEND;
 }
 
-/**
- * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
- * @ip: The GFS2 inode
- *
- * Only copy out the attributes that we want the VFS layer
- * to be able to modify.
- */
-
-void gfs2_inode_attr_out(struct gfs2_inode *ip)
-{
-	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode_host *di = &ip->i_di;
-	di->di_atime = inode->i_atime.tv_sec;
-	di->di_mtime = inode->i_mtime.tv_sec;
-	di->di_ctime = inode->i_ctime.tv_sec;
-}
-
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
@@ -221,9 +198,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
-	di->di_atime = be64_to_cpu(str->di_atime);
-	di->di_mtime = be64_to_cpu(str->di_mtime);
-	di->di_ctime = be64_to_cpu(str->di_ctime);
+	ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
+	ip->i_inode.i_atime.tv_nsec = 0;
+	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+	ip->i_inode.i_mtime.tv_nsec = 0;
+	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+	ip->i_inode.i_ctime.tv_nsec = 0;
 
 	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
 	di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -360,7 +340,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
 	else
 		drop_nlink(&ip->i_inode);
 
-	ip->i_di.di_ctime = get_seconds();
+	ip->i_inode.i_ctime.tv_sec = get_seconds();
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
@@ -1224,7 +1204,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 		return 0;
 
 	curtime = get_seconds();
-	if (curtime - ip->i_di.di_atime >= quantum) {
+	if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
 		gfs2_glock_dq(gh);
 		gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
 				   gh);
@@ -1236,7 +1216,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 		   trying to get exclusive lock. */
 
 		curtime = get_seconds();
-		if (curtime - ip->i_di.di_atime >= quantum) {
+		if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
 			struct buffer_head *dibh;
 			struct gfs2_dinode *di;
 
@@ -1250,11 +1230,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
 			if (error)
 				goto fail_end_trans;
 
-			ip->i_di.di_atime = curtime;
+			ip->i_inode.i_atime.tv_sec = curtime;
 
 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 			di = (struct gfs2_dinode *)dibh->b_data;
-			di->di_atime = cpu_to_be64(ip->i_di.di_atime);
+			di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
 			brelse(dibh);
 
 			gfs2_trans_end(sdp);
@@ -1375,8 +1355,6 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 	if (!error) {
 		error = inode_setattr(&ip->i_inode, attr);
 		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_inode_attr_out(ip);
-
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 69cbf98509..54d584eddf 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -26,7 +26,6 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip)
 }
 
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
-void gfs2_inode_attr_out(struct gfs2_inode *ip);
 struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
 struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
 
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index b4e354b188..82003e872a 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -167,9 +167,9 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
 	str->di_size = cpu_to_be64(di->di_size);
 	str->di_blocks = cpu_to_be64(di->di_blocks);
-	str->di_atime = cpu_to_be64(di->di_atime);
-	str->di_mtime = cpu_to_be64(di->di_mtime);
-	str->di_ctime = cpu_to_be64(di->di_ctime);
+	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
 
 	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
 	str->di_goal_data = cpu_to_be64(di->di_goal_data);
@@ -193,10 +193,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 
 	printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
 	printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
-	printk(KERN_INFO "  di_atime = %lld\n", (long long)di->di_atime);
-	printk(KERN_INFO "  di_mtime = %lld\n", (long long)di->di_mtime);
-	printk(KERN_INFO "  di_ctime = %lld\n", (long long)di->di_ctime);
-
 	printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
 	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
 
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 38b702a182..5c3962c80e 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -498,10 +498,6 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		di->di_size = cpu_to_be64(inode->i_size);
 	}
 
-	di->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
-	di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
-	di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
-
 	brelse(dibh);
 	gfs2_trans_end(sdp);
 	if (al->al_requested) {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 06176dee15..585b43a94a 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -729,7 +729,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 		error = gfs2_meta_inode_buffer(ip, &dibh);
 		if (error)
 			goto out_end_trans;
-		ip->i_di.di_ctime = get_seconds();
+		ip->i_inode.i_ctime.tv_sec = get_seconds();
 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 		gfs2_dinode_out(ip, dibh->b_data);
 		brelse(dibh);
@@ -915,7 +915,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 
 	error = inode_setattr(inode, attr);
 	gfs2_assert_warn(sdp, !error);
-	gfs2_inode_attr_out(ip);
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
-- 
cgit v1.2.2


From a9583c7983cbba9726bfe64ee46613d654fc9e26 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 20:09:14 -0500
Subject: [GFS2] Shrink gfs2_inode (7) - di_payload_format

This is almost never used. Its there for backward
compatibility with GFS1. It doesn't need its own
field since it can always be calculated from the
inode mode & flags. This saves a bit more space
in the gfs2_inode.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c       | 1 -
 fs/gfs2/inode.c     | 3 +--
 fs/gfs2/ondisk.c    | 6 +++---
 fs/gfs2/ops_inode.c | 1 -
 4 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ca23c8beb3..c82d7cb4a6 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -902,7 +902,6 @@ static int dir_make_exhash(struct inode *inode)
 	dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
 	dip->i_di.di_blocks++;
 	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
-	dip->i_di.di_payload_format = 0;
 
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
 	dip->i_di.di_depth = y;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c22ae3c3a4..f6177fc683 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -210,7 +210,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_generation = be64_to_cpu(str->di_generation);
 
 	di->di_flags = be32_to_cpu(str->di_flags);
-	di->di_payload_format = be32_to_cpu(str->di_payload_format);
 	di->di_height = be16_to_cpu(str->di_height);
 
 	di->di_depth = be16_to_cpu(str->di_depth);
@@ -699,7 +698,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	}
 
 	di->__pad1 = 0;
-	di->di_payload_format = cpu_to_be32(0);
+	di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
 	di->di_height = cpu_to_be32(0);
 	di->__pad2 = 0;
 	di->__pad3 = 0;
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index 82003e872a..b2baba5c50 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -176,9 +176,10 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_generation = cpu_to_be64(di->di_generation);
 
 	str->di_flags = cpu_to_be32(di->di_flags);
-	str->di_payload_format = cpu_to_be32(di->di_payload_format);
 	str->di_height = cpu_to_be16(di->di_height);
-
+	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+					     !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
+					     GFS2_FORMAT_DE : 0);
 	str->di_depth = cpu_to_be16(di->di_depth);
 	str->di_entries = cpu_to_be32(di->di_entries);
 
@@ -197,7 +198,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip)
 	printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
 
 	pv(di, di_flags, "0x%.8X");
-	pv(di, di_payload_format, "%u");
 	pv(di, di_height, "%u");
 
 	pv(di, di_depth, "%u");
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 585b43a94a..0e4eade47b 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -389,7 +389,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	ip->i_inode.i_nlink = 2;
 	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
 	ip->i_di.di_flags |= GFS2_DIF_JDATA;
-	ip->i_di.di_payload_format = GFS2_FORMAT_DE;
 	ip->i_di.di_entries = 2;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
-- 
cgit v1.2.2


From bfded27ba010d1c3b0aa3843f97dc9b80de751be Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 1 Nov 2006 16:05:38 -0500
Subject: [GFS2] Shrink gfs2_inode (8) - i_vn

This shrinks the size of the gfs2_inode by 8 bytes by
replacing the version counter with a one bit valid/invalid
flag.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glops.c     | 5 +++--
 fs/gfs2/incore.h    | 2 +-
 fs/gfs2/inode.c     | 4 ++--
 fs/gfs2/ops_inode.c | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index aad45b7a92..9c2033714b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -305,8 +305,9 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
 	int data = (flags & DIO_DATA);
 
 	if (meta) {
+		struct gfs2_inode *ip = gl->gl_object;
 		gfs2_meta_inval(gl);
-		gl->gl_vn++;
+		set_bit(GIF_INVALID, &ip->i_flags);
 	}
 	if (data)
 		gfs2_page_inval(gl);
@@ -351,7 +352,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
 	if (!ip)
 		return 0;
 
-	if (ip->i_vn != gl->gl_vn) {
+	if (test_bit(GIF_INVALID, &ip->i_flags)) {
 		error = gfs2_inode_refresh(ip);
 		if (error)
 			return error;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c0a8c3b6a8..227a74dc5e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -217,6 +217,7 @@ struct gfs2_alloc {
 };
 
 enum {
+	GIF_INVALID		= 0,
 	GIF_QD_LOCKED		= 1,
 	GIF_PAGED		= 2,
 	GIF_SW_PAGED		= 3,
@@ -228,7 +229,6 @@ struct gfs2_inode {
 
 	unsigned long i_flags;		/* GIF_... */
 
-	u64 i_vn;
 	struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
 
 	struct gfs2_glock *i_gl; /* Move into i_gh? */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f6177fc683..e467780837 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -145,7 +145,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
 		if (unlikely(error))
 			goto fail_put;
 
-		ip->i_vn = ip->i_gl->gl_vn - 1;
+		set_bit(GIF_INVALID, &ip->i_flags);
 		error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
 		if (unlikely(error))
 			goto fail_iopen;
@@ -242,7 +242,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 
 	error = gfs2_dinode_in(ip, dibh->b_data);
 	brelse(dibh);
-	ip->i_vn = ip->i_gl->gl_vn;
+	clear_bit(GIF_INVALID, &ip->i_flags);
 
 	return error;
 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 0e4eade47b..b247f25eff 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -844,7 +844,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct gfs2_holder i_gh;
 	int error;
 
-	if (ip->i_vn == ip->i_gl->gl_vn)
+	if (!test_bit(GIF_INVALID, &ip->i_flags))
 		return generic_permission(inode, mask, gfs2_check_acl);
 
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-- 
cgit v1.2.2


From 294caaa3b8304c0a14c5039691caf23363bd9369 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 2 Nov 2006 11:59:28 -0500
Subject: [GFS2] Tidy up 0 initialisations in inode.c

We don't need to use endian conversions for 0 initialisations
when creating a new on-disk inode.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e467780837..faf9b9ed79 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -673,15 +673,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 	di->di_mode = cpu_to_be32(mode);
 	di->di_uid = cpu_to_be32(uid);
 	di->di_gid = cpu_to_be32(gid);
-	di->di_nlink = cpu_to_be32(0);
-	di->di_size = cpu_to_be64(0);
+	di->di_nlink = 0;
+	di->di_size = 0;
 	di->di_blocks = cpu_to_be64(1);
 	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
 	di->di_major = cpu_to_be32(MAJOR(dev));
 	di->di_minor = cpu_to_be32(MINOR(dev));
 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
 	di->di_generation = cpu_to_be64(*generation);
-	di->di_flags = cpu_to_be32(0);
+	di->di_flags = 0;
 
 	if (S_ISREG(mode)) {
 		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
@@ -699,13 +699,13 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 
 	di->__pad1 = 0;
 	di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
-	di->di_height = cpu_to_be32(0);
+	di->di_height = 0;
 	di->__pad2 = 0;
 	di->__pad3 = 0;
-	di->di_depth = cpu_to_be16(0);
-	di->di_entries = cpu_to_be32(0);
+	di->di_depth = 0;
+	di->di_entries = 0;
 	memset(&di->__pad4, 0, sizeof(di->__pad4));
-	di->di_eattr = cpu_to_be64(0);
+	di->di_eattr = 0;
 	memset(&di->di_reserved, 0, sizeof(di->di_reserved));
 
 	brelse(dibh);
-- 
cgit v1.2.2


From f6e58f01e8dc869803b9f73b2aa9d5bc3f32ca05 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 7 Nov 2006 15:14:58 -0500
Subject: [GFS2] Don't copy meta_header for rgrp in and out

The meta_header for an ondisk rgrp never changes, so there is no point
copying it in and back out to disk. Also there is no reason to keep
a copy for each rgrp in memory.

The code already checks to ensure that the header is correct before
it calls the routine to copy the data in, so that we don't even need
to check whether its correct on disk in the functions in ondisk.c

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ondisk.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index b2baba5c50..f2495f1e21 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -65,15 +65,6 @@ static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *bu
 	mh->mh_format = be32_to_cpu(str->mh_format);
 }
 
-static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *buf)
-{
-	struct gfs2_meta_header *str = buf;
-
-	str->mh_magic = cpu_to_be32(mh->mh_magic);
-	str->mh_type = cpu_to_be32(mh->mh_type);
-	str->mh_format = cpu_to_be32(mh->mh_format);
-}
-
 void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
 {
 	const struct gfs2_sb *str = buf;
@@ -119,7 +110,6 @@ void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
 {
 	const struct gfs2_rgrp *str = buf;
 
-	gfs2_meta_header_in(&rg->rg_header, buf);
 	rg->rg_flags = be32_to_cpu(str->rg_flags);
 	rg->rg_free = be32_to_cpu(str->rg_free);
 	rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
@@ -130,7 +120,6 @@ void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
 {
 	struct gfs2_rgrp *str = buf;
 
-	gfs2_meta_header_out(&rg->rg_header, buf);
 	str->rg_flags = cpu_to_be32(rg->rg_flags);
 	str->rg_free = cpu_to_be32(rg->rg_free);
 	str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
-- 
cgit v1.2.2


From c594d8866460a2710c436839d79f334a0714a2a7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 09:01:13 -0500
Subject: [GFS2] Remove unused GL_DUMP flag

There is no way to set the GL_DUMP flag, and in any case the
same thing can be done with systemtap if required for debugging,
so this removes it.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 3 ---
 fs/gfs2/glock.h | 1 -
 2 files changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 44633c4671..746347a404 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1244,9 +1244,6 @@ restart:
 
 	clear_bit(GLF_PREFETCH, &gl->gl_flags);
 
-	if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP))
-		dump_glock(gl);
-
 	return error;
 }
 
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2b2a889ee2..b9856274df 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -28,7 +28,6 @@
 #define GL_NOCACHE		0x00000400
 #define GL_NOCANCEL		0x00001000
 #define GL_AOP			0x00004000
-#define GL_DUMP			0x00008000
 
 #define GLR_TRYFAILED		13
 #define GLR_CANCELED		14
-- 
cgit v1.2.2


From 2ca99501fa5422e84f18333918a503433449e2b5 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 10:26:54 -0500
Subject: [GFS2] Fix page lock/glock deadlock

This fixes a race between the glock and the page lock encountered
during truncate in gfs2_readpage and gfs2_prepare_write. The gfs2_readpages
function doesn't need the same fix since it only uses a try lock anyway, so
it will fail back to gfs2_readpage in the case of a potential deadlock.

This bug was spotted by Russell Cattelan.

Cc: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.h       |  1 -
 fs/gfs2/ops_address.c | 13 +++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b9856274df..a331bf8175 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -27,7 +27,6 @@
 #define GL_ATIME		0x00000200
 #define GL_NOCACHE		0x00000400
 #define GL_NOCANCEL		0x00001000
-#define GL_AOP			0x00004000
 
 #define GLR_TRYFAILED		13
 #define GLR_CANCELED		14
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 5c3962c80e..3822189454 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -230,7 +230,7 @@ static int gfs2_readpage(struct file *file, struct page *page)
 				/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
 				goto skip_lock;
 		}
-		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
+		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
 		do_unlock = 1;
 		error = gfs2_glock_nq_m_atime(1, &gh);
 		if (unlikely(error))
@@ -254,6 +254,8 @@ skip_lock:
 out:
 	return error;
 out_unlock:
+	if (error == GLR_TRYFAILED)
+		error = AOP_TRUNCATED_PAGE;
 	unlock_page(page);
 	if (do_unlock)
 		gfs2_holder_uninit(&gh);
@@ -293,7 +295,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
 				goto skip_lock;
 		}
 		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
-				 LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
+				 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
 		do_unlock = 1;
 		ret = gfs2_glock_nq_m_atime(1, &gh);
 		if (ret == GLR_TRYFAILED)
@@ -366,10 +368,13 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 	unsigned int write_len = to - from;
 
 
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
 	error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
-	if (error)
+	if (unlikely(error)) {
+		if (error == GLR_TRYFAILED)
+			error = AOP_TRUNCATED_PAGE;
 		goto out_uninit;
+	}
 
 	gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);
 
-- 
cgit v1.2.2


From 6b124d8dba1f46c5f2caf3b3159bbe627f75b9b6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 12:51:06 -0500
Subject: [GFS2] Only set inode flags when required

We were setting the inode flags from GFS2's flags far too often, even when they
couldn't possibly have changed. This patch reduces the amount of flag
setting going on so that we do it only when the inode is read in or
when the flags have changed. The create case is covered by the "when
the inode is read in" case.

This also fixes a bug where we didn't set S_SYNC correctly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c    | 11 +----------
 fs/gfs2/ops_file.c | 19 +++++++++++++++++++
 fs/gfs2/ops_file.h |  2 +-
 3 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index faf9b9ed79..56b39be769 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -54,16 +54,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	i_size_write(inode, di->di_size);
 	inode->i_blocks = di->di_blocks <<
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
-
-	if (di->di_flags & GFS2_DIF_IMMUTABLE)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
-
-	if (di->di_flags & GFS2_DIF_APPENDONLY)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
 }
 
 static int iget_test(struct inode *inode, void *opaque)
@@ -210,6 +200,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	di->di_generation = be64_to_cpu(str->di_generation);
 
 	di->di_flags = be32_to_cpu(str->di_flags);
+	gfs2_set_inode_flags(&ip->i_inode);
 	di->di_height = be16_to_cpu(str->di_height);
 
 	di->di_depth = be16_to_cpu(str->di_depth);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index b52b9db1a2..eabf6c61a9 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -266,6 +266,24 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 	return error;
 }
 
+void gfs2_set_inode_flags(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_dinode_host *di = &ip->i_di;
+	unsigned int flags = inode->i_flags;
+
+	flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+	if (di->di_flags & GFS2_DIF_IMMUTABLE)
+		flags |= S_IMMUTABLE;
+	if (di->di_flags & GFS2_DIF_APPENDONLY)
+		flags |= S_APPEND;
+	if (di->di_flags & GFS2_DIF_NOATIME)
+		flags |= S_NOATIME;
+	if (di->di_flags & GFS2_DIF_SYNC)
+		flags |= S_SYNC;
+	inode->i_flags = flags;
+}
+
 /* Flags that can be set by user space */
 #define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|			\
 			     GFS2_DIF_DIRECTIO|			\
@@ -338,6 +356,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	ip->i_di.di_flags = new_flags;
 	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
+	gfs2_set_inode_flags(inode);
 out_trans_end:
 	gfs2_trans_end(sdp);
 out:
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
index ce319f89ec..7e5d8ec9c8 100644
--- a/fs/gfs2/ops_file.h
+++ b/fs/gfs2/ops_file.h
@@ -17,7 +17,7 @@ extern struct file gfs2_internal_file_sentinel;
 extern int gfs2_internal_read(struct gfs2_inode *ip,
 			      struct file_ra_state *ra_state,
 			      char *buf, loff_t *pos, unsigned size);
-
+extern void gfs2_set_inode_flags(struct inode *inode);
 extern const struct file_operations gfs2_file_fops;
 extern const struct file_operations gfs2_dir_fops;
 
-- 
cgit v1.2.2


From e7c698d74fc9e0e76b3086062b0519df3601ff52 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 13:52:05 -0500
Subject: [GFS2] Inode number is constant

Since the inode number is constant, we don't need to keep updating
it everytime we refresh the other inode fields.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 56b39be769..19b2736cd5 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -50,7 +50,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip)
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode_host *di = &ip->i_di;
 
-	inode->i_ino = ip->i_num.no_addr;
 	i_size_write(inode, di->di_size);
 	inode->i_blocks = di->di_blocks <<
 		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
@@ -73,6 +72,7 @@ static int iget_set(struct inode *inode, void *opaque)
 	struct gfs2_inum_host *inum = opaque;
 
 	ip->i_num = *inum;
+	inode->i_ino = inum->no_addr;
 	return 0;
 }
 
-- 
cgit v1.2.2


From 9e2dbdac3df300516ffdd9a8631f23164d068a50 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 8 Nov 2006 15:45:46 -0500
Subject: [GFS2] Remove gfs2_inode_attr_in

This function wasn't really doing the right thing. There was no need
to update the inode size at this point and the updating of the
i_blocks field has now been moved to the places where di_blocks is
updated. A result of this patch and some those preceeding it is that
unlocking a glock is now a much more efficient process, since there
is no longer any requirement to copy data from the gfs2 inode into
the vfs inode at this point.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c  |  4 ++++
 fs/gfs2/dir.c   |  4 ++++
 fs/gfs2/eattr.c |  6 ++++++
 fs/gfs2/glops.c |  8 ++------
 fs/gfs2/inode.c | 21 +++------------------
 fs/gfs2/inode.h |  7 +++++++
 6 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 692d4a3da1..06e3447ea1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -163,6 +163,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 	if (ip->i_di.di_size) {
 		*(__be64 *)(di + 1) = cpu_to_be64(block);
 		ip->i_di.di_blocks++;
+		gfs2_set_inode_blocks(&ip->i_inode);
 		di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
 	}
 
@@ -272,6 +273,7 @@ static int build_height(struct inode *inode, unsigned height)
 	*(__be64 *)(di + 1) = cpu_to_be64(bn);
 	ip->i_di.di_height += new_height;
 	ip->i_di.di_blocks += new_height;
+	gfs2_set_inode_blocks(&ip->i_inode);
 	di->di_height = cpu_to_be16(ip->i_di.di_height);
 	di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
 	brelse(dibh);
@@ -415,6 +417,7 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
 
 	*ptr = cpu_to_be64(*block);
 	ip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&ip->i_inode);
 
 	*new = 1;
 	return 0;
@@ -770,6 +773,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
 		if (!ip->i_di.di_blocks)
 			gfs2_consist_inode(ip);
 		ip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&ip->i_inode);
 	}
 	if (bstart) {
 		if (metadata)
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c82d7cb4a6..a2923fb91b 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -901,6 +901,7 @@ static int dir_make_exhash(struct inode *inode)
 
 	dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
 	dip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&dip->i_inode);
 	dip->i_di.di_flags |= GFS2_DIF_EXHASH;
 
 	for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
@@ -1038,6 +1039,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 	error = gfs2_meta_inode_buffer(dip, &dibh);
 	if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
 		dip->i_di.di_blocks++;
+		gfs2_set_inode_blocks(&dip->i_inode);
 		gfs2_dinode_out(dip, dibh->b_data);
 		brelse(dibh);
 	}
@@ -1516,6 +1518,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 		return error;
 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
 	ip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&ip->i_inode);
 	gfs2_dinode_out(ip, bh->b_data);
 	brelse(bh);
 	return 0;
@@ -1860,6 +1863,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
 		if (!dip->i_di.di_blocks)
 			gfs2_consist_inode(dip);
 		dip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&dip->i_inode);
 	}
 
 	error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 7dde84775b..ebebbdcd70 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -281,6 +281,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 		if (!ip->i_di.di_blocks)
 			gfs2_consist_inode(ip);
 		ip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&ip->i_inode);
 	}
 	if (bstart)
 		gfs2_free_meta(ip, bstart, blen);
@@ -598,6 +599,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
 	ea->ea_num_ptrs = 0;
 
 	ip->i_di.di_blocks++;
+	gfs2_set_inode_blocks(&ip->i_inode);
 
 	return 0;
 }
@@ -649,6 +651,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
 			gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
 
 			ip->i_di.di_blocks++;
+			gfs2_set_inode_blocks(&ip->i_inode);
 
 			copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize :
 							   data_len;
@@ -977,6 +980,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 		ip->i_di.di_eattr = blk;
 		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
 		ip->i_di.di_blocks++;
+		gfs2_set_inode_blocks(&ip->i_inode);
 
 		eablk++;
 	}
@@ -1387,6 +1391,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
 		if (!ip->i_di.di_blocks)
 			gfs2_consist_inode(ip);
 		ip->i_di.di_blocks--;
+		gfs2_set_inode_blocks(&ip->i_inode);
 	}
 	if (bstart)
 		gfs2_free_meta(ip, bstart, blen);
@@ -1441,6 +1446,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	if (!ip->i_di.di_blocks)
 		gfs2_consist_inode(ip);
 	ip->i_di.di_blocks--;
+	gfs2_set_inode_blocks(&ip->i_inode);
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (!error) {
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 9c2033714b..b92de0af0b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -356,7 +356,6 @@ static int inode_go_lock(struct gfs2_holder *gh)
 		error = gfs2_inode_refresh(ip);
 		if (error)
 			return error;
-		gfs2_inode_attr_in(ip);
 	}
 
 	if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
@@ -380,11 +379,8 @@ static void inode_go_unlock(struct gfs2_holder *gh)
 	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_inode *ip = gl->gl_object;
 
-	if (ip == NULL)
-		return;
-	if (test_bit(GLF_DIRTY, &gl->gl_flags))
-		gfs2_inode_attr_in(ip);
-	gfs2_meta_cache_flush(ip);
+	if (ip)
+		gfs2_meta_cache_flush(ip);
 }
 
 /**
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 19b2736cd5..ea9ca239c8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,29 +38,12 @@
 #include "trans.h"
 #include "util.h"
 
-/**
- * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
- * @ip: The GFS2 inode (with embedded disk inode data)
- * @inode:  The Linux VFS inode
- *
- */
-
-void gfs2_inode_attr_in(struct gfs2_inode *ip)
-{
-	struct inode *inode = &ip->i_inode;
-	struct gfs2_dinode_host *di = &ip->i_di;
-
-	i_size_write(inode, di->di_size);
-	inode->i_blocks = di->di_blocks <<
-		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
-}
-
 static int iget_test(struct inode *inode, void *opaque)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_inum_host *inum = opaque;
 
-	if (ip && ip->i_num.no_addr == inum->no_addr)
+	if (ip->i_num.no_addr == inum->no_addr)
 		return 1;
 
 	return 0;
@@ -187,7 +170,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	 */
 	ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
 	di->di_size = be64_to_cpu(str->di_size);
+	i_size_write(&ip->i_inode, di->di_size);
 	di->di_blocks = be64_to_cpu(str->di_blocks);
+	gfs2_set_inode_blocks(&ip->i_inode);
 	ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
 	ip->i_inode.i_atime.tv_nsec = 0;
 	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 54d584eddf..46917edfdd 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -25,6 +25,13 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip)
 	return S_ISDIR(ip->i_inode.i_mode);
 }
 
+static inline void gfs2_set_inode_blocks(struct inode *inode)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	inode->i_blocks = ip->i_di.di_blocks <<
+		(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
+}
+
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
 struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
 struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
-- 
cgit v1.2.2


From 7020933156ac2a8a7386314933e49948bf0438f7 Mon Sep 17 00:00:00 2001
From: Russell Cattelan <cattelan@redhat.com>
Date: Thu, 9 Nov 2006 11:28:08 -0500
Subject: [GFS2] Fix race in logging code

The log lock is dropped prior to io submittion, but
this exposes a hole in which the log data structures
may be going away due to a truncate.
Store the buffer head in a local pointer prior to
dropping the lock and relay on the buffer_head lock
for consitency on the buffer head.

Signed-Off-By: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/lops.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8a654cd253..4d7f94d8c7 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -509,7 +509,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 {
 	LIST_HEAD(started);
 	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
-	struct buffer_head *bh = NULL;
+	struct buffer_head *bh = NULL,*bh1 = NULL;
 	unsigned int offset = sizeof(struct gfs2_log_descriptor);
 	struct gfs2_log_descriptor *ld;
 	unsigned int limit;
@@ -537,8 +537,13 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 		list_for_each_entry_safe_continue(bd1, bdt,
 						  &sdp->sd_log_le_databuf,
 						  bd_le.le_list) {
+			/* store off the buffer head in a local ptr since
+			 * gfs2_bufdata might change when we drop the log lock
+			 */
+			bh1 = bd1->bd_bh;
+
 			/* An ordered write buffer */
-			if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
+			if (bh1 && !buffer_pinned(bh1)) {
 				list_move(&bd1->bd_le.le_list, &started);
 				if (bd1 == bd2) {
 					bd2 = NULL;
@@ -547,20 +552,21 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 							bd_le.le_list);
 				}
 				total_dbuf--;
-				if (bd1->bd_bh) {
-					get_bh(bd1->bd_bh);
-					if (buffer_dirty(bd1->bd_bh)) {
+				if (bh1) {
+					if (buffer_dirty(bh1)) {
+						get_bh(bh1);
+
 						gfs2_log_unlock(sdp);
-						wait_on_buffer(bd1->bd_bh);
-						ll_rw_block(WRITE, 1,
-							    &bd1->bd_bh);
+
+						ll_rw_block(SWRITE, 1, &bh1);
+						brelse(bh1);
+
 						gfs2_log_lock(sdp);
 					}
-					brelse(bd1->bd_bh);
 					continue;
 				}
 				continue;
-			} else if (bd1->bd_bh) { /* A journaled buffer */
+			} else if (bh1) { /* A journaled buffer */
 				int magic;
 				gfs2_log_unlock(sdp);
 				if (!bh) {
@@ -582,16 +588,16 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 					ld->ld_data2 = cpu_to_be32(0);
 					memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
 				}
-				magic = gfs2_check_magic(bd1->bd_bh);
-				*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+				magic = gfs2_check_magic(bh1);
+				*ptr++ = cpu_to_be64(bh1->b_blocknr);
 				*ptr++ = cpu_to_be64((__u64)magic);
-				clear_buffer_escaped(bd1->bd_bh);
+				clear_buffer_escaped(bh1);
 				if (unlikely(magic != 0))
-					set_buffer_escaped(bd1->bd_bh);
+					set_buffer_escaped(bh1);
 				gfs2_log_lock(sdp);
 				if (n++ > num)
 					break;
-			} else if (!bd1->bd_bh) {
+			} else if (!bh1) {
 				total_dbuf--;
 				sdp->sd_log_num_databuf--;
 				list_del_init(&bd1->bd_le.le_list);
-- 
cgit v1.2.2


From 61057c6bb3a3d14cf2bea6ca20dc6d367e1d852e Mon Sep 17 00:00:00 2001
From: Russell Cattelan <cattelan@redhat.com>
Date: Thu, 9 Nov 2006 11:42:33 -0500
Subject: [GFS2] Remove unused zero_readpage from stuffed_readpage

Stuffed files only consist of a maximum of
(gfs2 block size - sizeof(struct gfs2_dinode)) bytes. Since the
gfs2 block size is always less than page size, we will never see
a call to stuffed_readpage for anything other than the first page
in the file.

Signed-off-by: Russell Cattelan <cattelan@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_address.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 3822189454..2f7ef980d0 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -156,19 +156,6 @@ out_ignore:
 	return 0;
 }
 
-static int zero_readpage(struct page *page)
-{
-	void *kaddr;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	memset(kaddr, 0, PAGE_CACHE_SIZE);
-	kunmap_atomic(kaddr, KM_USER0);
-
-	SetPageUptodate(page);
-
-	return 0;
-}
-
 /**
  * stuffed_readpage - Fill in a Linux page with stuffed file data
  * @ip: the inode
@@ -183,9 +170,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 	void *kaddr;
 	int error;
 
-	/* Only the first page of a stuffed file might contain data */
-	if (unlikely(page->index))
-		return zero_readpage(page);
+	BUG_ON(page->index);
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
@@ -737,6 +722,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
 			if (!atomic_read(&aspace->i_writecount))
 				return 0;
 
+			if (!(gfp_mask & __GFP_WAIT))
+				return 0;
+
 			if (time_after_eq(jiffies, t)) {
 				stuck_releasepage(bh);
 				/* should we withdraw here? */
-- 
cgit v1.2.2


From fdda387f73947e6ae511ec601f5b3c6fbb582aac Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Thu, 2 Nov 2006 11:19:21 -0500
Subject: [DLM] Add support for tcp communications

The following patch adds a TCP based communications layer
to the DLM which is compile time selectable. The existing SCTP
layer gives the advantage of allowing multihoming, whereas
the TCP layer has been heavily tested in previous versions of
the DLM and is known to be robust and therefore can be used as
a baseline for performance testing.

Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/Kconfig         |   17 +
 fs/dlm/Makefile        |    4 +-
 fs/dlm/lowcomms-sctp.c | 1239 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/dlm/lowcomms-tcp.c  | 1263 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/dlm/lowcomms.c      | 1239 -----------------------------------------------
 5 files changed, 2522 insertions(+), 1240 deletions(-)
 create mode 100644 fs/dlm/lowcomms-sctp.c
 create mode 100644 fs/dlm/lowcomms-tcp.c
 delete mode 100644 fs/dlm/lowcomms.c

(limited to 'fs')

diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 81b2c6465e..c5985b883b 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -9,6 +9,23 @@ config DLM
 	A general purpose distributed lock manager for kernel or userspace
 	applications.
 
+choice
+	prompt "Select DLM communications protocol"
+	depends on DLM
+	default DLM_TCP
+	help
+	The DLM Can use TCP or SCTP for it's network communications.
+	SCTP supports multi-homed operations whereas TCP doesn't.
+	However, SCTP seems to have stability problems at the moment.
+
+config DLM_TCP
+	bool "TCP/IP"
+
+config DLM_SCTP
+	bool "SCTP"
+
+endchoice
+
 config DLM_DEBUG
 	bool "DLM debugging"
 	depends on DLM
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 1832e0297f..65388944eb 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -4,7 +4,6 @@ dlm-y :=			ast.o \
 				dir.o \
 				lock.o \
 				lockspace.o \
-				lowcomms.o \
 				main.o \
 				member.o \
 				memory.o \
@@ -17,3 +16,6 @@ dlm-y :=			ast.o \
 				util.o
 dlm-$(CONFIG_DLM_DEBUG) +=	debug_fs.o
 
+dlm-$(CONFIG_DLM_TCP)   += lowcomms-tcp.o
+
+dlm-$(CONFIG_DLM_SCTP)  += lowcomms-sctp.o
\ No newline at end of file
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
new file mode 100644
index 0000000000..6da6b14d5a
--- /dev/null
+++ b/fs/dlm/lowcomms-sctp.c
@@ -0,0 +1,1239 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * lowcomms.c
+ *
+ * This is the "low-level" comms layer.
+ *
+ * It is responsible for sending/receiving messages
+ * from other nodes in the cluster.
+ *
+ * Cluster nodes are referred to by their nodeids. nodeids are
+ * simply 32 bit numbers to the locking module - if they need to
+ * be expanded for the cluster infrastructure then that is it's
+ * responsibility. It is this layer's
+ * responsibility to resolve these into IP address or
+ * whatever it needs for inter-node communication.
+ *
+ * The comms level is two kernel threads that deal mainly with
+ * the receiving of messages from other nodes and passing them
+ * up to the mid-level comms layer (which understands the
+ * message format) for execution by the locking core, and
+ * a send thread which does all the setting up of connections
+ * to remote nodes and the sending of data. Threads are not allowed
+ * to send their own data because it may cause them to wait in times
+ * of high load. Also, this way, the sending thread can collect together
+ * messages bound for one node and send them in one block.
+ *
+ * I don't see any problem with the recv thread executing the locking
+ * code on behalf of remote processes as the locking code is
+ * short, efficient and never (well, hardly ever) waits.
+ *
+ */
+
+#include <asm/ioctls.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/sctp/user.h>
+#include <linux/pagemap.h>
+#include <linux/socket.h>
+#include <linux/idr.h>
+
+#include "dlm_internal.h"
+#include "lowcomms.h"
+#include "config.h"
+#include "midcomms.h"
+
+static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
+static int			dlm_local_count;
+static int			dlm_local_nodeid;
+
+/* One of these per connected node */
+
+#define NI_INIT_PENDING 1
+#define NI_WRITE_PENDING 2
+
+struct nodeinfo {
+	spinlock_t		lock;
+	sctp_assoc_t		assoc_id;
+	unsigned long		flags;
+	struct list_head	write_list; /* nodes with pending writes */
+	struct list_head	writequeue; /* outgoing writequeue_entries */
+	spinlock_t		writequeue_lock;
+	int			nodeid;
+};
+
+static DEFINE_IDR(nodeinfo_idr);
+static struct rw_semaphore	nodeinfo_lock;
+static int			max_nodeid;
+
+struct cbuf {
+	unsigned		base;
+	unsigned		len;
+	unsigned		mask;
+};
+
+/* Just the one of these, now. But this struct keeps
+   the connection-specific variables together */
+
+#define CF_READ_PENDING 1
+
+struct connection {
+	struct socket          *sock;
+	unsigned long		flags;
+	struct page            *rx_page;
+	atomic_t		waiting_requests;
+	struct cbuf		cb;
+	int                     eagain_flag;
+};
+
+/* An entry waiting to be sent */
+
+struct writequeue_entry {
+	struct list_head	list;
+	struct page            *page;
+	int			offset;
+	int			len;
+	int			end;
+	int			users;
+	struct nodeinfo        *ni;
+};
+
+#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
+#define CBUF_EMPTY(cb) ((cb)->len == 0)
+#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
+#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+
+#define CBUF_INIT(cb, size) \
+do { \
+	(cb)->base = (cb)->len = 0; \
+	(cb)->mask = ((size)-1); \
+} while(0)
+
+#define CBUF_EAT(cb, n) \
+do { \
+	(cb)->len  -= (n); \
+	(cb)->base += (n); \
+	(cb)->base &= (cb)->mask; \
+} while(0)
+
+
+/* List of nodes which have writes pending */
+static struct list_head write_nodes;
+static spinlock_t write_nodes_lock;
+
+/* Maximum number of incoming messages to process before
+ * doing a schedule()
+ */
+#define MAX_RX_MSG_COUNT 25
+
+/* Manage daemons */
+static struct task_struct *recv_task;
+static struct task_struct *send_task;
+static wait_queue_head_t lowcomms_recv_wait;
+static atomic_t accepting;
+
+/* The SCTP connection */
+static struct connection sctp_con;
+
+
+static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
+{
+	struct sockaddr_storage addr;
+	int error;
+
+	if (!dlm_local_count)
+		return -1;
+
+	error = dlm_nodeid_to_addr(nodeid, &addr);
+	if (error)
+		return error;
+
+	if (dlm_local_addr[0]->ss_family == AF_INET) {
+	        struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
+		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
+		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
+	} else {
+	        struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
+		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
+		memcpy(&ret6->sin6_addr, &in6->sin6_addr,
+		       sizeof(in6->sin6_addr));
+	}
+
+	return 0;
+}
+
+static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
+{
+	struct nodeinfo *ni;
+	int r;
+	int n;
+
+	down_read(&nodeinfo_lock);
+	ni = idr_find(&nodeinfo_idr, nodeid);
+	up_read(&nodeinfo_lock);
+
+	if (!ni && alloc) {
+		down_write(&nodeinfo_lock);
+
+		ni = idr_find(&nodeinfo_idr, nodeid);
+		if (ni)
+			goto out_up;
+
+		r = idr_pre_get(&nodeinfo_idr, alloc);
+		if (!r)
+			goto out_up;
+
+		ni = kmalloc(sizeof(struct nodeinfo), alloc);
+		if (!ni)
+			goto out_up;
+
+		r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
+		if (r) {
+			kfree(ni);
+			ni = NULL;
+			goto out_up;
+		}
+		if (n != nodeid) {
+			idr_remove(&nodeinfo_idr, n);
+			kfree(ni);
+			ni = NULL;
+			goto out_up;
+		}
+		memset(ni, 0, sizeof(struct nodeinfo));
+		spin_lock_init(&ni->lock);
+		INIT_LIST_HEAD(&ni->writequeue);
+		spin_lock_init(&ni->writequeue_lock);
+		ni->nodeid = nodeid;
+
+		if (nodeid > max_nodeid)
+			max_nodeid = nodeid;
+	out_up:
+		up_write(&nodeinfo_lock);
+	}
+
+	return ni;
+}
+
+/* Don't call this too often... */
+static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
+{
+	int i;
+	struct nodeinfo *ni;
+
+	for (i=1; i<=max_nodeid; i++) {
+		ni = nodeid2nodeinfo(i, 0);
+		if (ni && ni->assoc_id == assoc)
+			return ni;
+	}
+	return NULL;
+}
+
+/* Data or notification available on socket */
+static void lowcomms_data_ready(struct sock *sk, int count_unused)
+{
+	atomic_inc(&sctp_con.waiting_requests);
+	if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
+		return;
+
+	wake_up_interruptible(&lowcomms_recv_wait);
+}
+
+
+/* Add the port number to an IP6 or 4 sockaddr and return the address length.
+   Also padd out the struct with zeros to make comparisons meaningful */
+
+static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
+			  int *addr_len)
+{
+	struct sockaddr_in *local4_addr;
+	struct sockaddr_in6 *local6_addr;
+
+	if (!dlm_local_count)
+		return;
+
+	if (!port) {
+		if (dlm_local_addr[0]->ss_family == AF_INET) {
+			local4_addr = (struct sockaddr_in *)dlm_local_addr[0];
+			port = be16_to_cpu(local4_addr->sin_port);
+		} else {
+			local6_addr = (struct sockaddr_in6 *)dlm_local_addr[0];
+			port = be16_to_cpu(local6_addr->sin6_port);
+		}
+	}
+
+	saddr->ss_family = dlm_local_addr[0]->ss_family;
+	if (dlm_local_addr[0]->ss_family == AF_INET) {
+		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
+		in4_addr->sin_port = cpu_to_be16(port);
+		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
+		memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
+				      sizeof(struct sockaddr_in));
+		*addr_len = sizeof(struct sockaddr_in);
+	} else {
+		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
+		in6_addr->sin6_port = cpu_to_be16(port);
+		memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
+				      sizeof(struct sockaddr_in6));
+		*addr_len = sizeof(struct sockaddr_in6);
+	}
+}
+
+/* Close the connection and tidy up */
+static void close_connection(void)
+{
+	if (sctp_con.sock) {
+		sock_release(sctp_con.sock);
+		sctp_con.sock = NULL;
+	}
+
+	if (sctp_con.rx_page) {
+		__free_page(sctp_con.rx_page);
+		sctp_con.rx_page = NULL;
+	}
+}
+
+/* We only send shutdown messages to nodes that are not part of the cluster */
+static void send_shutdown(sctp_assoc_t associd)
+{
+	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+	struct msghdr outmessage;
+	struct cmsghdr *cmsg;
+	struct sctp_sndrcvinfo *sinfo;
+	int ret;
+
+	outmessage.msg_name = NULL;
+	outmessage.msg_namelen = 0;
+	outmessage.msg_control = outcmsg;
+	outmessage.msg_controllen = sizeof(outcmsg);
+	outmessage.msg_flags = MSG_EOR;
+
+	cmsg = CMSG_FIRSTHDR(&outmessage);
+	cmsg->cmsg_level = IPPROTO_SCTP;
+	cmsg->cmsg_type = SCTP_SNDRCV;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+	outmessage.msg_controllen = cmsg->cmsg_len;
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
+
+	sinfo->sinfo_flags |= MSG_EOF;
+	sinfo->sinfo_assoc_id = associd;
+
+	ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0);
+
+	if (ret != 0)
+		log_print("send EOF to node failed: %d", ret);
+}
+
+
+/* INIT failed but we don't know which node...
+   restart INIT on all pending nodes */
+static void init_failed(void)
+{
+	int i;
+	struct nodeinfo *ni;
+
+	for (i=1; i<=max_nodeid; i++) {
+		ni = nodeid2nodeinfo(i, 0);
+		if (!ni)
+			continue;
+
+		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
+			ni->assoc_id = 0;
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+		}
+	}
+	wake_up_process(send_task);
+}
+
+/* Something happened to an association */
+static void process_sctp_notification(struct msghdr *msg, char *buf)
+{
+	union sctp_notification *sn = (union sctp_notification *)buf;
+
+	if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
+		switch (sn->sn_assoc_change.sac_state) {
+
+		case SCTP_COMM_UP:
+		case SCTP_RESTART:
+		{
+			/* Check that the new node is in the lockspace */
+			struct sctp_prim prim;
+			mm_segment_t fs;
+			int nodeid;
+			int prim_len, ret;
+			int addr_len;
+			struct nodeinfo *ni;
+
+			/* This seems to happen when we received a connection
+			 * too early... or something...  anyway, it happens but
+			 * we always seem to get a real message too, see
+			 * receive_from_sock */
+
+			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
+				log_print("COMM_UP for invalid assoc ID %d",
+					 (int)sn->sn_assoc_change.sac_assoc_id);
+				init_failed();
+				return;
+			}
+			memset(&prim, 0, sizeof(struct sctp_prim));
+			prim_len = sizeof(struct sctp_prim);
+			prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
+
+			fs = get_fs();
+			set_fs(get_ds());
+			ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
+						IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
+						(char*)&prim, &prim_len);
+			set_fs(fs);
+			if (ret < 0) {
+				struct nodeinfo *ni;
+
+				log_print("getsockopt/sctp_primary_addr on "
+					  "new assoc %d failed : %d",
+				    (int)sn->sn_assoc_change.sac_assoc_id, ret);
+
+				/* Retry INIT later */
+				ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
+				if (ni)
+					clear_bit(NI_INIT_PENDING, &ni->flags);
+				return;
+			}
+			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
+			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
+				log_print("reject connect from unknown addr");
+				send_shutdown(prim.ssp_assoc_id);
+				return;
+			}
+
+			ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
+			if (!ni)
+				return;
+
+			/* Save the assoc ID */
+			spin_lock(&ni->lock);
+			ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
+			spin_unlock(&ni->lock);
+
+			log_print("got new/restarted association %d nodeid %d",
+			       (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
+
+			/* Send any pending writes */
+			clear_bit(NI_INIT_PENDING, &ni->flags);
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+			wake_up_process(send_task);
+		}
+		break;
+
+		case SCTP_COMM_LOST:
+		case SCTP_SHUTDOWN_COMP:
+		{
+			struct nodeinfo *ni;
+
+			ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
+			if (ni) {
+				spin_lock(&ni->lock);
+				ni->assoc_id = 0;
+				spin_unlock(&ni->lock);
+			}
+		}
+		break;
+
+		/* We don't know which INIT failed, so clear the PENDING flags
+		 * on them all.  if assoc_id is zero then it will then try
+		 * again */
+
+		case SCTP_CANT_STR_ASSOC:
+		{
+			log_print("Can't start SCTP association - retrying");
+			init_failed();
+		}
+		break;
+
+		default:
+			log_print("unexpected SCTP assoc change id=%d state=%d",
+				  (int)sn->sn_assoc_change.sac_assoc_id,
+				  sn->sn_assoc_change.sac_state);
+		}
+	}
+}
+
+/* Data received from remote end */
+static int receive_from_sock(void)
+{
+	int ret = 0;
+	struct msghdr msg;
+	struct kvec iov[2];
+	unsigned len;
+	int r;
+	struct sctp_sndrcvinfo *sinfo;
+	struct cmsghdr *cmsg;
+	struct nodeinfo *ni;
+
+	/* These two are marginally too big for stack allocation, but this
+	 * function is (currently) only called by dlm_recvd so static should be
+	 * OK.
+	 */
+	static struct sockaddr_storage msgname;
+	static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+
+	if (sctp_con.sock == NULL)
+		goto out;
+
+	if (sctp_con.rx_page == NULL) {
+		/*
+		 * This doesn't need to be atomic, but I think it should
+		 * improve performance if it is.
+		 */
+		sctp_con.rx_page = alloc_page(GFP_ATOMIC);
+		if (sctp_con.rx_page == NULL)
+			goto out_resched;
+		CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
+	}
+
+	memset(&incmsg, 0, sizeof(incmsg));
+	memset(&msgname, 0, sizeof(msgname));
+
+	memset(incmsg, 0, sizeof(incmsg));
+	msg.msg_name = &msgname;
+	msg.msg_namelen = sizeof(msgname);
+	msg.msg_flags = 0;
+	msg.msg_control = incmsg;
+	msg.msg_controllen = sizeof(incmsg);
+	msg.msg_iovlen = 1;
+
+	/* I don't see why this circular buffer stuff is necessary for SCTP
+	 * which is a packet-based protocol, but the whole thing breaks under
+	 * load without it! The overhead is minimal (and is in the TCP lowcomms
+	 * anyway, of course) so I'll leave it in until I can figure out what's
+	 * really happening.
+	 */
+
+	/*
+	 * iov[0] is the bit of the circular buffer between the current end
+	 * point (cb.base + cb.len) and the end of the buffer.
+	 */
+	iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
+	iov[0].iov_base = page_address(sctp_con.rx_page) +
+			  CBUF_DATA(&sctp_con.cb);
+	iov[1].iov_len = 0;
+
+	/*
+	 * iov[1] is the bit of the circular buffer between the start of the
+	 * buffer and the start of the currently used section (cb.base)
+	 */
+	if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
+		iov[1].iov_len = sctp_con.cb.base;
+		iov[1].iov_base = page_address(sctp_con.rx_page);
+		msg.msg_iovlen = 2;
+	}
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len,
+				 MSG_NOSIGNAL | MSG_DONTWAIT);
+	if (ret <= 0)
+		goto out_close;
+
+	msg.msg_control = incmsg;
+	msg.msg_controllen = sizeof(incmsg);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+
+	if (msg.msg_flags & MSG_NOTIFICATION) {
+		process_sctp_notification(&msg, page_address(sctp_con.rx_page));
+		return 0;
+	}
+
+	/* Is this a new association ? */
+	ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL);
+	if (ni) {
+		ni->assoc_id = sinfo->sinfo_assoc_id;
+		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
+
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+			wake_up_process(send_task);
+		}
+	}
+
+	/* INIT sends a message with length of 1 - ignore it */
+	if (r == 1)
+		return 0;
+
+	CBUF_ADD(&sctp_con.cb, ret);
+	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
+					  page_address(sctp_con.rx_page),
+					  sctp_con.cb.base, sctp_con.cb.len,
+					  PAGE_CACHE_SIZE);
+	if (ret < 0)
+		goto out_close;
+	CBUF_EAT(&sctp_con.cb, ret);
+
+      out:
+	ret = 0;
+	goto out_ret;
+
+      out_resched:
+	lowcomms_data_ready(sctp_con.sock->sk, 0);
+	ret = 0;
+	schedule();
+	goto out_ret;
+
+      out_close:
+	if (ret != -EAGAIN)
+		log_print("error reading from sctp socket: %d", ret);
+      out_ret:
+	return ret;
+}
+
+/* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */
+static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
+{
+	mm_segment_t fs;
+	int result = 0;
+
+	fs = get_fs();
+	set_fs(get_ds());
+	if (num == 1)
+		result = sctp_con.sock->ops->bind(sctp_con.sock,
+					(struct sockaddr *) addr, addr_len);
+	else
+		result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
+				SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
+	set_fs(fs);
+
+	if (result < 0)
+		log_print("Can't bind to port %d addr number %d",
+			  dlm_config.tcp_port, num);
+
+	return result;
+}
+
+static void init_local(void)
+{
+	struct sockaddr_storage sas, *addr;
+	int i;
+
+	dlm_local_nodeid = dlm_our_nodeid();
+
+	for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
+		if (dlm_our_addr(&sas, i))
+			break;
+
+		addr = kmalloc(sizeof(*addr), GFP_KERNEL);
+		if (!addr)
+			break;
+		memcpy(addr, &sas, sizeof(*addr));
+		dlm_local_addr[dlm_local_count++] = addr;
+	}
+}
+
+/* Initialise SCTP socket and bind to all interfaces */
+static int init_sock(void)
+{
+	mm_segment_t fs;
+	struct socket *sock = NULL;
+	struct sockaddr_storage localaddr;
+	struct sctp_event_subscribe subscribe;
+	int result = -EINVAL, num = 1, i, addr_len;
+
+	if (!dlm_local_count) {
+		init_local();
+		if (!dlm_local_count) {
+			log_print("no local IP address has been set");
+			goto out;
+		}
+	}
+
+	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
+				  IPPROTO_SCTP, &sock);
+	if (result < 0) {
+		log_print("Can't create comms socket, check SCTP is loaded");
+		goto out;
+	}
+
+	/* Listen for events */
+	memset(&subscribe, 0, sizeof(subscribe));
+	subscribe.sctp_data_io_event = 1;
+	subscribe.sctp_association_event = 1;
+	subscribe.sctp_send_failure_event = 1;
+	subscribe.sctp_shutdown_event = 1;
+	subscribe.sctp_partial_delivery_event = 1;
+
+	fs = get_fs();
+	set_fs(get_ds());
+	result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
+				       (char *)&subscribe, sizeof(subscribe));
+	set_fs(fs);
+
+	if (result < 0) {
+		log_print("Failed to set SCTP_EVENTS on socket: result=%d",
+			  result);
+		goto create_delsock;
+	}
+
+	/* Init con struct */
+	sock->sk->sk_user_data = &sctp_con;
+	sctp_con.sock = sock;
+	sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready;
+
+	/* Bind to all interfaces. */
+	for (i = 0; i < dlm_local_count; i++) {
+		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
+		make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
+
+		result = add_bind_addr(&localaddr, addr_len, num);
+		if (result)
+			goto create_delsock;
+		++num;
+	}
+
+	result = sock->ops->listen(sock, 5);
+	if (result < 0) {
+		log_print("Can't set socket listening");
+		goto create_delsock;
+	}
+
+	return 0;
+
+ create_delsock:
+	sock_release(sock);
+	sctp_con.sock = NULL;
+ out:
+	return result;
+}
+
+
+static struct writequeue_entry *new_writequeue_entry(gfp_t allocation)
+{
+	struct writequeue_entry *entry;
+
+	entry = kmalloc(sizeof(struct writequeue_entry), allocation);
+	if (!entry)
+		return NULL;
+
+	entry->page = alloc_page(allocation);
+	if (!entry->page) {
+		kfree(entry);
+		return NULL;
+	}
+
+	entry->offset = 0;
+	entry->len = 0;
+	entry->end = 0;
+	entry->users = 0;
+
+	return entry;
+}
+
+void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
+{
+	struct writequeue_entry *e;
+	int offset = 0;
+	int users = 0;
+	struct nodeinfo *ni;
+
+	if (!atomic_read(&accepting))
+		return NULL;
+
+	ni = nodeid2nodeinfo(nodeid, allocation);
+	if (!ni)
+		return NULL;
+
+	spin_lock(&ni->writequeue_lock);
+	e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
+	if (((struct list_head *) e == &ni->writequeue) ||
+	    (PAGE_CACHE_SIZE - e->end < len)) {
+		e = NULL;
+	} else {
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+	}
+	spin_unlock(&ni->writequeue_lock);
+
+	if (e) {
+	      got_one:
+		if (users == 0)
+			kmap(e->page);
+		*ppc = page_address(e->page) + offset;
+		return e;
+	}
+
+	e = new_writequeue_entry(allocation);
+	if (e) {
+		spin_lock(&ni->writequeue_lock);
+		offset = e->end;
+		e->end += len;
+		e->ni = ni;
+		users = e->users++;
+		list_add_tail(&e->list, &ni->writequeue);
+		spin_unlock(&ni->writequeue_lock);
+		goto got_one;
+	}
+	return NULL;
+}
+
+void dlm_lowcomms_commit_buffer(void *arg)
+{
+	struct writequeue_entry *e = (struct writequeue_entry *) arg;
+	int users;
+	struct nodeinfo *ni = e->ni;
+
+	if (!atomic_read(&accepting))
+		return;
+
+	spin_lock(&ni->writequeue_lock);
+	users = --e->users;
+	if (users)
+		goto out;
+	e->len = e->end - e->offset;
+	kunmap(e->page);
+	spin_unlock(&ni->writequeue_lock);
+
+	if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+		spin_lock_bh(&write_nodes_lock);
+		list_add_tail(&ni->write_list, &write_nodes);
+		spin_unlock_bh(&write_nodes_lock);
+		wake_up_process(send_task);
+	}
+	return;
+
+      out:
+	spin_unlock(&ni->writequeue_lock);
+	return;
+}
+
+static void free_entry(struct writequeue_entry *e)
+{
+	__free_page(e->page);
+	kfree(e);
+}
+
+/* Initiate an SCTP association. In theory we could just use sendmsg() on
+   the first IP address and it should work, but this allows us to set up the
+   association before sending any valuable data that we can't afford to lose.
+   It also keeps the send path clean as it can now always use the association ID */
+static void initiate_association(int nodeid)
+{
+	struct sockaddr_storage rem_addr;
+	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+	struct msghdr outmessage;
+	struct cmsghdr *cmsg;
+	struct sctp_sndrcvinfo *sinfo;
+	int ret;
+	int addrlen;
+	char buf[1];
+	struct kvec iov[1];
+	struct nodeinfo *ni;
+
+	log_print("Initiating association with node %d", nodeid);
+
+	ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
+	if (!ni)
+		return;
+
+	if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) {
+		log_print("no address for nodeid %d", nodeid);
+		return;
+	}
+
+	make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
+
+	outmessage.msg_name = &rem_addr;
+	outmessage.msg_namelen = addrlen;
+	outmessage.msg_control = outcmsg;
+	outmessage.msg_controllen = sizeof(outcmsg);
+	outmessage.msg_flags = MSG_EOR;
+
+	iov[0].iov_base = buf;
+	iov[0].iov_len = 1;
+
+	/* Real INIT messages seem to cause trouble. Just send a 1 byte message
+	   we can afford to lose */
+	cmsg = CMSG_FIRSTHDR(&outmessage);
+	cmsg->cmsg_level = IPPROTO_SCTP;
+	cmsg->cmsg_type = SCTP_SNDRCV;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
+	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
+
+	outmessage.msg_controllen = cmsg->cmsg_len;
+	ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1);
+	if (ret < 0) {
+		log_print("send INIT to node failed: %d", ret);
+		/* Try again later */
+		clear_bit(NI_INIT_PENDING, &ni->flags);
+	}
+}
+
+/* Send a message */
+static int send_to_sock(struct nodeinfo *ni)
+{
+	int ret = 0;
+	struct writequeue_entry *e;
+	int len, offset;
+	struct msghdr outmsg;
+	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
+	struct cmsghdr *cmsg;
+	struct sctp_sndrcvinfo *sinfo;
+	struct kvec iov;
+
+        /* See if we need to init an association before we start
+	   sending precious messages */
+	spin_lock(&ni->lock);
+	if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
+		spin_unlock(&ni->lock);
+		initiate_association(ni->nodeid);
+		return 0;
+	}
+	spin_unlock(&ni->lock);
+
+	outmsg.msg_name = NULL; /* We use assoc_id */
+	outmsg.msg_namelen = 0;
+	outmsg.msg_control = outcmsg;
+	outmsg.msg_controllen = sizeof(outcmsg);
+	outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR;
+
+	cmsg = CMSG_FIRSTHDR(&outmsg);
+	cmsg->cmsg_level = IPPROTO_SCTP;
+	cmsg->cmsg_type = SCTP_SNDRCV;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
+	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
+	sinfo->sinfo_assoc_id = ni->assoc_id;
+	outmsg.msg_controllen = cmsg->cmsg_len;
+
+	spin_lock(&ni->writequeue_lock);
+	for (;;) {
+		if (list_empty(&ni->writequeue))
+			break;
+		e = list_entry(ni->writequeue.next, struct writequeue_entry,
+			       list);
+		len = e->len;
+		offset = e->offset;
+		BUG_ON(len == 0 && e->users == 0);
+		spin_unlock(&ni->writequeue_lock);
+		kmap(e->page);
+
+		ret = 0;
+		if (len) {
+			iov.iov_base = page_address(e->page)+offset;
+			iov.iov_len = len;
+
+			ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1,
+					     len);
+			if (ret == -EAGAIN) {
+				sctp_con.eagain_flag = 1;
+				goto out;
+			} else if (ret < 0)
+				goto send_error;
+		} else {
+			/* Don't starve people filling buffers */
+			schedule();
+		}
+
+		spin_lock(&ni->writequeue_lock);
+		e->offset += ret;
+		e->len -= ret;
+
+		if (e->len == 0 && e->users == 0) {
+			list_del(&e->list);
+			free_entry(e);
+			continue;
+		}
+	}
+	spin_unlock(&ni->writequeue_lock);
+ out:
+	return ret;
+
+ send_error:
+	log_print("Error sending to node %d %d", ni->nodeid, ret);
+	spin_lock(&ni->lock);
+	if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
+		ni->assoc_id = 0;
+		spin_unlock(&ni->lock);
+		initiate_association(ni->nodeid);
+	} else
+		spin_unlock(&ni->lock);
+
+	return ret;
+}
+
+/* Try to send any messages that are pending */
+static void process_output_queue(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+
+	spin_lock_bh(&write_nodes_lock);
+	list_for_each_safe(list, temp, &write_nodes) {
+		struct nodeinfo *ni =
+		    list_entry(list, struct nodeinfo, write_list);
+		clear_bit(NI_WRITE_PENDING, &ni->flags);
+		list_del(&ni->write_list);
+
+		spin_unlock_bh(&write_nodes_lock);
+
+		send_to_sock(ni);
+		spin_lock_bh(&write_nodes_lock);
+	}
+	spin_unlock_bh(&write_nodes_lock);
+}
+
+/* Called after we've had -EAGAIN and been woken up */
+static void refill_write_queue(void)
+{
+	int i;
+
+	for (i=1; i<=max_nodeid; i++) {
+		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
+
+		if (ni) {
+			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
+				spin_lock_bh(&write_nodes_lock);
+				list_add_tail(&ni->write_list, &write_nodes);
+				spin_unlock_bh(&write_nodes_lock);
+			}
+		}
+	}
+}
+
+static void clean_one_writequeue(struct nodeinfo *ni)
+{
+	struct list_head *list;
+	struct list_head *temp;
+
+	spin_lock(&ni->writequeue_lock);
+	list_for_each_safe(list, temp, &ni->writequeue) {
+		struct writequeue_entry *e =
+			list_entry(list, struct writequeue_entry, list);
+		list_del(&e->list);
+		free_entry(e);
+	}
+	spin_unlock(&ni->writequeue_lock);
+}
+
+static void clean_writequeues(void)
+{
+	int i;
+
+	for (i=1; i<=max_nodeid; i++) {
+		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
+		if (ni)
+			clean_one_writequeue(ni);
+	}
+}
+
+
+static void dealloc_nodeinfo(void)
+{
+	int i;
+
+	for (i=1; i<=max_nodeid; i++) {
+		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
+		if (ni) {
+			idr_remove(&nodeinfo_idr, i);
+			kfree(ni);
+		}
+	}
+}
+
+int dlm_lowcomms_close(int nodeid)
+{
+	struct nodeinfo *ni;
+
+	ni = nodeid2nodeinfo(nodeid, 0);
+	if (!ni)
+		return -1;
+
+	spin_lock(&ni->lock);
+	if (ni->assoc_id) {
+		ni->assoc_id = 0;
+		/* Don't send shutdown here, sctp will just queue it
+		   till the node comes back up! */
+	}
+	spin_unlock(&ni->lock);
+
+	clean_one_writequeue(ni);
+	clear_bit(NI_INIT_PENDING, &ni->flags);
+	return 0;
+}
+
+static int write_list_empty(void)
+{
+	int status;
+
+	spin_lock_bh(&write_nodes_lock);
+	status = list_empty(&write_nodes);
+	spin_unlock_bh(&write_nodes_lock);
+
+	return status;
+}
+
+static int dlm_recvd(void *data)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	while (!kthread_should_stop()) {
+		int count = 0;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&lowcomms_recv_wait, &wait);
+		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
+			schedule();
+		remove_wait_queue(&lowcomms_recv_wait, &wait);
+		set_current_state(TASK_RUNNING);
+
+		if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
+			int ret;
+
+			do {
+				ret = receive_from_sock();
+
+				/* Don't starve out everyone else */
+				if (++count >= MAX_RX_MSG_COUNT) {
+					schedule();
+					count = 0;
+				}
+			} while (!kthread_should_stop() && ret >=0);
+		}
+		schedule();
+	}
+
+	return 0;
+}
+
+static int dlm_sendd(void *data)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (write_list_empty())
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		if (sctp_con.eagain_flag) {
+			sctp_con.eagain_flag = 0;
+			refill_write_queue();
+		}
+		process_output_queue();
+	}
+
+	remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
+
+	return 0;
+}
+
+static void daemons_stop(void)
+{
+	kthread_stop(recv_task);
+	kthread_stop(send_task);
+}
+
+static int daemons_start(void)
+{
+	struct task_struct *p;
+	int error;
+
+	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_recvd %d", error);
+		return error;
+	}
+	recv_task = p;
+
+	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_sendd %d", error);
+		kthread_stop(recv_task);
+		return error;
+	}
+	send_task = p;
+
+	return 0;
+}
+
+/*
+ * This is quite likely to sleep...
+ */
+int dlm_lowcomms_start(void)
+{
+	int error;
+
+	error = init_sock();
+	if (error)
+		goto fail_sock;
+	error = daemons_start();
+	if (error)
+		goto fail_sock;
+	atomic_set(&accepting, 1);
+	return 0;
+
+ fail_sock:
+	close_connection();
+	return error;
+}
+
+/* Set all the activity flags to prevent any socket activity. */
+
+void dlm_lowcomms_stop(void)
+{
+	atomic_set(&accepting, 0);
+	sctp_con.flags = 0x7;
+	daemons_stop();
+	clean_writequeues();
+	close_connection();
+	dealloc_nodeinfo();
+	max_nodeid = 0;
+}
+
+int dlm_lowcomms_init(void)
+{
+	init_waitqueue_head(&lowcomms_recv_wait);
+	spin_lock_init(&write_nodes_lock);
+	INIT_LIST_HEAD(&write_nodes);
+	init_rwsem(&nodeinfo_lock);
+	return 0;
+}
+
+void dlm_lowcomms_exit(void)
+{
+	int i;
+
+	for (i = 0; i < dlm_local_count; i++)
+		kfree(dlm_local_addr[i]);
+	dlm_local_count = 0;
+	dlm_local_nodeid = 0;
+}
+
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
new file mode 100644
index 0000000000..7289e59b4b
--- /dev/null
+++ b/fs/dlm/lowcomms-tcp.c
@@ -0,0 +1,1263 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
+**  Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+/*
+ * lowcomms.c
+ *
+ * This is the "low-level" comms layer.
+ *
+ * It is responsible for sending/receiving messages
+ * from other nodes in the cluster.
+ *
+ * Cluster nodes are referred to by their nodeids. nodeids are
+ * simply 32 bit numbers to the locking module - if they need to
+ * be expanded for the cluster infrastructure then that is it's
+ * responsibility. It is this layer's
+ * responsibility to resolve these into IP address or
+ * whatever it needs for inter-node communication.
+ *
+ * The comms level is two kernel threads that deal mainly with
+ * the receiving of messages from other nodes and passing them
+ * up to the mid-level comms layer (which understands the
+ * message format) for execution by the locking core, and
+ * a send thread which does all the setting up of connections
+ * to remote nodes and the sending of data. Threads are not allowed
+ * to send their own data because it may cause them to wait in times
+ * of high load. Also, this way, the sending thread can collect together
+ * messages bound for one node and send them in one block.
+ *
+ * I don't see any problem with the recv thread executing the locking
+ * code on behalf of remote processes as the locking code is
+ * short, efficient and never waits.
+ *
+ */
+
+
+#include <asm/ioctls.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <linux/pagemap.h>
+
+#include "dlm_internal.h"
+#include "lowcomms.h"
+#include "midcomms.h"
+#include "config.h"
+
+struct cbuf {
+	unsigned base;
+	unsigned len;
+	unsigned mask;
+};
+
+#ifndef FALSE
+#define FALSE 0
+#define TRUE 1
+#endif
+#define NODE_INCREMENT 32
+
+#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0)
+#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
+#define CBUF_EMPTY(cb) ((cb)->len == 0)
+#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
+#define CBUF_EAT(cb, n) do { (cb)->len  -= (n); \
+                             (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0)
+#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+
+/* Maximum number of incoming messages to process before
+   doing a schedule()
+*/
+#define MAX_RX_MSG_COUNT 25
+
+struct connection {
+	struct socket *sock;	/* NULL if not connected */
+	uint32_t nodeid;	/* So we know who we are in the list */
+	struct rw_semaphore sock_sem;	/* Stop connect races */
+	struct list_head read_list;	/* On this list when ready for reading */
+	struct list_head write_list;	/* On this list when ready for writing */
+	struct list_head state_list;	/* On this list when ready to connect */
+	unsigned long flags;	/* bit 1,2 = We are on the read/write lists */
+#define CF_READ_PENDING 1
+#define CF_WRITE_PENDING 2
+#define CF_CONNECT_PENDING 3
+#define CF_IS_OTHERCON 4
+	struct list_head writequeue;	/* List of outgoing writequeue_entries */
+	struct list_head listenlist;    /* List of allocated listening sockets */
+	spinlock_t writequeue_lock;
+	int (*rx_action) (struct connection *);	/* What to do when active */
+	struct page *rx_page;
+	struct cbuf cb;
+	int retries;
+	atomic_t waiting_requests;
+#define MAX_CONNECT_RETRIES 3
+	struct connection *othercon;
+};
+#define sock2con(x) ((struct connection *)(x)->sk_user_data)
+
+/* An entry waiting to be sent */
+struct writequeue_entry {
+	struct list_head list;
+	struct page *page;
+	int offset;
+	int len;
+	int end;
+	int users;
+	struct connection *con;
+};
+
+static struct sockaddr_storage dlm_local_addr;
+
+/* Manage daemons */
+static struct task_struct *recv_task;
+static struct task_struct *send_task;
+
+static wait_queue_t lowcomms_send_waitq_head;
+static wait_queue_head_t lowcomms_send_waitq;
+static wait_queue_t lowcomms_recv_waitq_head;
+static wait_queue_head_t lowcomms_recv_waitq;
+
+/* An array of pointers to connections, indexed by NODEID */
+static struct connection **connections;
+static struct semaphore connections_lock;
+static kmem_cache_t *con_cache;
+static int conn_array_size;
+static atomic_t accepting;
+
+/* List of sockets that have reads pending */
+static struct list_head read_sockets;
+static spinlock_t read_sockets_lock;
+
+/* List of sockets which have writes pending */
+static struct list_head write_sockets;
+static spinlock_t write_sockets_lock;
+
+/* List of sockets which have connects pending */
+static struct list_head state_sockets;
+static spinlock_t state_sockets_lock;
+
+static struct connection *nodeid2con(int nodeid, gfp_t allocation)
+{
+	struct connection *con = NULL;
+
+	down(&connections_lock);
+	if (nodeid >= conn_array_size) {
+		int new_size = nodeid + NODE_INCREMENT;
+		struct connection **new_conns;
+
+		new_conns = kmalloc(sizeof(struct connection *) *
+				    new_size, allocation);
+		if (!new_conns)
+			goto finish;
+
+		memset(new_conns, 0, sizeof(struct connection *) * new_size);
+		memcpy(new_conns, connections,  sizeof(struct connection *) * conn_array_size);
+		conn_array_size = new_size;
+		kfree(connections);
+		connections = new_conns;
+
+	}
+
+	con = connections[nodeid];
+	if (con == NULL && allocation) {
+		con = kmem_cache_alloc(con_cache, allocation);
+		if (!con)
+			goto finish;
+
+		memset(con, 0, sizeof(*con));
+		con->nodeid = nodeid;
+		init_rwsem(&con->sock_sem);
+		INIT_LIST_HEAD(&con->writequeue);
+		spin_lock_init(&con->writequeue_lock);
+
+		connections[nodeid] = con;
+	}
+
+ finish:
+	up(&connections_lock);
+	return con;
+}
+
+/* Data available on socket or listen socket received a connect */
+static void lowcomms_data_ready(struct sock *sk, int count_unused)
+{
+	struct connection *con = sock2con(sk);
+
+	atomic_inc(&con->waiting_requests);
+	if (test_and_set_bit(CF_READ_PENDING, &con->flags))
+		return;
+
+	spin_lock_bh(&read_sockets_lock);
+	list_add_tail(&con->read_list, &read_sockets);
+	spin_unlock_bh(&read_sockets_lock);
+
+	wake_up_interruptible(&lowcomms_recv_waitq);
+}
+
+static void lowcomms_write_space(struct sock *sk)
+{
+	struct connection *con = sock2con(sk);
+
+	if (test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+		return;
+
+	spin_lock_bh(&write_sockets_lock);
+	list_add_tail(&con->write_list, &write_sockets);
+	spin_unlock_bh(&write_sockets_lock);
+
+	wake_up_interruptible(&lowcomms_send_waitq);
+}
+
+static inline void lowcomms_connect_sock(struct connection *con)
+{
+	if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
+		return;
+	if (!atomic_read(&accepting))
+		return;
+
+	spin_lock_bh(&state_sockets_lock);
+	list_add_tail(&con->state_list, &state_sockets);
+	spin_unlock_bh(&state_sockets_lock);
+
+	wake_up_interruptible(&lowcomms_send_waitq);
+}
+
+static void lowcomms_state_change(struct sock *sk)
+{
+/*	struct connection *con = sock2con(sk); */
+
+	switch (sk->sk_state) {
+	case TCP_ESTABLISHED:
+		lowcomms_write_space(sk);
+		break;
+
+	case TCP_FIN_WAIT1:
+	case TCP_FIN_WAIT2:
+	case TCP_TIME_WAIT:
+	case TCP_CLOSE:
+	case TCP_CLOSE_WAIT:
+	case TCP_LAST_ACK:
+	case TCP_CLOSING:
+		/* FIXME: I think this causes more trouble than it solves.
+		   lowcomms wil reconnect anyway when there is something to
+		   send. This just attempts reconnection if a node goes down!
+		*/
+		/* lowcomms_connect_sock(con); */
+		break;
+
+	default:
+		printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state);
+		break;
+	}
+}
+
+/* Make a socket active */
+static int add_sock(struct socket *sock, struct connection *con)
+{
+	con->sock = sock;
+
+	/* Install a data_ready callback */
+	con->sock->sk->sk_data_ready = lowcomms_data_ready;
+	con->sock->sk->sk_write_space = lowcomms_write_space;
+	con->sock->sk->sk_state_change = lowcomms_state_change;
+
+	return 0;
+}
+
+/* Add the port number to an IP6 or 4 sockaddr and return the address
+   length */
+static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
+			  int *addr_len)
+{
+        saddr->ss_family =  dlm_local_addr.ss_family;
+        if (saddr->ss_family == AF_INET) {
+		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
+		in4_addr->sin_port = cpu_to_be16(port);
+		*addr_len = sizeof(struct sockaddr_in);
+	}
+	else {
+		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
+		in6_addr->sin6_port = cpu_to_be16(port);
+		*addr_len = sizeof(struct sockaddr_in6);
+	}
+}
+
+/* Close a remote connection and tidy up */
+static void close_connection(struct connection *con, int and_other)
+{
+	down_write(&con->sock_sem);
+
+	if (con->sock) {
+		sock_release(con->sock);
+		con->sock = NULL;
+	}
+	if (con->othercon && and_other) {
+		/* Argh! recursion in kernel code!
+		   Actually, this isn't a list so it
+		   will only re-enter once.
+		*/
+		close_connection(con->othercon, FALSE);
+	}
+	if (con->rx_page) {
+		__free_page(con->rx_page);
+		con->rx_page = NULL;
+	}
+	con->retries = 0;
+	up_write(&con->sock_sem);
+}
+
+/* Data received from remote end */
+static int receive_from_sock(struct connection *con)
+{
+	int ret = 0;
+	struct msghdr msg;
+	struct iovec iov[2];
+	mm_segment_t fs;
+	unsigned len;
+	int r;
+	int call_again_soon = 0;
+
+	down_read(&con->sock_sem);
+
+	if (con->sock == NULL)
+		goto out;
+	if (con->rx_page == NULL) {
+		/*
+		 * This doesn't need to be atomic, but I think it should
+		 * improve performance if it is.
+		 */
+		con->rx_page = alloc_page(GFP_ATOMIC);
+		if (con->rx_page == NULL)
+			goto out_resched;
+		CBUF_INIT(&con->cb, PAGE_CACHE_SIZE);
+	}
+
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_iovlen = 1;
+	msg.msg_iov = iov;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_flags = 0;
+
+	/*
+	 * iov[0] is the bit of the circular buffer between the current end
+	 * point (cb.base + cb.len) and the end of the buffer.
+	 */
+	iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb);
+	iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb);
+	iov[1].iov_len = 0;
+
+	/*
+	 * iov[1] is the bit of the circular buffer between the start of the
+	 * buffer and the start of the currently used section (cb.base)
+	 */
+	if (CBUF_DATA(&con->cb) >= con->cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb);
+		iov[1].iov_len = con->cb.base;
+		iov[1].iov_base = page_address(con->rx_page);
+		msg.msg_iovlen = 2;
+	}
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	fs = get_fs();
+	set_fs(get_ds());
+	r = ret = sock_recvmsg(con->sock, &msg, len,
+			       MSG_DONTWAIT | MSG_NOSIGNAL);
+	set_fs(fs);
+
+	if (ret <= 0)
+		goto out_close;
+	if (ret == len)
+		call_again_soon = 1;
+	CBUF_ADD(&con->cb, ret);
+	ret = dlm_process_incoming_buffer(con->nodeid,
+					  page_address(con->rx_page),
+					  con->cb.base, con->cb.len,
+					  PAGE_CACHE_SIZE);
+	if (ret == -EBADMSG) {
+		printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, "
+		       "iov_len=%u, iov_base[0]=%p, read=%d\n",
+		       page_address(con->rx_page), con->cb.base, con->cb.len,
+		       len, iov[0].iov_base, r);
+	}
+	if (ret < 0)
+		goto out_close;
+	CBUF_EAT(&con->cb, ret);
+
+	if (CBUF_EMPTY(&con->cb) && !call_again_soon) {
+		__free_page(con->rx_page);
+		con->rx_page = NULL;
+	}
+
+      out:
+	if (call_again_soon)
+		goto out_resched;
+	up_read(&con->sock_sem);
+	ret = 0;
+	goto out_ret;
+
+      out_resched:
+	lowcomms_data_ready(con->sock->sk, 0);
+	up_read(&con->sock_sem);
+	ret = 0;
+	schedule();
+	goto out_ret;
+
+      out_close:
+	up_read(&con->sock_sem);
+	if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
+		close_connection(con, FALSE);
+		/* Reconnect when there is something to send */
+	}
+
+      out_ret:
+	return ret;
+}
+
+/* Listening socket is busy, accept a connection */
+static int accept_from_sock(struct connection *con)
+{
+	int result;
+	struct sockaddr_storage peeraddr;
+	struct socket *newsock;
+	int len;
+	int nodeid;
+	struct connection *newcon;
+
+	memset(&peeraddr, 0, sizeof(peeraddr));
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &newsock);
+	if (result < 0)
+		return -ENOMEM;
+
+	down_read(&con->sock_sem);
+
+	result = -ENOTCONN;
+	if (con->sock == NULL)
+		goto accept_err;
+
+	newsock->type = con->sock->type;
+	newsock->ops = con->sock->ops;
+
+	result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+	if (result < 0)
+		goto accept_err;
+
+	/* Get the connected socket's peer */
+	memset(&peeraddr, 0, sizeof(peeraddr));
+	if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
+				  &len, 2)) {
+		result = -ECONNABORTED;
+		goto accept_err;
+	}
+
+	/* Get the new node's NODEID */
+	make_sockaddr(&peeraddr, 0, &len);
+	if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
+	    	printk("dlm: connect from non cluster node\n");
+		sock_release(newsock);
+		up_read(&con->sock_sem);
+		return -1;
+	}
+
+	log_print("got connection from %d", nodeid);
+
+	/*  Check to see if we already have a connection to this node. This
+	 *  could happen if the two nodes initiate a connection at roughly
+	 *  the same time and the connections cross on the wire.
+	 * TEMPORARY FIX:
+	 *  In this case we store the incoming one in "othercon"
+	 */
+	newcon = nodeid2con(nodeid, GFP_KERNEL);
+	if (!newcon) {
+		result = -ENOMEM;
+		goto accept_err;
+	}
+	down_write(&newcon->sock_sem);
+	if (newcon->sock) {
+	        struct connection *othercon = newcon->othercon;
+
+		if (!othercon) {
+			othercon = kmem_cache_alloc(con_cache, GFP_KERNEL);
+			if (!othercon) {
+				printk("dlm: failed to allocate incoming socket\n");
+				up_write(&newcon->sock_sem);
+				result = -ENOMEM;
+				goto accept_err;
+			}
+			memset(othercon, 0, sizeof(*othercon));
+			othercon->nodeid = nodeid;
+			othercon->rx_action = receive_from_sock;
+			init_rwsem(&othercon->sock_sem);
+			set_bit(CF_IS_OTHERCON, &othercon->flags);
+			newcon->othercon = othercon;
+		}
+		othercon->sock = newsock;
+		newsock->sk->sk_user_data = othercon;
+		add_sock(newsock, othercon);
+	}
+	else {
+		newsock->sk->sk_user_data = newcon;
+		newcon->rx_action = receive_from_sock;
+		add_sock(newsock, newcon);
+
+	}
+
+	up_write(&newcon->sock_sem);
+
+	/*
+	 * Add it to the active queue in case we got data
+	 * beween processing the accept adding the socket
+	 * to the read_sockets list
+	 */
+	lowcomms_data_ready(newsock->sk, 0);
+	up_read(&con->sock_sem);
+
+	return 0;
+
+      accept_err:
+	up_read(&con->sock_sem);
+	sock_release(newsock);
+
+	if (result != -EAGAIN)
+		printk("dlm: error accepting connection from node: %d\n", result);
+	return result;
+}
+
+/* Connect a new socket to its peer */
+static int connect_to_sock(struct connection *con)
+{
+	int result = -EHOSTUNREACH;
+	struct sockaddr_storage saddr;
+	int addr_len;
+	struct socket *sock;
+
+	if (con->nodeid == 0) {
+		log_print("attempt to connect sock 0 foiled");
+		return 0;
+	}
+
+	down_write(&con->sock_sem);
+	if (con->retries++ > MAX_CONNECT_RETRIES)
+		goto out;
+
+	/* Some odd races can cause double-connects, ignore them */
+	if (con->sock) {
+		result = 0;
+		goto out;
+	}
+
+	/* Create a socket to communicate with */
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (result < 0)
+		goto out_err;
+
+	memset(&saddr, 0, sizeof(saddr));
+	if (dlm_nodeid_to_addr(con->nodeid, &saddr))
+	        goto out_err;
+
+	sock->sk->sk_user_data = con;
+	con->rx_action = receive_from_sock;
+
+	make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len);
+
+	add_sock(sock, con);
+
+	log_print("connecting to %d", con->nodeid);
+	result =
+		sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
+			       O_NONBLOCK);
+	if (result == -EINPROGRESS)
+		result = 0;
+	if (result != 0)
+		goto out_err;
+
+      out:
+	up_write(&con->sock_sem);
+	/*
+	 * Returning an error here means we've given up trying to connect to
+	 * a remote node, otherwise we return 0 and reschedule the connetion
+	 * attempt
+	 */
+	return result;
+
+      out_err:
+	if (con->sock) {
+		sock_release(con->sock);
+		con->sock = NULL;
+	}
+	/*
+	 * Some errors are fatal and this list might need adjusting. For other
+	 * errors we try again until the max number of retries is reached.
+	 */
+	if (result != -EHOSTUNREACH && result != -ENETUNREACH &&
+	    result != -ENETDOWN && result != EINVAL
+	    && result != -EPROTONOSUPPORT) {
+		lowcomms_connect_sock(con);
+		result = 0;
+	}
+	goto out;
+}
+
+static struct socket *create_listen_sock(struct connection *con, struct sockaddr_storage *saddr)
+{
+        struct socket *sock = NULL;
+	mm_segment_t fs;
+	int result = 0;
+	int one = 1;
+	int addr_len;
+
+	if (dlm_local_addr.ss_family == AF_INET)
+		addr_len = sizeof(struct sockaddr_in);
+	else
+		addr_len = sizeof(struct sockaddr_in6);
+
+	/* Create a socket to communicate with */
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
+	if (result < 0) {
+		printk("dlm: Can't create listening comms socket\n");
+		goto create_out;
+	}
+
+	fs = get_fs();
+	set_fs(get_ds());
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one));
+	set_fs(fs);
+	if (result < 0) {
+		printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result);
+	}
+	sock->sk->sk_user_data = con;
+	con->rx_action = accept_from_sock;
+	con->sock = sock;
+
+	/* Bind to our port */
+	make_sockaddr(saddr, dlm_config.tcp_port, &addr_len);
+	result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
+	if (result < 0) {
+		printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port);
+		sock_release(sock);
+		sock = NULL;
+		con->sock = NULL;
+		goto create_out;
+	}
+
+	fs = get_fs();
+	set_fs(get_ds());
+
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one));
+	set_fs(fs);
+	if (result < 0) {
+		printk("dlm: Set keepalive failed: %d\n", result);
+	}
+
+	result = sock->ops->listen(sock, 5);
+	if (result < 0) {
+		printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port);
+		sock_release(sock);
+		sock = NULL;
+		goto create_out;
+	}
+
+      create_out:
+	return sock;
+}
+
+
+/* Listen on all interfaces */
+static int listen_for_all(void)
+{
+	struct socket *sock = NULL;
+	struct connection *con = nodeid2con(0, GFP_KERNEL);
+	int result = -EINVAL;
+
+	/* We don't support multi-homed hosts */
+	memset(con, 0, sizeof(*con));
+	init_rwsem(&con->sock_sem);
+	spin_lock_init(&con->writequeue_lock);
+	INIT_LIST_HEAD(&con->writequeue);
+	set_bit(CF_IS_OTHERCON, &con->flags);
+
+	sock = create_listen_sock(con, &dlm_local_addr);
+	if (sock) {
+		add_sock(sock, con);
+		result = 0;
+	}
+	else {
+		result = -EADDRINUSE;
+	}
+
+	return result;
+}
+
+
+
+static struct writequeue_entry *new_writequeue_entry(struct connection *con,
+						     gfp_t allocation)
+{
+	struct writequeue_entry *entry;
+
+	entry = kmalloc(sizeof(struct writequeue_entry), allocation);
+	if (!entry)
+		return NULL;
+
+	entry->page = alloc_page(allocation);
+	if (!entry->page) {
+		kfree(entry);
+		return NULL;
+	}
+
+	entry->offset = 0;
+	entry->len = 0;
+	entry->end = 0;
+	entry->users = 0;
+	entry->con = con;
+
+	return entry;
+}
+
+void *dlm_lowcomms_get_buffer(int nodeid, int len,
+			      gfp_t allocation, char **ppc)
+{
+	struct connection *con;
+	struct writequeue_entry *e;
+	int offset = 0;
+	int users = 0;
+
+	if (!atomic_read(&accepting))
+		return NULL;
+
+	con = nodeid2con(nodeid, allocation);
+	if (!con)
+		return NULL;
+
+	spin_lock(&con->writequeue_lock);
+	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
+	if (((struct list_head *) e == &con->writequeue) ||
+	    (PAGE_CACHE_SIZE - e->end < len)) {
+		e = NULL;
+	} else {
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+	}
+	spin_unlock(&con->writequeue_lock);
+
+	if (e) {
+	      got_one:
+		if (users == 0)
+			kmap(e->page);
+		*ppc = page_address(e->page) + offset;
+		return e;
+	}
+
+	e = new_writequeue_entry(con, allocation);
+	if (e) {
+		spin_lock(&con->writequeue_lock);
+		offset = e->end;
+		e->end += len;
+		users = e->users++;
+		list_add_tail(&e->list, &con->writequeue);
+		spin_unlock(&con->writequeue_lock);
+		goto got_one;
+	}
+	return NULL;
+}
+
+void dlm_lowcomms_commit_buffer(void *mh)
+{
+	struct writequeue_entry *e = (struct writequeue_entry *)mh;
+	struct connection *con = e->con;
+	int users;
+
+	if (!atomic_read(&accepting))
+		return;
+
+	spin_lock(&con->writequeue_lock);
+	users = --e->users;
+	if (users)
+		goto out;
+	e->len = e->end - e->offset;
+	kunmap(e->page);
+	spin_unlock(&con->writequeue_lock);
+
+	if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) {
+		spin_lock_bh(&write_sockets_lock);
+		list_add_tail(&con->write_list, &write_sockets);
+		spin_unlock_bh(&write_sockets_lock);
+
+		wake_up_interruptible(&lowcomms_send_waitq);
+	}
+	return;
+
+      out:
+	spin_unlock(&con->writequeue_lock);
+	return;
+}
+
+static void free_entry(struct writequeue_entry *e)
+{
+	__free_page(e->page);
+	kfree(e);
+}
+
+/* Send a message */
+static int send_to_sock(struct connection *con)
+{
+	int ret = 0;
+	ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
+	const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
+	struct writequeue_entry *e;
+	int len, offset;
+
+	down_read(&con->sock_sem);
+	if (con->sock == NULL)
+		goto out_connect;
+
+	sendpage = con->sock->ops->sendpage;
+
+	spin_lock(&con->writequeue_lock);
+	for (;;) {
+		e = list_entry(con->writequeue.next, struct writequeue_entry,
+			       list);
+		if ((struct list_head *) e == &con->writequeue)
+			break;
+
+		len = e->len;
+		offset = e->offset;
+		BUG_ON(len == 0 && e->users == 0);
+		spin_unlock(&con->writequeue_lock);
+
+		ret = 0;
+		if (len) {
+			ret = sendpage(con->sock, e->page, offset, len,
+				       msg_flags);
+			if (ret == -EAGAIN || ret == 0)
+				goto out;
+			if (ret <= 0)
+				goto send_error;
+		}
+		else {
+			/* Don't starve people filling buffers */
+			schedule();
+		}
+
+		spin_lock(&con->writequeue_lock);
+		e->offset += ret;
+		e->len -= ret;
+
+		if (e->len == 0 && e->users == 0) {
+			list_del(&e->list);
+			free_entry(e);
+			continue;
+		}
+	}
+	spin_unlock(&con->writequeue_lock);
+      out:
+	up_read(&con->sock_sem);
+	return ret;
+
+      send_error:
+	up_read(&con->sock_sem);
+	close_connection(con, FALSE);
+	lowcomms_connect_sock(con);
+	return ret;
+
+      out_connect:
+	up_read(&con->sock_sem);
+	lowcomms_connect_sock(con);
+	return 0;
+}
+
+static void clean_one_writequeue(struct connection *con)
+{
+	struct list_head *list;
+	struct list_head *temp;
+
+	spin_lock(&con->writequeue_lock);
+	list_for_each_safe(list, temp, &con->writequeue) {
+		struct writequeue_entry *e =
+			list_entry(list, struct writequeue_entry, list);
+		list_del(&e->list);
+		free_entry(e);
+	}
+	spin_unlock(&con->writequeue_lock);
+}
+
+/* Called from recovery when it knows that a node has
+   left the cluster */
+int dlm_lowcomms_close(int nodeid)
+{
+	struct connection *con;
+
+	if (!connections)
+		goto out;
+
+	log_print("closing connection to node %d", nodeid);
+	con = nodeid2con(nodeid, 0);
+	if (con) {
+		clean_one_writequeue(con);
+		close_connection(con, TRUE);
+		atomic_set(&con->waiting_requests, 0);
+	}
+	return 0;
+
+      out:
+	return -1;
+}
+
+/* API send message call, may queue the request */
+/* N.B. This is the old interface - use the new one for new calls */
+int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
+{
+	struct writequeue_entry *e;
+	char *b;
+
+	e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
+	if (e) {
+		memcpy(b, buf, len);
+		dlm_lowcomms_commit_buffer(e);
+		return 0;
+	}
+	return -ENOBUFS;
+}
+
+/* Look for activity on active sockets */
+static void process_sockets(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+	int count = 0;
+
+	spin_lock_bh(&read_sockets_lock);
+	list_for_each_safe(list, temp, &read_sockets) {
+
+		struct connection *con =
+		    list_entry(list, struct connection, read_list);
+		list_del(&con->read_list);
+		clear_bit(CF_READ_PENDING, &con->flags);
+
+		spin_unlock_bh(&read_sockets_lock);
+
+		/* This can reach zero if we are processing requests
+		 * as they come in.
+		 */
+		if (atomic_read(&con->waiting_requests) == 0) {
+			spin_lock_bh(&read_sockets_lock);
+			continue;
+		}
+
+		do {
+			con->rx_action(con);
+
+			/* Don't starve out everyone else */
+			if (++count >= MAX_RX_MSG_COUNT) {
+				schedule();
+				count = 0;
+			}
+
+		} while (!atomic_dec_and_test(&con->waiting_requests) &&
+			 !kthread_should_stop());
+
+		spin_lock_bh(&read_sockets_lock);
+	}
+	spin_unlock_bh(&read_sockets_lock);
+}
+
+/* Try to send any messages that are pending
+ */
+static void process_output_queue(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+	int ret;
+
+	spin_lock_bh(&write_sockets_lock);
+	list_for_each_safe(list, temp, &write_sockets) {
+		struct connection *con =
+		    list_entry(list, struct connection, write_list);
+		clear_bit(CF_WRITE_PENDING, &con->flags);
+		list_del(&con->write_list);
+
+		spin_unlock_bh(&write_sockets_lock);
+
+		ret = send_to_sock(con);
+		if (ret < 0) {
+		}
+		spin_lock_bh(&write_sockets_lock);
+	}
+	spin_unlock_bh(&write_sockets_lock);
+}
+
+static void process_state_queue(void)
+{
+	struct list_head *list;
+	struct list_head *temp;
+	int ret;
+
+	spin_lock_bh(&state_sockets_lock);
+	list_for_each_safe(list, temp, &state_sockets) {
+		struct connection *con =
+		    list_entry(list, struct connection, state_list);
+		list_del(&con->state_list);
+		clear_bit(CF_CONNECT_PENDING, &con->flags);
+		spin_unlock_bh(&state_sockets_lock);
+
+		ret = connect_to_sock(con);
+		if (ret < 0) {
+		}
+		spin_lock_bh(&state_sockets_lock);
+	}
+	spin_unlock_bh(&state_sockets_lock);
+}
+
+
+/* Discard all entries on the write queues */
+static void clean_writequeues(void)
+{
+	int nodeid;
+
+	for (nodeid = 1; nodeid < conn_array_size; nodeid++) {
+		struct connection *con = nodeid2con(nodeid, 0);
+
+		if (con)
+			clean_one_writequeue(con);
+	}
+}
+
+static int read_list_empty(void)
+{
+	int status;
+
+	spin_lock_bh(&read_sockets_lock);
+	status = list_empty(&read_sockets);
+	spin_unlock_bh(&read_sockets_lock);
+
+	return status;
+}
+
+/* DLM Transport comms receive daemon */
+static int dlm_recvd(void *data)
+{
+	init_waitqueue_head(&lowcomms_recv_waitq);
+	init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
+	add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (read_list_empty())
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		process_sockets();
+	}
+
+	return 0;
+}
+
+static int write_and_state_lists_empty(void)
+{
+	int status;
+
+	spin_lock_bh(&write_sockets_lock);
+	status = list_empty(&write_sockets);
+	spin_unlock_bh(&write_sockets_lock);
+
+	spin_lock_bh(&state_sockets_lock);
+	if (list_empty(&state_sockets) == 0)
+		status = 0;
+	spin_unlock_bh(&state_sockets_lock);
+
+	return status;
+}
+
+/* DLM Transport send daemon */
+static int dlm_sendd(void *data)
+{
+	init_waitqueue_head(&lowcomms_send_waitq);
+	init_waitqueue_entry(&lowcomms_send_waitq_head, current);
+	add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (write_and_state_lists_empty())
+			schedule();
+		set_current_state(TASK_RUNNING);
+
+		process_state_queue();
+		process_output_queue();
+	}
+
+	return 0;
+}
+
+static void daemons_stop(void)
+{
+	kthread_stop(recv_task);
+	kthread_stop(send_task);
+}
+
+static int daemons_start(void)
+{
+	struct task_struct *p;
+	int error;
+
+	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_recvd %d", error);
+		return error;
+	}
+	recv_task = p;
+
+	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
+	error = IS_ERR(p);
+       	if (error) {
+		log_print("can't start dlm_sendd %d", error);
+		kthread_stop(recv_task);
+		return error;
+	}
+	send_task = p;
+
+	return 0;
+}
+
+/*
+ * Return the largest buffer size we can cope with.
+ */
+int lowcomms_max_buffer_size(void)
+{
+	return PAGE_CACHE_SIZE;
+}
+
+void dlm_lowcomms_stop(void)
+{
+	int i;
+
+	atomic_set(&accepting, 0);
+
+	/* Set all the activity flags to prevent any
+	   socket activity.
+	*/
+	for (i = 0; i < conn_array_size; i++) {
+		if (connections[i])
+			connections[i]->flags |= 0x7;
+	}
+	daemons_stop();
+	clean_writequeues();
+
+	for (i = 0; i < conn_array_size; i++) {
+		if (connections[i]) {
+			close_connection(connections[i], TRUE);
+			if (connections[i]->othercon)
+				kmem_cache_free(con_cache, connections[i]->othercon);
+			kmem_cache_free(con_cache, connections[i]);
+		}
+	}
+
+	kfree(connections);
+	connections = NULL;
+
+	kmem_cache_destroy(con_cache);
+}
+
+/* This is quite likely to sleep... */
+int dlm_lowcomms_start(void)
+{
+	int error = 0;
+
+	error = -ENOTCONN;
+
+	/*
+	 * Temporarily initialise the waitq head so that lowcomms_send_message
+	 * doesn't crash if it gets called before the thread is fully
+	 * initialised
+	 */
+	init_waitqueue_head(&lowcomms_send_waitq);
+
+	error = -ENOMEM;
+	connections = kmalloc(sizeof(struct connection *) *
+			      NODE_INCREMENT, GFP_KERNEL);
+	if (!connections)
+		goto out;
+
+	memset(connections, 0,
+	       sizeof(struct connection *) * NODE_INCREMENT);
+
+	conn_array_size = NODE_INCREMENT;
+
+	if (dlm_our_addr(&dlm_local_addr, 0)) {
+		log_print("no local IP address has been set");
+		goto fail_free_conn;
+	}
+	if (!dlm_our_addr(&dlm_local_addr, 1)) {
+		log_print("This dlm comms module does not support multi-homed clustering");
+		goto fail_free_conn;
+	}
+
+	con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
+				      __alignof__(struct connection), 0, NULL, NULL);
+	if (!con_cache)
+		goto fail_free_conn;
+
+
+	/* Start listening */
+	error = listen_for_all();
+	if (error)
+		goto fail_unlisten;
+
+	error = daemons_start();
+	if (error)
+		goto fail_unlisten;
+
+	atomic_set(&accepting, 1);
+
+	return 0;
+
+      fail_unlisten:
+	close_connection(connections[0], 0);
+	kmem_cache_free(con_cache, connections[0]);
+	kmem_cache_destroy(con_cache);
+
+      fail_free_conn:
+	kfree(connections);
+
+      out:
+	return error;
+}
+
+int dlm_lowcomms_init(void)
+{
+	INIT_LIST_HEAD(&read_sockets);
+	INIT_LIST_HEAD(&write_sockets);
+	INIT_LIST_HEAD(&state_sockets);
+
+	spin_lock_init(&read_sockets_lock);
+	spin_lock_init(&write_sockets_lock);
+	spin_lock_init(&state_sockets_lock);
+	init_MUTEX(&connections_lock);
+
+	return 0;
+}
+
+void dlm_lowcomms_exit(void)
+{
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
deleted file mode 100644
index 6da6b14d5a..0000000000
--- a/fs/dlm/lowcomms.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/******************************************************************************
-*******************************************************************************
-**
-**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
-**
-**  This copyrighted material is made available to anyone wishing to use,
-**  modify, copy, or redistribute it subject to the terms and conditions
-**  of the GNU General Public License v.2.
-**
-*******************************************************************************
-******************************************************************************/
-
-/*
- * lowcomms.c
- *
- * This is the "low-level" comms layer.
- *
- * It is responsible for sending/receiving messages
- * from other nodes in the cluster.
- *
- * Cluster nodes are referred to by their nodeids. nodeids are
- * simply 32 bit numbers to the locking module - if they need to
- * be expanded for the cluster infrastructure then that is it's
- * responsibility. It is this layer's
- * responsibility to resolve these into IP address or
- * whatever it needs for inter-node communication.
- *
- * The comms level is two kernel threads that deal mainly with
- * the receiving of messages from other nodes and passing them
- * up to the mid-level comms layer (which understands the
- * message format) for execution by the locking core, and
- * a send thread which does all the setting up of connections
- * to remote nodes and the sending of data. Threads are not allowed
- * to send their own data because it may cause them to wait in times
- * of high load. Also, this way, the sending thread can collect together
- * messages bound for one node and send them in one block.
- *
- * I don't see any problem with the recv thread executing the locking
- * code on behalf of remote processes as the locking code is
- * short, efficient and never (well, hardly ever) waits.
- *
- */
-
-#include <asm/ioctls.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-#include <net/sctp/user.h>
-#include <linux/pagemap.h>
-#include <linux/socket.h>
-#include <linux/idr.h>
-
-#include "dlm_internal.h"
-#include "lowcomms.h"
-#include "config.h"
-#include "midcomms.h"
-
-static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
-static int			dlm_local_count;
-static int			dlm_local_nodeid;
-
-/* One of these per connected node */
-
-#define NI_INIT_PENDING 1
-#define NI_WRITE_PENDING 2
-
-struct nodeinfo {
-	spinlock_t		lock;
-	sctp_assoc_t		assoc_id;
-	unsigned long		flags;
-	struct list_head	write_list; /* nodes with pending writes */
-	struct list_head	writequeue; /* outgoing writequeue_entries */
-	spinlock_t		writequeue_lock;
-	int			nodeid;
-};
-
-static DEFINE_IDR(nodeinfo_idr);
-static struct rw_semaphore	nodeinfo_lock;
-static int			max_nodeid;
-
-struct cbuf {
-	unsigned		base;
-	unsigned		len;
-	unsigned		mask;
-};
-
-/* Just the one of these, now. But this struct keeps
-   the connection-specific variables together */
-
-#define CF_READ_PENDING 1
-
-struct connection {
-	struct socket          *sock;
-	unsigned long		flags;
-	struct page            *rx_page;
-	atomic_t		waiting_requests;
-	struct cbuf		cb;
-	int                     eagain_flag;
-};
-
-/* An entry waiting to be sent */
-
-struct writequeue_entry {
-	struct list_head	list;
-	struct page            *page;
-	int			offset;
-	int			len;
-	int			end;
-	int			users;
-	struct nodeinfo        *ni;
-};
-
-#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
-#define CBUF_EMPTY(cb) ((cb)->len == 0)
-#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
-#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
-
-#define CBUF_INIT(cb, size) \
-do { \
-	(cb)->base = (cb)->len = 0; \
-	(cb)->mask = ((size)-1); \
-} while(0)
-
-#define CBUF_EAT(cb, n) \
-do { \
-	(cb)->len  -= (n); \
-	(cb)->base += (n); \
-	(cb)->base &= (cb)->mask; \
-} while(0)
-
-
-/* List of nodes which have writes pending */
-static struct list_head write_nodes;
-static spinlock_t write_nodes_lock;
-
-/* Maximum number of incoming messages to process before
- * doing a schedule()
- */
-#define MAX_RX_MSG_COUNT 25
-
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-static wait_queue_head_t lowcomms_recv_wait;
-static atomic_t accepting;
-
-/* The SCTP connection */
-static struct connection sctp_con;
-
-
-static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
-{
-	struct sockaddr_storage addr;
-	int error;
-
-	if (!dlm_local_count)
-		return -1;
-
-	error = dlm_nodeid_to_addr(nodeid, &addr);
-	if (error)
-		return error;
-
-	if (dlm_local_addr[0]->ss_family == AF_INET) {
-	        struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
-		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
-		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
-	} else {
-	        struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
-		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
-		memcpy(&ret6->sin6_addr, &in6->sin6_addr,
-		       sizeof(in6->sin6_addr));
-	}
-
-	return 0;
-}
-
-static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
-{
-	struct nodeinfo *ni;
-	int r;
-	int n;
-
-	down_read(&nodeinfo_lock);
-	ni = idr_find(&nodeinfo_idr, nodeid);
-	up_read(&nodeinfo_lock);
-
-	if (!ni && alloc) {
-		down_write(&nodeinfo_lock);
-
-		ni = idr_find(&nodeinfo_idr, nodeid);
-		if (ni)
-			goto out_up;
-
-		r = idr_pre_get(&nodeinfo_idr, alloc);
-		if (!r)
-			goto out_up;
-
-		ni = kmalloc(sizeof(struct nodeinfo), alloc);
-		if (!ni)
-			goto out_up;
-
-		r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
-		if (r) {
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		if (n != nodeid) {
-			idr_remove(&nodeinfo_idr, n);
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		memset(ni, 0, sizeof(struct nodeinfo));
-		spin_lock_init(&ni->lock);
-		INIT_LIST_HEAD(&ni->writequeue);
-		spin_lock_init(&ni->writequeue_lock);
-		ni->nodeid = nodeid;
-
-		if (nodeid > max_nodeid)
-			max_nodeid = nodeid;
-	out_up:
-		up_write(&nodeinfo_lock);
-	}
-
-	return ni;
-}
-
-/* Don't call this too often... */
-static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
-{
-	int i;
-	struct nodeinfo *ni;
-
-	for (i=1; i<=max_nodeid; i++) {
-		ni = nodeid2nodeinfo(i, 0);
-		if (ni && ni->assoc_id == assoc)
-			return ni;
-	}
-	return NULL;
-}
-
-/* Data or notification available on socket */
-static void lowcomms_data_ready(struct sock *sk, int count_unused)
-{
-	atomic_inc(&sctp_con.waiting_requests);
-	if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
-		return;
-
-	wake_up_interruptible(&lowcomms_recv_wait);
-}
-
-
-/* Add the port number to an IP6 or 4 sockaddr and return the address length.
-   Also padd out the struct with zeros to make comparisons meaningful */
-
-static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
-			  int *addr_len)
-{
-	struct sockaddr_in *local4_addr;
-	struct sockaddr_in6 *local6_addr;
-
-	if (!dlm_local_count)
-		return;
-
-	if (!port) {
-		if (dlm_local_addr[0]->ss_family == AF_INET) {
-			local4_addr = (struct sockaddr_in *)dlm_local_addr[0];
-			port = be16_to_cpu(local4_addr->sin_port);
-		} else {
-			local6_addr = (struct sockaddr_in6 *)dlm_local_addr[0];
-			port = be16_to_cpu(local6_addr->sin6_port);
-		}
-	}
-
-	saddr->ss_family = dlm_local_addr[0]->ss_family;
-	if (dlm_local_addr[0]->ss_family == AF_INET) {
-		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
-		in4_addr->sin_port = cpu_to_be16(port);
-		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
-		memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in));
-		*addr_len = sizeof(struct sockaddr_in);
-	} else {
-		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
-		in6_addr->sin6_port = cpu_to_be16(port);
-		memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in6));
-		*addr_len = sizeof(struct sockaddr_in6);
-	}
-}
-
-/* Close the connection and tidy up */
-static void close_connection(void)
-{
-	if (sctp_con.sock) {
-		sock_release(sctp_con.sock);
-		sctp_con.sock = NULL;
-	}
-
-	if (sctp_con.rx_page) {
-		__free_page(sctp_con.rx_page);
-		sctp_con.rx_page = NULL;
-	}
-}
-
-/* We only send shutdown messages to nodes that are not part of the cluster */
-static void send_shutdown(sctp_assoc_t associd)
-{
-	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-	struct msghdr outmessage;
-	struct cmsghdr *cmsg;
-	struct sctp_sndrcvinfo *sinfo;
-	int ret;
-
-	outmessage.msg_name = NULL;
-	outmessage.msg_namelen = 0;
-	outmessage.msg_control = outcmsg;
-	outmessage.msg_controllen = sizeof(outcmsg);
-	outmessage.msg_flags = MSG_EOR;
-
-	cmsg = CMSG_FIRSTHDR(&outmessage);
-	cmsg->cmsg_level = IPPROTO_SCTP;
-	cmsg->cmsg_type = SCTP_SNDRCV;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	outmessage.msg_controllen = cmsg->cmsg_len;
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-
-	sinfo->sinfo_flags |= MSG_EOF;
-	sinfo->sinfo_assoc_id = associd;
-
-	ret = kernel_sendmsg(sctp_con.sock, &outmessage, NULL, 0, 0);
-
-	if (ret != 0)
-		log_print("send EOF to node failed: %d", ret);
-}
-
-
-/* INIT failed but we don't know which node...
-   restart INIT on all pending nodes */
-static void init_failed(void)
-{
-	int i;
-	struct nodeinfo *ni;
-
-	for (i=1; i<=max_nodeid; i++) {
-		ni = nodeid2nodeinfo(i, 0);
-		if (!ni)
-			continue;
-
-		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
-			ni->assoc_id = 0;
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-		}
-	}
-	wake_up_process(send_task);
-}
-
-/* Something happened to an association */
-static void process_sctp_notification(struct msghdr *msg, char *buf)
-{
-	union sctp_notification *sn = (union sctp_notification *)buf;
-
-	if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) {
-		switch (sn->sn_assoc_change.sac_state) {
-
-		case SCTP_COMM_UP:
-		case SCTP_RESTART:
-		{
-			/* Check that the new node is in the lockspace */
-			struct sctp_prim prim;
-			mm_segment_t fs;
-			int nodeid;
-			int prim_len, ret;
-			int addr_len;
-			struct nodeinfo *ni;
-
-			/* This seems to happen when we received a connection
-			 * too early... or something...  anyway, it happens but
-			 * we always seem to get a real message too, see
-			 * receive_from_sock */
-
-			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
-				log_print("COMM_UP for invalid assoc ID %d",
-					 (int)sn->sn_assoc_change.sac_assoc_id);
-				init_failed();
-				return;
-			}
-			memset(&prim, 0, sizeof(struct sctp_prim));
-			prim_len = sizeof(struct sctp_prim);
-			prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id;
-
-			fs = get_fs();
-			set_fs(get_ds());
-			ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
-						IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
-						(char*)&prim, &prim_len);
-			set_fs(fs);
-			if (ret < 0) {
-				struct nodeinfo *ni;
-
-				log_print("getsockopt/sctp_primary_addr on "
-					  "new assoc %d failed : %d",
-				    (int)sn->sn_assoc_change.sac_assoc_id, ret);
-
-				/* Retry INIT later */
-				ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
-				if (ni)
-					clear_bit(NI_INIT_PENDING, &ni->flags);
-				return;
-			}
-			make_sockaddr(&prim.ssp_addr, 0, &addr_len);
-			if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
-				log_print("reject connect from unknown addr");
-				send_shutdown(prim.ssp_assoc_id);
-				return;
-			}
-
-			ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
-			if (!ni)
-				return;
-
-			/* Save the assoc ID */
-			spin_lock(&ni->lock);
-			ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
-			spin_unlock(&ni->lock);
-
-			log_print("got new/restarted association %d nodeid %d",
-			       (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
-
-			/* Send any pending writes */
-			clear_bit(NI_INIT_PENDING, &ni->flags);
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-			wake_up_process(send_task);
-		}
-		break;
-
-		case SCTP_COMM_LOST:
-		case SCTP_SHUTDOWN_COMP:
-		{
-			struct nodeinfo *ni;
-
-			ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
-			if (ni) {
-				spin_lock(&ni->lock);
-				ni->assoc_id = 0;
-				spin_unlock(&ni->lock);
-			}
-		}
-		break;
-
-		/* We don't know which INIT failed, so clear the PENDING flags
-		 * on them all.  if assoc_id is zero then it will then try
-		 * again */
-
-		case SCTP_CANT_STR_ASSOC:
-		{
-			log_print("Can't start SCTP association - retrying");
-			init_failed();
-		}
-		break;
-
-		default:
-			log_print("unexpected SCTP assoc change id=%d state=%d",
-				  (int)sn->sn_assoc_change.sac_assoc_id,
-				  sn->sn_assoc_change.sac_state);
-		}
-	}
-}
-
-/* Data received from remote end */
-static int receive_from_sock(void)
-{
-	int ret = 0;
-	struct msghdr msg;
-	struct kvec iov[2];
-	unsigned len;
-	int r;
-	struct sctp_sndrcvinfo *sinfo;
-	struct cmsghdr *cmsg;
-	struct nodeinfo *ni;
-
-	/* These two are marginally too big for stack allocation, but this
-	 * function is (currently) only called by dlm_recvd so static should be
-	 * OK.
-	 */
-	static struct sockaddr_storage msgname;
-	static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-
-	if (sctp_con.sock == NULL)
-		goto out;
-
-	if (sctp_con.rx_page == NULL) {
-		/*
-		 * This doesn't need to be atomic, but I think it should
-		 * improve performance if it is.
-		 */
-		sctp_con.rx_page = alloc_page(GFP_ATOMIC);
-		if (sctp_con.rx_page == NULL)
-			goto out_resched;
-		CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
-	}
-
-	memset(&incmsg, 0, sizeof(incmsg));
-	memset(&msgname, 0, sizeof(msgname));
-
-	memset(incmsg, 0, sizeof(incmsg));
-	msg.msg_name = &msgname;
-	msg.msg_namelen = sizeof(msgname);
-	msg.msg_flags = 0;
-	msg.msg_control = incmsg;
-	msg.msg_controllen = sizeof(incmsg);
-	msg.msg_iovlen = 1;
-
-	/* I don't see why this circular buffer stuff is necessary for SCTP
-	 * which is a packet-based protocol, but the whole thing breaks under
-	 * load without it! The overhead is minimal (and is in the TCP lowcomms
-	 * anyway, of course) so I'll leave it in until I can figure out what's
-	 * really happening.
-	 */
-
-	/*
-	 * iov[0] is the bit of the circular buffer between the current end
-	 * point (cb.base + cb.len) and the end of the buffer.
-	 */
-	iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
-	iov[0].iov_base = page_address(sctp_con.rx_page) +
-			  CBUF_DATA(&sctp_con.cb);
-	iov[1].iov_len = 0;
-
-	/*
-	 * iov[1] is the bit of the circular buffer between the start of the
-	 * buffer and the start of the currently used section (cb.base)
-	 */
-	if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
-		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
-		iov[1].iov_len = sctp_con.cb.base;
-		iov[1].iov_base = page_address(sctp_con.rx_page);
-		msg.msg_iovlen = 2;
-	}
-	len = iov[0].iov_len + iov[1].iov_len;
-
-	r = ret = kernel_recvmsg(sctp_con.sock, &msg, iov, msg.msg_iovlen, len,
-				 MSG_NOSIGNAL | MSG_DONTWAIT);
-	if (ret <= 0)
-		goto out_close;
-
-	msg.msg_control = incmsg;
-	msg.msg_controllen = sizeof(incmsg);
-	cmsg = CMSG_FIRSTHDR(&msg);
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-
-	if (msg.msg_flags & MSG_NOTIFICATION) {
-		process_sctp_notification(&msg, page_address(sctp_con.rx_page));
-		return 0;
-	}
-
-	/* Is this a new association ? */
-	ni = nodeid2nodeinfo(le32_to_cpu(sinfo->sinfo_ppid), GFP_KERNEL);
-	if (ni) {
-		ni->assoc_id = sinfo->sinfo_assoc_id;
-		if (test_and_clear_bit(NI_INIT_PENDING, &ni->flags)) {
-
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-			wake_up_process(send_task);
-		}
-	}
-
-	/* INIT sends a message with length of 1 - ignore it */
-	if (r == 1)
-		return 0;
-
-	CBUF_ADD(&sctp_con.cb, ret);
-	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
-					  page_address(sctp_con.rx_page),
-					  sctp_con.cb.base, sctp_con.cb.len,
-					  PAGE_CACHE_SIZE);
-	if (ret < 0)
-		goto out_close;
-	CBUF_EAT(&sctp_con.cb, ret);
-
-      out:
-	ret = 0;
-	goto out_ret;
-
-      out_resched:
-	lowcomms_data_ready(sctp_con.sock->sk, 0);
-	ret = 0;
-	schedule();
-	goto out_ret;
-
-      out_close:
-	if (ret != -EAGAIN)
-		log_print("error reading from sctp socket: %d", ret);
-      out_ret:
-	return ret;
-}
-
-/* Bind to an IP address. SCTP allows multiple address so it can do multi-homing */
-static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
-{
-	mm_segment_t fs;
-	int result = 0;
-
-	fs = get_fs();
-	set_fs(get_ds());
-	if (num == 1)
-		result = sctp_con.sock->ops->bind(sctp_con.sock,
-					(struct sockaddr *) addr, addr_len);
-	else
-		result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
-				SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
-	set_fs(fs);
-
-	if (result < 0)
-		log_print("Can't bind to port %d addr number %d",
-			  dlm_config.tcp_port, num);
-
-	return result;
-}
-
-static void init_local(void)
-{
-	struct sockaddr_storage sas, *addr;
-	int i;
-
-	dlm_local_nodeid = dlm_our_nodeid();
-
-	for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) {
-		if (dlm_our_addr(&sas, i))
-			break;
-
-		addr = kmalloc(sizeof(*addr), GFP_KERNEL);
-		if (!addr)
-			break;
-		memcpy(addr, &sas, sizeof(*addr));
-		dlm_local_addr[dlm_local_count++] = addr;
-	}
-}
-
-/* Initialise SCTP socket and bind to all interfaces */
-static int init_sock(void)
-{
-	mm_segment_t fs;
-	struct socket *sock = NULL;
-	struct sockaddr_storage localaddr;
-	struct sctp_event_subscribe subscribe;
-	int result = -EINVAL, num = 1, i, addr_len;
-
-	if (!dlm_local_count) {
-		init_local();
-		if (!dlm_local_count) {
-			log_print("no local IP address has been set");
-			goto out;
-		}
-	}
-
-	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_SEQPACKET,
-				  IPPROTO_SCTP, &sock);
-	if (result < 0) {
-		log_print("Can't create comms socket, check SCTP is loaded");
-		goto out;
-	}
-
-	/* Listen for events */
-	memset(&subscribe, 0, sizeof(subscribe));
-	subscribe.sctp_data_io_event = 1;
-	subscribe.sctp_association_event = 1;
-	subscribe.sctp_send_failure_event = 1;
-	subscribe.sctp_shutdown_event = 1;
-	subscribe.sctp_partial_delivery_event = 1;
-
-	fs = get_fs();
-	set_fs(get_ds());
-	result = sock->ops->setsockopt(sock, SOL_SCTP, SCTP_EVENTS,
-				       (char *)&subscribe, sizeof(subscribe));
-	set_fs(fs);
-
-	if (result < 0) {
-		log_print("Failed to set SCTP_EVENTS on socket: result=%d",
-			  result);
-		goto create_delsock;
-	}
-
-	/* Init con struct */
-	sock->sk->sk_user_data = &sctp_con;
-	sctp_con.sock = sock;
-	sctp_con.sock->sk->sk_data_ready = lowcomms_data_ready;
-
-	/* Bind to all interfaces. */
-	for (i = 0; i < dlm_local_count; i++) {
-		memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
-		make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
-
-		result = add_bind_addr(&localaddr, addr_len, num);
-		if (result)
-			goto create_delsock;
-		++num;
-	}
-
-	result = sock->ops->listen(sock, 5);
-	if (result < 0) {
-		log_print("Can't set socket listening");
-		goto create_delsock;
-	}
-
-	return 0;
-
- create_delsock:
-	sock_release(sock);
-	sctp_con.sock = NULL;
- out:
-	return result;
-}
-
-
-static struct writequeue_entry *new_writequeue_entry(gfp_t allocation)
-{
-	struct writequeue_entry *entry;
-
-	entry = kmalloc(sizeof(struct writequeue_entry), allocation);
-	if (!entry)
-		return NULL;
-
-	entry->page = alloc_page(allocation);
-	if (!entry->page) {
-		kfree(entry);
-		return NULL;
-	}
-
-	entry->offset = 0;
-	entry->len = 0;
-	entry->end = 0;
-	entry->users = 0;
-
-	return entry;
-}
-
-void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
-{
-	struct writequeue_entry *e;
-	int offset = 0;
-	int users = 0;
-	struct nodeinfo *ni;
-
-	if (!atomic_read(&accepting))
-		return NULL;
-
-	ni = nodeid2nodeinfo(nodeid, allocation);
-	if (!ni)
-		return NULL;
-
-	spin_lock(&ni->writequeue_lock);
-	e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
-	if (((struct list_head *) e == &ni->writequeue) ||
-	    (PAGE_CACHE_SIZE - e->end < len)) {
-		e = NULL;
-	} else {
-		offset = e->end;
-		e->end += len;
-		users = e->users++;
-	}
-	spin_unlock(&ni->writequeue_lock);
-
-	if (e) {
-	      got_one:
-		if (users == 0)
-			kmap(e->page);
-		*ppc = page_address(e->page) + offset;
-		return e;
-	}
-
-	e = new_writequeue_entry(allocation);
-	if (e) {
-		spin_lock(&ni->writequeue_lock);
-		offset = e->end;
-		e->end += len;
-		e->ni = ni;
-		users = e->users++;
-		list_add_tail(&e->list, &ni->writequeue);
-		spin_unlock(&ni->writequeue_lock);
-		goto got_one;
-	}
-	return NULL;
-}
-
-void dlm_lowcomms_commit_buffer(void *arg)
-{
-	struct writequeue_entry *e = (struct writequeue_entry *) arg;
-	int users;
-	struct nodeinfo *ni = e->ni;
-
-	if (!atomic_read(&accepting))
-		return;
-
-	spin_lock(&ni->writequeue_lock);
-	users = --e->users;
-	if (users)
-		goto out;
-	e->len = e->end - e->offset;
-	kunmap(e->page);
-	spin_unlock(&ni->writequeue_lock);
-
-	if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-		spin_lock_bh(&write_nodes_lock);
-		list_add_tail(&ni->write_list, &write_nodes);
-		spin_unlock_bh(&write_nodes_lock);
-		wake_up_process(send_task);
-	}
-	return;
-
-      out:
-	spin_unlock(&ni->writequeue_lock);
-	return;
-}
-
-static void free_entry(struct writequeue_entry *e)
-{
-	__free_page(e->page);
-	kfree(e);
-}
-
-/* Initiate an SCTP association. In theory we could just use sendmsg() on
-   the first IP address and it should work, but this allows us to set up the
-   association before sending any valuable data that we can't afford to lose.
-   It also keeps the send path clean as it can now always use the association ID */
-static void initiate_association(int nodeid)
-{
-	struct sockaddr_storage rem_addr;
-	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-	struct msghdr outmessage;
-	struct cmsghdr *cmsg;
-	struct sctp_sndrcvinfo *sinfo;
-	int ret;
-	int addrlen;
-	char buf[1];
-	struct kvec iov[1];
-	struct nodeinfo *ni;
-
-	log_print("Initiating association with node %d", nodeid);
-
-	ni = nodeid2nodeinfo(nodeid, GFP_KERNEL);
-	if (!ni)
-		return;
-
-	if (nodeid_to_addr(nodeid, (struct sockaddr *)&rem_addr)) {
-		log_print("no address for nodeid %d", nodeid);
-		return;
-	}
-
-	make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
-
-	outmessage.msg_name = &rem_addr;
-	outmessage.msg_namelen = addrlen;
-	outmessage.msg_control = outcmsg;
-	outmessage.msg_controllen = sizeof(outcmsg);
-	outmessage.msg_flags = MSG_EOR;
-
-	iov[0].iov_base = buf;
-	iov[0].iov_len = 1;
-
-	/* Real INIT messages seem to cause trouble. Just send a 1 byte message
-	   we can afford to lose */
-	cmsg = CMSG_FIRSTHDR(&outmessage);
-	cmsg->cmsg_level = IPPROTO_SCTP;
-	cmsg->cmsg_type = SCTP_SNDRCV;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
-
-	outmessage.msg_controllen = cmsg->cmsg_len;
-	ret = kernel_sendmsg(sctp_con.sock, &outmessage, iov, 1, 1);
-	if (ret < 0) {
-		log_print("send INIT to node failed: %d", ret);
-		/* Try again later */
-		clear_bit(NI_INIT_PENDING, &ni->flags);
-	}
-}
-
-/* Send a message */
-static int send_to_sock(struct nodeinfo *ni)
-{
-	int ret = 0;
-	struct writequeue_entry *e;
-	int len, offset;
-	struct msghdr outmsg;
-	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
-	struct cmsghdr *cmsg;
-	struct sctp_sndrcvinfo *sinfo;
-	struct kvec iov;
-
-        /* See if we need to init an association before we start
-	   sending precious messages */
-	spin_lock(&ni->lock);
-	if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
-		spin_unlock(&ni->lock);
-		initiate_association(ni->nodeid);
-		return 0;
-	}
-	spin_unlock(&ni->lock);
-
-	outmsg.msg_name = NULL; /* We use assoc_id */
-	outmsg.msg_namelen = 0;
-	outmsg.msg_control = outcmsg;
-	outmsg.msg_controllen = sizeof(outcmsg);
-	outmsg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | MSG_EOR;
-
-	cmsg = CMSG_FIRSTHDR(&outmsg);
-	cmsg->cmsg_level = IPPROTO_SCTP;
-	cmsg->cmsg_type = SCTP_SNDRCV;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
-	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
-	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
-	sinfo->sinfo_assoc_id = ni->assoc_id;
-	outmsg.msg_controllen = cmsg->cmsg_len;
-
-	spin_lock(&ni->writequeue_lock);
-	for (;;) {
-		if (list_empty(&ni->writequeue))
-			break;
-		e = list_entry(ni->writequeue.next, struct writequeue_entry,
-			       list);
-		len = e->len;
-		offset = e->offset;
-		BUG_ON(len == 0 && e->users == 0);
-		spin_unlock(&ni->writequeue_lock);
-		kmap(e->page);
-
-		ret = 0;
-		if (len) {
-			iov.iov_base = page_address(e->page)+offset;
-			iov.iov_len = len;
-
-			ret = kernel_sendmsg(sctp_con.sock, &outmsg, &iov, 1,
-					     len);
-			if (ret == -EAGAIN) {
-				sctp_con.eagain_flag = 1;
-				goto out;
-			} else if (ret < 0)
-				goto send_error;
-		} else {
-			/* Don't starve people filling buffers */
-			schedule();
-		}
-
-		spin_lock(&ni->writequeue_lock);
-		e->offset += ret;
-		e->len -= ret;
-
-		if (e->len == 0 && e->users == 0) {
-			list_del(&e->list);
-			free_entry(e);
-			continue;
-		}
-	}
-	spin_unlock(&ni->writequeue_lock);
- out:
-	return ret;
-
- send_error:
-	log_print("Error sending to node %d %d", ni->nodeid, ret);
-	spin_lock(&ni->lock);
-	if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
-		ni->assoc_id = 0;
-		spin_unlock(&ni->lock);
-		initiate_association(ni->nodeid);
-	} else
-		spin_unlock(&ni->lock);
-
-	return ret;
-}
-
-/* Try to send any messages that are pending */
-static void process_output_queue(void)
-{
-	struct list_head *list;
-	struct list_head *temp;
-
-	spin_lock_bh(&write_nodes_lock);
-	list_for_each_safe(list, temp, &write_nodes) {
-		struct nodeinfo *ni =
-		    list_entry(list, struct nodeinfo, write_list);
-		clear_bit(NI_WRITE_PENDING, &ni->flags);
-		list_del(&ni->write_list);
-
-		spin_unlock_bh(&write_nodes_lock);
-
-		send_to_sock(ni);
-		spin_lock_bh(&write_nodes_lock);
-	}
-	spin_unlock_bh(&write_nodes_lock);
-}
-
-/* Called after we've had -EAGAIN and been woken up */
-static void refill_write_queue(void)
-{
-	int i;
-
-	for (i=1; i<=max_nodeid; i++) {
-		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
-
-		if (ni) {
-			if (!test_and_set_bit(NI_WRITE_PENDING, &ni->flags)) {
-				spin_lock_bh(&write_nodes_lock);
-				list_add_tail(&ni->write_list, &write_nodes);
-				spin_unlock_bh(&write_nodes_lock);
-			}
-		}
-	}
-}
-
-static void clean_one_writequeue(struct nodeinfo *ni)
-{
-	struct list_head *list;
-	struct list_head *temp;
-
-	spin_lock(&ni->writequeue_lock);
-	list_for_each_safe(list, temp, &ni->writequeue) {
-		struct writequeue_entry *e =
-			list_entry(list, struct writequeue_entry, list);
-		list_del(&e->list);
-		free_entry(e);
-	}
-	spin_unlock(&ni->writequeue_lock);
-}
-
-static void clean_writequeues(void)
-{
-	int i;
-
-	for (i=1; i<=max_nodeid; i++) {
-		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
-		if (ni)
-			clean_one_writequeue(ni);
-	}
-}
-
-
-static void dealloc_nodeinfo(void)
-{
-	int i;
-
-	for (i=1; i<=max_nodeid; i++) {
-		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
-		if (ni) {
-			idr_remove(&nodeinfo_idr, i);
-			kfree(ni);
-		}
-	}
-}
-
-int dlm_lowcomms_close(int nodeid)
-{
-	struct nodeinfo *ni;
-
-	ni = nodeid2nodeinfo(nodeid, 0);
-	if (!ni)
-		return -1;
-
-	spin_lock(&ni->lock);
-	if (ni->assoc_id) {
-		ni->assoc_id = 0;
-		/* Don't send shutdown here, sctp will just queue it
-		   till the node comes back up! */
-	}
-	spin_unlock(&ni->lock);
-
-	clean_one_writequeue(ni);
-	clear_bit(NI_INIT_PENDING, &ni->flags);
-	return 0;
-}
-
-static int write_list_empty(void)
-{
-	int status;
-
-	spin_lock_bh(&write_nodes_lock);
-	status = list_empty(&write_nodes);
-	spin_unlock_bh(&write_nodes_lock);
-
-	return status;
-}
-
-static int dlm_recvd(void *data)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	while (!kthread_should_stop()) {
-		int count = 0;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&lowcomms_recv_wait, &wait);
-		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
-			schedule();
-		remove_wait_queue(&lowcomms_recv_wait, &wait);
-		set_current_state(TASK_RUNNING);
-
-		if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
-			int ret;
-
-			do {
-				ret = receive_from_sock();
-
-				/* Don't starve out everyone else */
-				if (++count >= MAX_RX_MSG_COUNT) {
-					schedule();
-					count = 0;
-				}
-			} while (!kthread_should_stop() && ret >=0);
-		}
-		schedule();
-	}
-
-	return 0;
-}
-
-static int dlm_sendd(void *data)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	while (!kthread_should_stop()) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (write_list_empty())
-			schedule();
-		set_current_state(TASK_RUNNING);
-
-		if (sctp_con.eagain_flag) {
-			sctp_con.eagain_flag = 0;
-			refill_write_queue();
-		}
-		process_output_queue();
-	}
-
-	remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
-	return 0;
-}
-
-static void daemons_stop(void)
-{
-	kthread_stop(recv_task);
-	kthread_stop(send_task);
-}
-
-static int daemons_start(void)
-{
-	struct task_struct *p;
-	int error;
-
-	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
-	error = IS_ERR(p);
-       	if (error) {
-		log_print("can't start dlm_recvd %d", error);
-		return error;
-	}
-	recv_task = p;
-
-	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
-	error = IS_ERR(p);
-       	if (error) {
-		log_print("can't start dlm_sendd %d", error);
-		kthread_stop(recv_task);
-		return error;
-	}
-	send_task = p;
-
-	return 0;
-}
-
-/*
- * This is quite likely to sleep...
- */
-int dlm_lowcomms_start(void)
-{
-	int error;
-
-	error = init_sock();
-	if (error)
-		goto fail_sock;
-	error = daemons_start();
-	if (error)
-		goto fail_sock;
-	atomic_set(&accepting, 1);
-	return 0;
-
- fail_sock:
-	close_connection();
-	return error;
-}
-
-/* Set all the activity flags to prevent any socket activity. */
-
-void dlm_lowcomms_stop(void)
-{
-	atomic_set(&accepting, 0);
-	sctp_con.flags = 0x7;
-	daemons_stop();
-	clean_writequeues();
-	close_connection();
-	dealloc_nodeinfo();
-	max_nodeid = 0;
-}
-
-int dlm_lowcomms_init(void)
-{
-	init_waitqueue_head(&lowcomms_recv_wait);
-	spin_lock_init(&write_nodes_lock);
-	INIT_LIST_HEAD(&write_nodes);
-	init_rwsem(&nodeinfo_lock);
-	return 0;
-}
-
-void dlm_lowcomms_exit(void)
-{
-	int i;
-
-	for (i = 0; i < dlm_local_count; i++)
-		kfree(dlm_local_addr[i]);
-	dlm_local_count = 0;
-	dlm_local_nodeid = 0;
-}
-
-- 
cgit v1.2.2


From 520698096436f7da5b9142e63e3bed5580c5f14e Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 2 Nov 2006 09:49:02 -0600
Subject: [DLM] res_recover_locks_count not reset when recover_locks is aborted

Red Hat BZ 213684

If a node sends an lkb to the new master (RCOM_LOCK message) during
recovery and recovery is then aborted on both nodes before it gets a
reply, the res_recover_locks_count needs to be reset to 0 so that when the
subsequent recovery comes along and sends the lkb to the new master again
the assertion doesn't trigger that checks that counter is zero.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/recover.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index a5e6d18487..cf9f6831ba 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -252,6 +252,7 @@ static void recover_list_clear(struct dlm_ls *ls)
 	spin_lock(&ls->ls_recover_list_lock);
 	list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
 		list_del_init(&r->res_recover_list);
+		r->res_recover_locks_count = 0;
 		dlm_put_rsb(r);
 		ls->ls_recover_list_count--;
 	}
-- 
cgit v1.2.2


From 435618b75b82b5ee511cc01fcdda9c44adb2f4bd Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Thu, 2 Nov 2006 09:45:56 -0600
Subject: [DLM] status messages ping-pong between unmounted nodes

Red Hat BZ 213682

If two nodes leave the lockspace (while unmounting the fs in the case of
gfs) after one has sent a STATUS message to the other, STATUS/STATUS_REPLY
messages will then ping-pong between the nodes when neither of them can
find the lockspace in question any longer.  We kill this by not sending
another STATUS message when we get a STATUS_REPLY for an unknown
lockspace.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/rcom.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 518239a8b1..87b12f7d3d 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -412,9 +412,10 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 
 	ls = dlm_find_lockspace_global(hd->h_lockspace);
 	if (!ls) {
-		log_print("lockspace %x from %d not found",
-			  hd->h_lockspace, nodeid);
-		send_ls_not_ready(nodeid, rc);
+		log_print("lockspace %x from %d type %x not found",
+			  hd->h_lockspace, nodeid, rc->rc_type);
+		if (rc->rc_type == DLM_RCOM_STATUS)
+			send_ls_not_ready(nodeid, rc);
 		return;
 	}
 
-- 
cgit v1.2.2


From d4400156d415540086c34a06e5d233122d6bf56a Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 31 Oct 2006 11:55:56 -0600
Subject: [DLM] fix requestqueue race

Red Hat BZ 211914

There's a race between dlm_recoverd (1) enabling locking and (2) clearing
out the requestqueue, and dlm_recvd (1) checking if locking is enabled and
(2) adding a message to the requestqueue.  An order of recoverd(1),
recvd(1), recvd(2), recoverd(2) will result in a message being left on the
requestqueue.  The fix is to have dlm_recvd check if dlm_recoverd has
enabled locking after taking the mutex for the requestqueue and if it has
processing the message instead of queueing it.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lock.c         | 15 +++++++++++----
 fs/dlm/requestqueue.c | 21 +++++++++++++++++----
 fs/dlm/requestqueue.h |  2 +-
 3 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 3f2befa479..6088a16926 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3028,10 +3028,17 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 
 	while (1) {
 		if (dlm_locking_stopped(ls)) {
-			if (!recovery)
-				dlm_add_requestqueue(ls, nodeid, hd);
-			error = -EINTR;
-			goto out;
+			if (recovery) {
+				error = -EINTR;
+				goto out;
+			}
+			error = dlm_add_requestqueue(ls, nodeid, hd);
+			if (error == -EAGAIN)
+				continue;
+			else {
+				error = -EINTR;
+				goto out;
+			}
 		}
 
 		if (lock_recovery_try(ls))
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7b2b089634..0226d2a0a0 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -30,26 +30,39 @@ struct rq_entry {
  * lockspace is enabled on some while still suspended on others.
  */
 
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 {
 	struct rq_entry *e;
 	int length = hd->h_length;
+	int rv = 0;
 
 	if (dlm_is_removed(ls, nodeid))
-		return;
+		return 0;
 
 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory\n");
-		return;
+		return 0;
 	}
 
 	e->nodeid = nodeid;
 	memcpy(e->request, hd, length);
 
+	/* We need to check dlm_locking_stopped() after taking the mutex to
+	   avoid a race where dlm_recoverd enables locking and runs
+	   process_requestqueue between our earlier dlm_locking_stopped check
+	   and this addition to the requestqueue. */
+
 	mutex_lock(&ls->ls_requestqueue_mutex);
-	list_add_tail(&e->list, &ls->ls_requestqueue);
+	if (dlm_locking_stopped(ls))
+		list_add_tail(&e->list, &ls->ls_requestqueue);
+	else {
+		log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
+		kfree(e);
+		rv = -EAGAIN;
+	}
 	mutex_unlock(&ls->ls_requestqueue_mutex);
+	return rv;
 }
 
 int dlm_process_requestqueue(struct dlm_ls *ls)
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h
index 349f0d292d..6a53ea0333 100644
--- a/fs/dlm/requestqueue.h
+++ b/fs/dlm/requestqueue.h
@@ -13,7 +13,7 @@
 #ifndef __REQUESTQUEUE_DOT_H__
 #define __REQUESTQUEUE_DOT_H__
 
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
 int dlm_process_requestqueue(struct dlm_ls *ls);
 void dlm_wait_requestqueue(struct dlm_ls *ls);
 void dlm_purge_requestqueue(struct dlm_ls *ls);
-- 
cgit v1.2.2


From 91c0dc93a1a6bbdd79707ed311e48b4397df177f Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 31 Oct 2006 11:56:01 -0600
Subject: [DLM] fix aborted recovery during node removal

Red Hat BZ 211914

With the new cluster infrastructure, dlm recovery for a node removal can
be aborted and restarted for a node addition.  When this happens, the
restarted recovery isn't aware that it's doing recovery for the earlier
removal as well as the addition.  So, it then skips the recovery steps
only required when nodes are removed.  This can result in locks not being
purged for failed/removed nodes.  The fix is to check for removed nodes
for which recovery has not been completed at the start of a new recovery
sequence.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/member.c   | 8 ++++++++
 fs/dlm/recoverd.c | 7 +++++++
 2 files changed, 15 insertions(+)

(limited to 'fs')

diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index a3f7de7f3a..85e2897bd7 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -186,6 +186,14 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	struct dlm_member *memb, *safe;
 	int i, error, found, pos = 0, neg = 0, low = -1;
 
+	/* previously removed members that we've not finished removing need to
+	   count as a negative change so the "neg" recovery steps will happen */
+
+	list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
+		log_debug(ls, "prev removed member %d", memb->nodeid);
+		neg++;
+	}
+
 	/* move departed members from ls_nodes to ls_nodes_gone */
 
 	list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 362e3eff4d..4a1d6023fd 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -164,6 +164,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 		 */
 
 		dlm_recover_rsbs(ls);
+	} else {
+		/*
+		 * Other lockspace members may be going through the "neg" steps
+		 * while also adding us to the lockspace, in which case they'll
+		 * be looking for this status bit during dlm_recover_locks().
+		 */
+		dlm_set_recover_status(ls, DLM_RS_LOCKS);
 	}
 
 	dlm_release_root_list(ls);
-- 
cgit v1.2.2


From 2cdc98aaf072d573df10c503d3b3b0b74e2a6d06 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 31 Oct 2006 11:56:08 -0600
Subject: [DLM] fix stopping unstarted recovery

Red Hat BZ 211914

When many nodes are joining a lockspace simultaneously, the dlm gets a
quick sequence of stop/start events, a pair for adding each node.
dlm_controld in user space sends dlm_recoverd in the kernel each stop and
start event.  dlm_controld will sometimes send the stop before
dlm_recoverd has had a chance to take up the previously queued start.  The
stop aborts the processing of the previous start by setting the
RECOVERY_STOP flag.  dlm_recoverd is erroneously clearing this flag and
ignoring the stop/abort if it happens to take up the start after the stop
meant to abort it.  The fix is to check the sequence number that's
incremented for each stop/start before clearing the flag.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/recoverd.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 4a1d6023fd..6e4ee94ce7 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -219,6 +219,10 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 	return error;
 }
 
+/* The dlm_ls_start() that created the rv we take here may already have been
+   stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
+   flag set. */
+
 static void do_ls_recovery(struct dlm_ls *ls)
 {
 	struct dlm_recover *rv = NULL;
@@ -226,7 +230,8 @@ static void do_ls_recovery(struct dlm_ls *ls)
 	spin_lock(&ls->ls_recover_lock);
 	rv = ls->ls_recover_args;
 	ls->ls_recover_args = NULL;
-	clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	if (rv && ls->ls_recover_seq == rv->seq)
+		clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
 	spin_unlock(&ls->ls_recover_lock);
 
 	if (rv) {
-- 
cgit v1.2.2


From 4b77f2c93d052adca8cc8690b9b5e7f8798f4ddd Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 1 Nov 2006 09:31:48 -0600
Subject: [DLM] do full recover_locks barrier

Red Hat BZ 211914

The previous patch "[DLM] fix aborted recovery during
node removal" was incomplete as discovered with further testing.  It set
the bit for the RS_LOCKS barrier but did not then wait for the barrier.
This is often ok, but sometimes it will cause yet another recovery hang.
If it's a new node that also has the lowest nodeid that skips the barrier
wait, then it misses the important step of collecting and reporting the
barrier status from the other nodes (which is the job of the low nodeid in
the barrier wait routine).

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/recoverd.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 6e4ee94ce7..8bb895ffd9 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -168,9 +168,15 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 		/*
 		 * Other lockspace members may be going through the "neg" steps
 		 * while also adding us to the lockspace, in which case they'll
-		 * be looking for this status bit during dlm_recover_locks().
+		 * be doing the recover_locks (RS_LOCKS) barrier.
 		 */
 		dlm_set_recover_status(ls, DLM_RS_LOCKS);
+
+		error = dlm_recover_locks_wait(ls);
+		if (error) {
+			log_error(ls, "recover_locks_wait failed %d", error);
+			goto fail;
+		}
 	}
 
 	dlm_release_root_list(ls);
-- 
cgit v1.2.2


From 6f90a8b1b87f97144911790390d56f695b59db9b Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Fri, 10 Nov 2006 14:16:27 -0600
Subject: [DLM] clear sbflags on lock master

RH BZ 211622

The ALTMODE flag can be set in the lock master's copy of the lock but
never cleared, so ALTMODE will also be returned in a subsequent conversion
of the lock when it shouldn't be.  This results in lock_dlm incorrectly
switching to the alternate lock mode when returning the result to gfs
which then asserts when it sees the wrong lock state.  The fix is to
propagate the cleared sbflags value to the master node when the lock is
requested.  QA's d_rwrandirectlarge test triggers this bug very quickly.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 6088a16926..30878defae 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -2372,6 +2372,7 @@ static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
 static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
 {
 	lkb->lkb_exflags = ms->m_exflags;
+	lkb->lkb_sbflags = ms->m_sbflags;
 	lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
 		         (ms->m_flags & 0x0000FFFF);
 }
-- 
cgit v1.2.2


From b98c95af01c10827e3443157651eb469071391a3 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Wed, 15 Nov 2006 12:29:24 -0500
Subject: [DLM] Fix DLM config

The attached patch fixes the DLM config so that it selects the chosen network
transport. It should fix the bug where DLM can be left selected when NET gets
unselected. This incorporates all the comments received about this patch.

Cc: Adrian Bunk <bunk@stusta.de>
Cc: Andrew Morton <akpm@osdl.org>
Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index c5985b883b..b5654a284f 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -1,10 +1,11 @@
 menu "Distributed Lock Manager"
-	depends on INET && IP_SCTP && EXPERIMENTAL
+	depends on EXPERIMENTAL && INET
 
 config DLM
 	tristate "Distributed Lock Manager (DLM)"
 	depends on IPV6 || IPV6=n
 	select CONFIGFS_FS
+	select IP_SCTP if DLM_SCTP
 	help
 	A general purpose distributed lock manager for kernel or userspace
 	applications.
-- 
cgit v1.2.2


From ab923031ceb95ec50ef33ccadf28663c660aa94c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 15 Nov 2006 15:17:03 -0500
Subject: [GFS2] Fix memory allocation in glock.c

Change from GFP_KERNEL to GFP_NOFS as this was causing a
slow down when trying to push inodes from cache.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 746347a404..edc21c8d7f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,7 +769,7 @@ restart:
 	} else {
 		spin_unlock(&gl->gl_spin);
 
-		new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_KERNEL);
+		new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS);
 		if (!new_gh)
 			return;
 		set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
-- 
cgit v1.2.2


From 4cf1ed8144e740de27c6146c25d5d7ea26679cc5 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 15 Nov 2006 15:21:06 -0500
Subject: [GFS2] Tidy up bmap & fix boundary bug

This moves the locking for bmap into the bmap function itself
rather than using a wrapper function. It also fixes a bug where
the boundary flag was set on the wrong bh. Also the flags on
the mapped bh are reset earlier in the function to ensure that
they are 100% correct on the error path.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/bmap.c | 117 ++++++++++++++++++++++++++-------------------------------
 1 file changed, 54 insertions(+), 63 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 06e3447ea1..8240c1ff94 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -423,12 +423,29 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
 	return 0;
 }
 
+static inline void bmap_lock(struct inode *inode, int create)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	if (create)
+		down_write(&ip->i_rw_mutex);
+	else
+		down_read(&ip->i_rw_mutex);
+}
+
+static inline void bmap_unlock(struct inode *inode, int create)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	if (create)
+		up_write(&ip->i_rw_mutex);
+	else
+		up_read(&ip->i_rw_mutex);
+}
+
 /**
- * gfs2_block_pointers - Map a block from an inode to a disk block
+ * gfs2_block_map - Map a block from an inode to a disk block
  * @inode: The inode
  * @lblock: The logical block number
- * @map_bh: The bh to be mapped
- * @mp: metapath to use
+ * @bh_map: The bh to be mapped
  *
  * Find the block number on the current device which corresponds to an
  * inode's block. If the block had to be created, "new" will be set.
@@ -436,8 +453,8 @@ static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
  * Returns: errno
  */
 
-static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
-			       struct buffer_head *bh_map, struct metapath *mp)
+int gfs2_block_map(struct inode *inode, u64 lblock, int create,
+		   struct buffer_head *bh_map)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -451,51 +468,55 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 	u64 dblock = 0;
 	int boundary;
 	unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
+	struct metapath mp;
+	u64 size;
 
 	BUG_ON(maxlen == 0);
 
 	if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
 		return 0;
 
+	bmap_lock(inode, create);
+	clear_buffer_mapped(bh_map);
+	clear_buffer_new(bh_map);
+	clear_buffer_boundary(bh_map);
 	bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
-
-	height = calc_tree_height(ip, (lblock + 1) * bsize);
-	if (ip->i_di.di_height < height) {
-		if (!create)
-			return 0;
-
-		error = build_height(inode, height);
-		if (error)
-			return error;
+	size = (lblock + 1) * bsize;
+
+	if (size > ip->i_di.di_size) {
+		height = calc_tree_height(ip, size);
+		if (ip->i_di.di_height < height) {
+			if (!create)
+				goto out_ok;
+	
+			error = build_height(inode, height);
+			if (error)
+				goto out_fail;
+		}
 	}
 
-	find_metapath(ip, lblock, mp);
+	find_metapath(ip, lblock, &mp);
 	end_of_metadata = ip->i_di.di_height - 1;
-
 	error = gfs2_meta_inode_buffer(ip, &bh);
 	if (error)
-		return error;
+		goto out_fail;
 
 	for (x = 0; x < end_of_metadata; x++) {
-		lookup_block(ip, bh, x, mp, create, &new, &dblock);
+		lookup_block(ip, bh, x, &mp, create, &new, &dblock);
 		brelse(bh);
 		if (!dblock)
-			return 0;
+			goto out_ok;
 
 		error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh);
 		if (error)
-			return error;
+			goto out_fail;
 	}
 
-	boundary = lookup_block(ip, bh, end_of_metadata, mp, create, &new, &dblock);
-	clear_buffer_mapped(bh_map);
-	clear_buffer_new(bh_map);
-	clear_buffer_boundary(bh_map);
-
+	boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock);
 	if (dblock) {
 		map_bh(bh_map, inode->i_sb, dblock);
 		if (boundary)
-			set_buffer_boundary(bh);
+			set_buffer_boundary(bh_map);
 		if (new) {
 			struct buffer_head *dibh;
 			error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -510,8 +531,8 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 		while(--maxlen && !buffer_boundary(bh_map)) {
 			u64 eblock;
 
-			mp->mp_list[end_of_metadata]++;
-			boundary = lookup_block(ip, bh, end_of_metadata, mp, 0, &new, &eblock);
+			mp.mp_list[end_of_metadata]++;
+			boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock);
 			if (eblock != ++dblock)
 				break;
 			bh_map->b_size += (1 << inode->i_blkbits);
@@ -521,43 +542,15 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create,
 	}
 out_brelse:
 	brelse(bh);
-	return 0;
-}
-
-
-static inline void bmap_lock(struct inode *inode, int create)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	if (create)
-		down_write(&ip->i_rw_mutex);
-	else
-		down_read(&ip->i_rw_mutex);
-}
-
-static inline void bmap_unlock(struct inode *inode, int create)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	if (create)
-		up_write(&ip->i_rw_mutex);
-	else
-		up_read(&ip->i_rw_mutex);
-}
-
-int gfs2_block_map(struct inode *inode, u64 lblock, int create,
-		   struct buffer_head *bh)
-{
-	struct metapath mp;
-	int ret;
-
-	bmap_lock(inode, create);
-	ret = gfs2_block_pointers(inode, lblock, create, bh, &mp);
+out_ok:
+	error = 0;
+out_fail:
 	bmap_unlock(inode, create);
-	return ret;
+	return error;
 }
 
 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 {
-	struct metapath mp;
 	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 	int ret;
 	int create = *new;
@@ -567,9 +560,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
 	BUG_ON(!new);
 
 	bh.b_size = 1 << (inode->i_blkbits + 5);
-	bmap_lock(inode, create);
-	ret = gfs2_block_pointers(inode, lblock, create, &bh, &mp);
-	bmap_unlock(inode, create);
+	ret = gfs2_block_map(inode, lblock, create, &bh);
 	*extlen = bh.b_size >> inode->i_blkbits;
 	*dblock = bh.b_blocknr;
 	if (buffer_new(&bh))
-- 
cgit v1.2.2


From 175011cf6edddea32e5f5e0e04434104cc348de9 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 16 Nov 2006 10:58:55 -0500
Subject: [GFS2] Remove unused sysfs files

Four of the sysfs files are unused and can therefore be removed.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/sys.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 0e0ec988f7..983eaf1e06 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -426,9 +426,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
 }                                                                             \
 TUNE_ATTR_2(name, name##_store)
 
-TUNE_ATTR(ilimit, 0);
-TUNE_ATTR(ilimit_tries, 0);
-TUNE_ATTR(ilimit_min, 0);
 TUNE_ATTR(demote_secs, 0);
 TUNE_ATTR(incore_log_blocks, 0);
 TUNE_ATTR(log_flush_secs, 0);
@@ -447,7 +444,6 @@ TUNE_ATTR(quota_simul_sync, 1);
 TUNE_ATTR(quota_cache_secs, 1);
 TUNE_ATTR(max_atomic_write, 1);
 TUNE_ATTR(stall_secs, 1);
-TUNE_ATTR(entries_per_readdir, 1);
 TUNE_ATTR(greedy_default, 1);
 TUNE_ATTR(greedy_quantum, 1);
 TUNE_ATTR(greedy_max, 1);
@@ -459,9 +455,6 @@ TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
 TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
 
 static struct attribute *tune_attrs[] = {
-	&tune_attr_ilimit.attr,
-	&tune_attr_ilimit_tries.attr,
-	&tune_attr_ilimit_min.attr,
 	&tune_attr_demote_secs.attr,
 	&tune_attr_incore_log_blocks.attr,
 	&tune_attr_log_flush_secs.attr,
@@ -478,7 +471,6 @@ static struct attribute *tune_attrs[] = {
 	&tune_attr_quota_cache_secs.attr,
 	&tune_attr_max_atomic_write.attr,
 	&tune_attr_stall_secs.attr,
-	&tune_attr_entries_per_readdir.attr,
 	&tune_attr_greedy_default.attr,
 	&tune_attr_greedy_quantum.attr,
 	&tune_attr_greedy_max.attr,
-- 
cgit v1.2.2


From dcd2479959c79d44f5dd77e71672e70f1f8b1f06 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 16 Nov 2006 11:08:16 -0500
Subject: [GFS2] Remove unused function from inode.c

The gfs2_glock_nq_m_atime function is unused in so far as its only
ever called with num_gh = 1, and this falls through to the
gfs2_glock_nq_atime function, so we might as well call that directly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c       | 86 ---------------------------------------------------
 fs/gfs2/inode.h       |  4 ---
 fs/gfs2/ops_address.c |  8 ++---
 fs/gfs2/ops_file.c    |  2 +-
 4 files changed, 5 insertions(+), 95 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ea9ca239c8..ce7f833069 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1234,92 +1234,6 @@ fail:
 	return error;
 }
 
-/**
- * glock_compare_atime - Compare two struct gfs2_glock structures for sort
- * @arg_a: the first structure
- * @arg_b: the second structure
- *
- * Returns: 1 if A > B
- *         -1 if A < B
- *          0 if A == B
- */
-
-static int glock_compare_atime(const void *arg_a, const void *arg_b)
-{
-	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
-	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
-	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
-	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
-
-	if (a->ln_number > b->ln_number)
-		return 1;
-	if (a->ln_number < b->ln_number)
-		return -1;
-	if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
-		return 1;
-	if (gh_a->gh_state == LM_ST_SHARED && (gh_b->gh_flags & GL_ATIME))
-		return 1;
-
-	return 0;
-}
-
-/**
- * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
- *      atime update
- * @num_gh: the number of structures
- * @ghs: an array of struct gfs2_holder structures
- *
- * Returns: 0 on success (all glocks acquired),
- *          errno on failure (no glocks acquired)
- */
-
-int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
-{
-	struct gfs2_holder **p;
-	unsigned int x;
-	int error = 0;
-
-	if (!num_gh)
-		return 0;
-
-	if (num_gh == 1) {
-		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
-		if (ghs->gh_flags & GL_ATIME)
-			error = gfs2_glock_nq_atime(ghs);
-		else
-			error = gfs2_glock_nq(ghs);
-		return error;
-	}
-
-	p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	for (x = 0; x < num_gh; x++)
-		p[x] = &ghs[x];
-
-	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
-
-	for (x = 0; x < num_gh; x++) {
-		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
-
-		if (p[x]->gh_flags & GL_ATIME)
-			error = gfs2_glock_nq_atime(p[x]);
-		else
-			error = gfs2_glock_nq(p[x]);
-
-		if (error) {
-			while (x--)
-				gfs2_glock_dq(p[x]);
-			break;
-		}
-	}
-
-	kfree(p);
-	return error;
-}
-
-
 static int
 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 46917edfdd..b57f448b15 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -50,12 +50,8 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 		   struct gfs2_inode *ip);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
-int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
-
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
-
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 
 #endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 2f7ef980d0..8676c39d0d 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -217,7 +217,7 @@ static int gfs2_readpage(struct file *file, struct page *page)
 		}
 		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
 		do_unlock = 1;
-		error = gfs2_glock_nq_m_atime(1, &gh);
+		error = gfs2_glock_nq_atime(&gh);
 		if (unlikely(error))
 			goto out_unlock;
 	}
@@ -282,7 +282,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
 		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
 				 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
 		do_unlock = 1;
-		ret = gfs2_glock_nq_m_atime(1, &gh);
+		ret = gfs2_glock_nq_atime(&gh);
 		if (ret == GLR_TRYFAILED)
 			goto out_noerror;
 		if (unlikely(ret))
@@ -354,7 +354,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
-	error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
+	error = gfs2_glock_nq_atime(&ip->i_gh);
 	if (unlikely(error)) {
 		if (error == GLR_TRYFAILED)
 			error = AOP_TRUNCATED_PAGE;
@@ -609,7 +609,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	 * on this path. All we need change is atime.
 	 */
 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-	rv = gfs2_glock_nq_m_atime(1, &gh);
+	rv = gfs2_glock_nq_atime(&gh);
 	if (rv)
 		goto out;
 
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index eabf6c61a9..c2be216eb9 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -253,7 +253,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 	u32 fsflags;
 
 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-	error = gfs2_glock_nq_m_atime(1, &gh);
+	error = gfs2_glock_nq_atime(&gh);
 	if (error)
 		return error;
 
-- 
cgit v1.2.2


From 5e7d65cd9d3819512b059f4260de0119b985454c Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 17 Nov 2006 12:27:44 -0500
Subject: [GFS2] Make sentinel dirents compatible with gfs1

When deleting directory entries, we set the inum.no_addr to zero
in a dirent when its the first dirent in a block and thus cannot
be merged into the previous dirent as is the usual case. In gfs1,
inum.no_formal_ino was used instead.

This patch changes gfs2 to set both inum.no_addr and inum.no_formal_ino
to zero. It also changes the test from just looking at inum.no_addr to
look at both inum.no_addr and inum.no_formal_ino and a sentinel is
now considered to be a dirent in which _either_ (or both) of them
is set to zero.

This resolves Red Hat bugzillas: #215809, #211465

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/dir.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a2923fb91b..0fdcb7713c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -340,10 +340,15 @@ fail:
 	return (copied) ? copied : error;
 }
 
+static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
+{
+	return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
+}
+
 static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
 				     const struct qstr *name, int ret)
 {
-	if (dent->de_inum.no_addr != 0 &&
+	if (!gfs2_dirent_sentinel(dent) &&
 	    be32_to_cpu(dent->de_hash) == name->hash &&
 	    be16_to_cpu(dent->de_name_len) == name->len &&
 	    memcmp(dent+1, name->name, name->len) == 0)
@@ -388,7 +393,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
 	unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 	unsigned totlen = be16_to_cpu(dent->de_rec_len);
 
-	if (!dent->de_inum.no_addr)
+	if (gfs2_dirent_sentinel(dent))
 		actual = GFS2_DIRENT_SIZE(0);
 	if (totlen - actual >= required)
 		return 1;
@@ -405,7 +410,7 @@ static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
 			      void *opaque)
 {
 	struct dirent_gather *g = opaque;
-	if (dent->de_inum.no_addr) {
+	if (!gfs2_dirent_sentinel(dent)) {
 		g->pdent[g->offset++] = dent;
 	}
 	return 0;
@@ -433,10 +438,10 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
 	if (unlikely(offset + size > len))
 		goto error;
 	msg = "zero inode number";
-	if (unlikely(!first && !dent->de_inum.no_addr))
+	if (unlikely(!first && gfs2_dirent_sentinel(dent)))
 		goto error;
 	msg = "name length is greater than space in dirent";
-	if (dent->de_inum.no_addr &&
+	if (!gfs2_dirent_sentinel(dent) &&
 	    unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
 		     size))
 		goto error;
@@ -598,7 +603,7 @@ static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
 		return ret;
 
         /* Only the first dent could ever have de_inum.no_addr == 0 */
-	if (!tmp->de_inum.no_addr) {
+	if (gfs2_dirent_sentinel(tmp)) {
 		gfs2_consist_inode(dip);
 		return -EIO;
 	}
@@ -621,7 +626,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
 {
 	u16 cur_rec_len, prev_rec_len;
 
-	if (!cur->de_inum.no_addr) {
+	if (gfs2_dirent_sentinel(cur)) {
 		gfs2_consist_inode(dip);
 		return;
 	}
@@ -633,7 +638,8 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
 	   out the inode number and return.  */
 
 	if (!prev) {
-		cur->de_inum.no_addr = 0;	/* No endianess worries */
+		cur->de_inum.no_addr = 0;
+		cur->de_inum.no_formal_ino = 0;
 		return;
 	}
 
@@ -664,7 +670,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
 	struct gfs2_dirent *ndent;
 	unsigned offset = 0, totlen;
 
-	if (dent->de_inum.no_addr)
+	if (!gfs2_dirent_sentinel(dent))
 		offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
 	totlen = be16_to_cpu(dent->de_rec_len);
 	BUG_ON(offset + name->len > totlen);
@@ -1002,7 +1008,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
 		if (dirent_next(dip, obh, &next))
 			next = NULL;
 
-		if (dent->de_inum.no_addr &&
+		if (!gfs2_dirent_sentinel(dent) &&
 		    be32_to_cpu(dent->de_hash) < divider) {
 			struct qstr str;
 			str.name = (char*)(dent+1);
-- 
cgit v1.2.2


From fa2ecfc5e11b12f25b67f9c84ac6b0e74a6a0115 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 20 Nov 2006 10:04:49 -0500
Subject: [GFS2] Fix Kconfig wrt CRC32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GFS2 requires the CRC32 library function. This was reported by
Toralf Förster.

Cc: Toralf Förster <toralf.foerster@gmx.de>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 8c27de8b95..c0791cbaca 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -2,6 +2,7 @@ config GFS2_FS
 	tristate "GFS2 file system support"
 	depends on EXPERIMENTAL
 	select FS_POSIX_ACL
+	select CRC32
 	help
 	A cluster filesystem.
 
-- 
cgit v1.2.2


From 1a14d3a68f04527546121eb7b45187ff6af63151 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 20 Nov 2006 10:37:45 -0500
Subject: [GFS2] Simplify glops functions

The go_sync callback took two flags, but one of them was set on every
call, so this patch removes once of the flags and makes the previously
conditional operations (on this flag), unconditional.

The go_inval callback took three flags, each of which was set on every
call to it. This patch removes the flags and makes the operations
unconditional, which makes the logic rather more obvious.

Two now unused flags are also removed from incore.h.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c  | 10 +++++-----
 fs/gfs2/glops.c  | 42 +++++++++++-------------------------------
 fs/gfs2/incore.h | 25 +++++++++++--------------
 fs/gfs2/super.c  |  2 +-
 4 files changed, 28 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index edc21c8d7f..b8ba4d5c1d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -847,12 +847,12 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
 
 	if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
 		if (glops->go_inval)
-			glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+			glops->go_inval(gl, DIO_METADATA);
 	} else if (gl->gl_state == LM_ST_DEFERRED) {
 		/* We might not want to do this here.
 		   Look at moving to the inode glops. */
 		if (glops->go_inval)
-			glops->go_inval(gl, DIO_DATA);
+			glops->go_inval(gl, 0);
 	}
 
 	/*  Deal with each possible exit condition  */
@@ -954,7 +954,7 @@ void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
 	gfs2_assert_warn(sdp, state != gl->gl_state);
 
 	if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
-		glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
+		glops->go_sync(gl);
 
 	gfs2_glock_hold(gl);
 	gl->gl_req_bh = xmote_bh;
@@ -995,7 +995,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 	state_change(gl, LM_ST_UNLOCKED);
 
 	if (glops->go_inval)
-		glops->go_inval(gl, DIO_METADATA | DIO_DATA);
+		glops->go_inval(gl, DIO_METADATA);
 
 	if (gh) {
 		spin_lock(&gl->gl_spin);
@@ -1041,7 +1041,7 @@ void gfs2_glock_drop_th(struct gfs2_glock *gl)
 	gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
 
 	if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
-		glops->go_sync(gl, DIO_METADATA | DIO_DATA | DIO_RELEASE);
+		glops->go_sync(gl);
 
 	gfs2_glock_hold(gl);
 	gl->gl_req_bh = drop_bh;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index b92de0af0b..60561ca070 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -173,23 +173,18 @@ static void gfs2_page_writeback(struct gfs2_glock *gl)
 /**
  * meta_go_sync - sync out the metadata for this glock
  * @gl: the glock
- * @flags: DIO_*
  *
  * Called when demoting or unlocking an EX glock.  We must flush
  * to disk all dirty buffers/pages relating to this glock, and must not
  * not return to caller to demote/unlock the glock until I/O is complete.
  */
 
-static void meta_go_sync(struct gfs2_glock *gl, int flags)
+static void meta_go_sync(struct gfs2_glock *gl)
 {
-	if (!(flags & DIO_METADATA))
-		return;
-
 	if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
 		gfs2_log_flush(gl->gl_sbd, gl);
 		gfs2_meta_sync(gl);
-		if (flags & DIO_RELEASE)
-			gfs2_ail_empty_gl(gl);
+		gfs2_ail_empty_gl(gl);
 	}
 
 }
@@ -264,31 +259,18 @@ static void inode_go_drop_th(struct gfs2_glock *gl)
 /**
  * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
  * @gl: the glock protecting the inode
- * @flags:
  *
  */
 
-static void inode_go_sync(struct gfs2_glock *gl, int flags)
+static void inode_go_sync(struct gfs2_glock *gl)
 {
-	int meta = (flags & DIO_METADATA);
-	int data = (flags & DIO_DATA);
-
 	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-		if (meta && data) {
-			gfs2_page_writeback(gl);
-			gfs2_log_flush(gl->gl_sbd, gl);
-			gfs2_meta_sync(gl);
-			gfs2_page_wait(gl);
-			clear_bit(GLF_DIRTY, &gl->gl_flags);
-		} else if (meta) {
-			gfs2_log_flush(gl->gl_sbd, gl);
-			gfs2_meta_sync(gl);
-		} else if (data) {
-			gfs2_page_writeback(gl);
-			gfs2_page_wait(gl);
-		}
-		if (flags & DIO_RELEASE)
-			gfs2_ail_empty_gl(gl);
+		gfs2_page_writeback(gl);
+		gfs2_log_flush(gl->gl_sbd, gl);
+		gfs2_meta_sync(gl);
+		gfs2_page_wait(gl);
+		clear_bit(GLF_DIRTY, &gl->gl_flags);
+		gfs2_ail_empty_gl(gl);
 	}
 }
 
@@ -302,15 +284,13 @@ static void inode_go_sync(struct gfs2_glock *gl, int flags)
 static void inode_go_inval(struct gfs2_glock *gl, int flags)
 {
 	int meta = (flags & DIO_METADATA);
-	int data = (flags & DIO_DATA);
 
 	if (meta) {
 		struct gfs2_inode *ip = gl->gl_object;
 		gfs2_meta_inval(gl);
 		set_bit(GIF_INVALID, &ip->i_flags);
 	}
-	if (data)
-		gfs2_page_inval(gl);
+	gfs2_page_inval(gl);
 }
 
 /**
@@ -494,7 +474,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl)
 	if (gl->gl_state != LM_ST_UNLOCKED &&
 	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
 		gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode));
-		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
 
 		error = gfs2_find_jhead(sdp->sd_jdesc, &head);
 		if (error)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 227a74dc5e..734421edae 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -14,8 +14,6 @@
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
-#define DIO_DATA	0x00000040
-#define DIO_RELEASE	0x00000080
 #define DIO_ALL		0x00000100
 
 struct gfs2_log_operations;
@@ -103,18 +101,17 @@ struct gfs2_bufdata {
 };
 
 struct gfs2_glock_operations {
-	void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
-			     int flags);
-	void (*go_xmote_bh) (struct gfs2_glock * gl);
-	void (*go_drop_th) (struct gfs2_glock * gl);
-	void (*go_drop_bh) (struct gfs2_glock * gl);
-	void (*go_sync) (struct gfs2_glock * gl, int flags);
-	void (*go_inval) (struct gfs2_glock * gl, int flags);
-	int (*go_demote_ok) (struct gfs2_glock * gl);
-	int (*go_lock) (struct gfs2_holder * gh);
-	void (*go_unlock) (struct gfs2_holder * gh);
-	void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
-	void (*go_greedy) (struct gfs2_glock * gl);
+	void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags);
+	void (*go_xmote_bh) (struct gfs2_glock *gl);
+	void (*go_drop_th) (struct gfs2_glock *gl);
+	void (*go_drop_bh) (struct gfs2_glock *gl);
+	void (*go_sync) (struct gfs2_glock *gl);
+	void (*go_inval) (struct gfs2_glock *gl, int flags);
+	int (*go_demote_ok) (struct gfs2_glock *gl);
+	int (*go_lock) (struct gfs2_holder *gh);
+	void (*go_unlock) (struct gfs2_holder *gh);
+	void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
+	void (*go_greedy) (struct gfs2_glock *gl);
 	const int go_type;
 };
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0ef8317270..1408c5f313 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -517,7 +517,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 		return error;
 
 	gfs2_meta_cache_flush(ip);
-	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
+	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
 
 	error = gfs2_find_jhead(sdp->sd_jdesc, &head);
 	if (error)
-- 
cgit v1.2.2


From 28626e2078571c4b776a17eaa486bbd2b7dfe2cd Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 22 Nov 2006 11:13:21 -0500
Subject: [GFS2] Fix glock ordering on inode creation

The lock order here should be parent -> child rather than
numeric order.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/inode.c | 31 ++++---------------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ce7f833069..d122074c45 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -870,33 +870,10 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 	if (error)
 		goto fail_gunlock;
 
-	if (inum.no_addr < dip->i_num.no_addr) {
-		gfs2_glock_dq(ghs);
-
-		error = gfs2_glock_nq_num(sdp, inum.no_addr,
-					  &gfs2_inode_glops, LM_ST_EXCLUSIVE,
-					  GL_SKIP, ghs + 1);
-		if (error) {
-			return ERR_PTR(error);
-		}
-
-		gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
-		error = gfs2_glock_nq(ghs);
-		if (error) {
-			gfs2_glock_dq_uninit(ghs + 1);
-			return ERR_PTR(error);
-		}
-
-		error = create_ok(dip, name, mode);
-		if (error)
-			goto fail_gunlock2;
-	} else {
-		error = gfs2_glock_nq_num(sdp, inum.no_addr,
-					  &gfs2_inode_glops, LM_ST_EXCLUSIVE,
-					  GL_SKIP, ghs + 1);
-		if (error)
-			goto fail_gunlock;
-	}
+	error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops,
+				  LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+	if (error)
+		goto fail_gunlock;
 
 	error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev);
 	if (error)
-- 
cgit v1.2.2


From ae619320b22f8e0b2bbe4a3a5ac2f9ccf08d7ec2 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 22 Nov 2006 11:28:47 -0500
Subject: [GFS2] mark_inode_dirty after write to stuffed file

Writes to stuffed files were not being marked dirty correctly.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_address.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 8676c39d0d..d8d69a72a1 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -472,8 +472,10 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 
 		SetPageUptodate(page);
 
-		if (inode->i_size < file_size)
+		if (inode->i_size < file_size) {
 			i_size_write(inode, file_size);
+			mark_inode_dirty(inode);
+		}
 	} else {
 		if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
 		    gfs2_is_jdata(ip))
-- 
cgit v1.2.2


From b004157ab5b374a498a5874cda68c389219d23e7 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Nov 2006 10:51:34 -0500
Subject: [GFS2] Fix journal flush problem

This fixes a bug which resulted in poor performance due to flushing
the journal too often. The code path in question was via the inode_go_sync()
function in glops.c. The solution is not to flush the journal immediately
when inodes are ejected from memory, but batch up the work for glockd to
deal with later on. This means that glocks may now live on beyond the end of
the lifetime of their inodes (but not very much longer in the normal case).

Also fixed in this patch is a bug (which was hidden by the bug mentioned above) in
calculation of the number of free journal blocks.

The gfs2_logd process has been altered to be more responsive to the journal
filling up. We now wake it up when the number of uncommitted journal blocks
has reached the threshold level rather than trying to flush directly at the
end of each transaction. This again means doing fewer, but larger, log
flushes in general.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/daemon.c    |  7 ++--
 fs/gfs2/glock.c     | 17 +---------
 fs/gfs2/glock.h     |  1 -
 fs/gfs2/glops.c     | 93 ++++++++++++++---------------------------------------
 fs/gfs2/log.c       | 17 ++++++----
 fs/gfs2/meta_io.c   |  3 ++
 fs/gfs2/ops_super.c |  7 ++--
 7 files changed, 46 insertions(+), 99 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index cab1f68d46..683cb5bda8 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -112,6 +112,7 @@ int gfs2_logd(void *data)
 	struct gfs2_sbd *sdp = data;
 	struct gfs2_holder ji_gh;
 	unsigned long t;
+	int need_flush;
 
 	while (!kthread_should_stop()) {
 		/* Advance the log tail */
@@ -120,8 +121,10 @@ int gfs2_logd(void *data)
 		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
 
 		gfs2_ail1_empty(sdp, DIO_ALL);
-
-		if (time_after_eq(jiffies, t)) {
+		gfs2_log_lock(sdp);
+		need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
+		gfs2_log_unlock(sdp);
+		if (need_flush || time_after_eq(jiffies, t)) {
 			gfs2_log_flush(sdp, NULL);
 			sdp->sd_log_flush_time = jiffies;
 		}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8ba4d5c1d..3c2ff81c84 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -785,21 +785,6 @@ out:
 		gfs2_holder_put(new_gh);
 }
 
-void gfs2_glock_inode_squish(struct inode *inode)
-{
-	struct gfs2_holder gh;
-	struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
-	gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
-	set_bit(HIF_DEMOTE, &gh.gh_iflags);
-	spin_lock(&gl->gl_spin);
-	gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
-	list_add_tail(&gh.gh_list, &gl->gl_waiters2);
-	run_queue(gl);
-	spin_unlock(&gl->gl_spin);
-	wait_for_completion(&gh.gh_wait);
-	gfs2_holder_uninit(&gh);
-}
-
 /**
  * state_change - record that the glock is now in a different state
  * @gl: the glock
@@ -1920,7 +1905,7 @@ out:
 
 static void scan_glock(struct gfs2_glock *gl)
 {
-	if (gl->gl_ops == &gfs2_inode_glops)
+	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
 		return;
 
 	if (gfs2_glmutex_trylock(gl)) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a331bf8175..fb39108fc0 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
 			     const struct gfs2_glock_operations *glops,
 			     unsigned int state, int flags);
-void gfs2_glock_inode_squish(struct inode *inode);
 
 /**
  * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 60561ca070..b068d10bcb 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -106,70 +106,6 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
 	clear_bit(GIF_SW_PAGED, &ip->i_flags);
 }
 
-/**
- * gfs2_page_inval - Invalidate all pages associated with a glock
- * @gl: the glock
- *
- */
-
-static void gfs2_page_inval(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip;
-	struct inode *inode;
-
-	ip = gl->gl_object;
-	inode = &ip->i_inode;
-	if (!ip || !S_ISREG(inode->i_mode))
-		return;
-
-	truncate_inode_pages(inode->i_mapping, 0);
-	gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
-	clear_bit(GIF_PAGED, &ip->i_flags);
-}
-
-/**
- * gfs2_page_wait - Wait for writeback of data
- * @gl: the glock
- *
- * Syncs data (not metadata) for a regular file.
- * No-op for all other types.
- */
-
-static void gfs2_page_wait(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip = gl->gl_object;
-	struct inode *inode = &ip->i_inode;
-	struct address_space *mapping = inode->i_mapping;
-	int error;
-
-	if (!S_ISREG(inode->i_mode))
-		return;
-
-	error = filemap_fdatawait(mapping);
-
-	/* Put back any errors cleared by filemap_fdatawait()
-	   so they can be caught by someone who can pass them
-	   up to user space. */
-
-	if (error == -ENOSPC)
-		set_bit(AS_ENOSPC, &mapping->flags);
-	else if (error)
-		set_bit(AS_EIO, &mapping->flags);
-
-}
-
-static void gfs2_page_writeback(struct gfs2_glock *gl)
-{
-	struct gfs2_inode *ip = gl->gl_object;
-	struct inode *inode = &ip->i_inode;
-	struct address_space *mapping = inode->i_mapping;
-
-	if (!S_ISREG(inode->i_mode))
-		return;
-
-	filemap_fdatawrite(mapping);
-}
-
 /**
  * meta_go_sync - sync out the metadata for this glock
  * @gl: the glock
@@ -264,11 +200,24 @@ static void inode_go_drop_th(struct gfs2_glock *gl)
 
 static void inode_go_sync(struct gfs2_glock *gl)
 {
+	struct gfs2_inode *ip = gl->gl_object;
+
+	if (ip && !S_ISREG(ip->i_inode.i_mode))
+		ip = NULL;
+
 	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-		gfs2_page_writeback(gl);
 		gfs2_log_flush(gl->gl_sbd, gl);
+		if (ip)
+			filemap_fdatawrite(ip->i_inode.i_mapping);
 		gfs2_meta_sync(gl);
-		gfs2_page_wait(gl);
+		if (ip) {
+			struct address_space *mapping = ip->i_inode.i_mapping;
+			int error = filemap_fdatawait(mapping);
+			if (error == -ENOSPC)
+				set_bit(AS_ENOSPC, &mapping->flags);
+			else if (error)
+				set_bit(AS_EIO, &mapping->flags);
+		}
 		clear_bit(GLF_DIRTY, &gl->gl_flags);
 		gfs2_ail_empty_gl(gl);
 	}
@@ -283,14 +232,20 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
 static void inode_go_inval(struct gfs2_glock *gl, int flags)
 {
+	struct gfs2_inode *ip = gl->gl_object;
 	int meta = (flags & DIO_METADATA);
 
 	if (meta) {
-		struct gfs2_inode *ip = gl->gl_object;
 		gfs2_meta_inval(gl);
-		set_bit(GIF_INVALID, &ip->i_flags);
+		if (ip)
+			set_bit(GIF_INVALID, &ip->i_flags);
+	}
+
+	if (ip && S_ISREG(ip->i_inode.i_mode)) {
+		truncate_inode_pages(ip->i_inode.i_mapping, 0);
+		gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !ip->i_inode.i_mapping->nrpages);
+		clear_bit(GIF_PAGED, &ip->i_flags);
 	}
-	gfs2_page_inval(gl);
 }
 
 /**
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0cace3da9d..6456fc39aa 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -261,6 +261,12 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * @sdp: The GFS2 superblock
  * @blks: The number of blocks to reserve
  *
+ * Note that we never give out the last 6 blocks of the journal. Thats
+ * due to the fact that there is are a small number of header blocks
+ * associated with each log flush. The exact number can't be known until
+ * flush time, so we ensure that we have just enough free blocks at all
+ * times to avoid running out during a log flush.
+ *
  * Returns: errno
  */
 
@@ -274,7 +280,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 
 	mutex_lock(&sdp->sd_log_reserve_mutex);
 	gfs2_log_lock(sdp);
-	while(sdp->sd_log_blks_free <= blks) {
+	while(sdp->sd_log_blks_free <= (blks + 6)) {
 		gfs2_log_unlock(sdp);
 		gfs2_ail1_empty(sdp, 0);
 		gfs2_log_flush(sdp, NULL);
@@ -643,12 +649,9 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	up_read(&sdp->sd_log_flush_lock);
 
 	gfs2_log_lock(sdp);
-	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
-		gfs2_log_unlock(sdp);
-		gfs2_log_flush(sdp, NULL);
-	} else {
-		gfs2_log_unlock(sdp);
-	}
+	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks))
+		wake_up_process(sdp->sd_logd_process);
+	gfs2_log_unlock(sdp);
 }
 
 /**
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3912d6a4b1..939a09f6e8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -472,6 +472,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 	struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
 	int in_cache = 0;
 
+	BUG_ON(!gl);
+	BUG_ON(!sdp);
+
 	spin_lock(&ip->i_spin);
 	if (*bh_slot && (*bh_slot)->b_blocknr == num) {
 		bh = *bh_slot;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 8635175692..7685b46f93 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -157,7 +157,8 @@ static void gfs2_write_super(struct super_block *sb)
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
 	sb->s_dirt = 0;
-	gfs2_log_flush(sb->s_fs_info, NULL);
+	if (wait)
+		gfs2_log_flush(sb->s_fs_info, NULL);
 	return 0;
 }
 
@@ -293,8 +294,6 @@ static void gfs2_clear_inode(struct inode *inode)
 	 */
 	if (inode->i_private) {
 		struct gfs2_inode *ip = GFS2_I(inode);
-		gfs2_glock_inode_squish(inode);
-		gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
 		ip->i_gl->gl_object = NULL;
 		gfs2_glock_schedule_for_reclaim(ip->i_gl);
 		gfs2_glock_put(ip->i_gl);
@@ -395,7 +394,7 @@ static void gfs2_delete_inode(struct inode *inode)
 	if (!inode->i_private)
 		goto out;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		goto out;
-- 
cgit v1.2.2


From a25311c8e0b7071b129ca9a9e49e22eeaf620864 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Nov 2006 11:06:35 -0500
Subject: [GFS2] Move gfs2_meta_syncfs() into log.c

By moving gfs2_meta_syncfs() into log.c, gfs2_ail1_start()
can be made static.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c     | 21 ++++++++++++++++++++-
 fs/gfs2/log.h     |  2 +-
 fs/gfs2/meta_io.c | 17 -----------------
 fs/gfs2/meta_io.h |  1 -
 4 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6456fc39aa..7713d59186 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -15,6 +15,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/delay.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -142,7 +143,7 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
 	return list_empty(&ai->ai_ail1_list);
 }
 
-void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
+static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
 {
 	struct list_head *head = &sdp->sd_ail1_list;
 	u64 sync_gen;
@@ -689,3 +690,21 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 	up_write(&sdp->sd_log_flush_lock);
 }
 
+
+/**
+ * gfs2_meta_syncfs - sync all the buffers in a filesystem
+ * @sdp: the filesystem
+ *
+ */
+
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
+{
+	gfs2_log_flush(sdp, NULL);
+	for (;;) {
+		gfs2_ail1_start(sdp, DIO_ALL);
+		if (gfs2_ail1_empty(sdp, DIO_ALL))
+			break;
+		msleep(10);
+	}
+}
+
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 7f5737d556..8e7aa0f291 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,7 +48,6 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
 unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
 			    unsigned int ssize);
 
-void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
 int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
 
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
@@ -61,5 +60,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
 void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
 
 void gfs2_log_shutdown(struct gfs2_sbd *sdp);
+void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
 
 #endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939a09f6e8..fbeba813c9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -574,20 +574,3 @@ out:
 	return first_bh;
 }
 
-/**
- * gfs2_meta_syncfs - sync all the buffers in a filesystem
- * @sdp: the filesystem
- *
- */
-
-void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
-{
-	gfs2_log_flush(sdp, NULL);
-	for (;;) {
-		gfs2_ail1_start(sdp, DIO_ALL);
-		if (gfs2_ail1_empty(sdp, DIO_ALL))
-			break;
-		msleep(10);
-	}
-}
-
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 3ec939e20d..e037425bc0 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -67,7 +67,6 @@ static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
 }
 
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
-void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
 
 #define buffer_busy(bh) \
 ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
-- 
cgit v1.2.2


From cb4c03131836a55bf95e1c165409244ac6b4f39f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 23 Nov 2006 11:16:32 -0500
Subject: [GFS2] Reduce number of arguments to meta_io.c:getbuf()

Since the superblock and the address_space are determined by the
glock, we might as well just pass that as the argument since all
the callers already have that available.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/meta_io.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index fbeba813c9..0e34d99189 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -127,17 +127,17 @@ void gfs2_meta_sync(struct gfs2_glock *gl)
 
 /**
  * getbuf - Get a buffer with a given address space
- * @sdp: the filesystem
- * @aspace: the address space
+ * @gl: the glock
  * @blkno: the block number (filesystem scope)
  * @create: 1 if the buffer should be created
  *
  * Returns: the buffer
  */
 
-static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
-				  u64 blkno, int create)
+static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create)
 {
+	struct address_space *mapping = gl->gl_aspace->i_mapping;
+	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct page *page;
 	struct buffer_head *bh;
 	unsigned int shift;
@@ -150,13 +150,13 @@ static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
 
 	if (create) {
 		for (;;) {
-			page = grab_cache_page(aspace->i_mapping, index);
+			page = grab_cache_page(mapping, index);
 			if (page)
 				break;
 			yield();
 		}
 	} else {
-		page = find_lock_page(aspace->i_mapping, index);
+		page = find_lock_page(mapping, index);
 		if (!page)
 			return NULL;
 	}
@@ -202,7 +202,7 @@ static void meta_prep_new(struct buffer_head *bh)
 struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
 {
 	struct buffer_head *bh;
-	bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+	bh = getbuf(gl, blkno, CREATE);
 	meta_prep_new(bh);
 	return bh;
 }
@@ -220,7 +220,7 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 		   struct buffer_head **bhp)
 {
-	*bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
+	*bhp = getbuf(gl, blkno, CREATE);
 	if (!buffer_uptodate(*bhp))
 		ll_rw_block(READ_META, 1, bhp);
 	if (flags & DIO_WAIT) {
@@ -379,11 +379,10 @@ void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
 void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct inode *aspace = ip->i_gl->gl_aspace;
 	struct buffer_head *bh;
 
 	while (blen) {
-		bh = getbuf(sdp, aspace, bstart, NO_CREATE);
+		bh = getbuf(ip->i_gl, bstart, NO_CREATE);
 		if (bh) {
 			struct gfs2_bufdata *bd = bh->b_private;
 
@@ -484,7 +483,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 	spin_unlock(&ip->i_spin);
 
 	if (!bh)
-		bh = getbuf(gl->gl_sbd, gl->gl_aspace, num, CREATE);
+		bh = getbuf(gl, num, CREATE);
 
 	if (!bh)
 		return -ENOBUFS;
@@ -535,7 +534,6 @@ err:
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	struct inode *aspace = gl->gl_aspace;
 	struct buffer_head *first_bh, *bh;
 	u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
 			  sdp->sd_sb.sb_bsize_shift;
@@ -547,7 +545,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	if (extlen > max_ra)
 		extlen = max_ra;
 
-	first_bh = getbuf(sdp, aspace, dblock, CREATE);
+	first_bh = getbuf(gl, dblock, CREATE);
 
 	if (buffer_uptodate(first_bh))
 		goto out;
@@ -558,7 +556,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	extlen--;
 
 	while (extlen) {
-		bh = getbuf(sdp, aspace, dblock, CREATE);
+		bh = getbuf(gl, dblock, CREATE);
 
 		if (!buffer_uptodate(bh) && !buffer_locked(bh))
 			ll_rw_block(READA, 1, &bh);
-- 
cgit v1.2.2


From 300c7d75f3a5e8edd3e390ccd56b808f3fb14e33 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 27 Nov 2006 09:55:28 -0500
Subject: [GFS2] Fix recursive locking in gfs2_permission

Since gfs2_permission may be called either from the VFS (in which case
we need to obtain a shared glock) or from GFS2 (in which case we already
have a glock) we need to test to see whether or not a lock is required.
The original test was buggy due to a potential race. This one should
be safe.

This fixes Red Hat bugzilla #217129

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_inode.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index b247f25eff..fd9fee2cee 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -835,6 +835,10 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
  * @mask:
  * @nd: passed from Linux VFS, ignored by us
  *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if its not already been done.
+ *
  * Returns: errno
  */
 
@@ -843,15 +847,18 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder i_gh;
 	int error;
+	int unlock = 0;
 
-	if (!test_bit(GIF_INVALID, &ip->i_flags))
-		return generic_permission(inode, mask, gfs2_check_acl);
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+		if (error)
+			return error;
+		unlock = 1;
+	}
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (!error) {
-		error = generic_permission(inode, mask, gfs2_check_acl_locked);
+	error = generic_permission(inode, mask, gfs2_check_acl_locked);
+	if (unlock)
 		gfs2_glock_dq_uninit(&i_gh);
-	}
 
 	return error;
 }
-- 
cgit v1.2.2


From dcf3dd852f554bb0016aa23892596717cc123a26 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Mon, 27 Nov 2006 10:12:05 -0500
Subject: [GFS2] Fix recursive locking in gfs2_getattr

The readdirplus NFS operation can result in gfs2_getattr being
called with the glock already held. In this case we do not want
to try and grab the lock again.

This fixes Red Hat bugzilla #215727

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_inode.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index fd9fee2cee..fbe38c1ab5 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -992,6 +992,12 @@ out:
  * @dentry: The dentry to stat
  * @stat: The inode's stats
  *
+ * This may be called from the VFS directly, or from within GFS2 with the
+ * inode locked, so we look to see if the glock is already locked and only
+ * lock the glock if its not already been done. Note that its the NFS
+ * readdirplus operation which causes this to be called (from filldir)
+ * with the glock already held.
+ *
  * Returns: errno
  */
 
@@ -1002,14 +1008,20 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	int error;
+	int unlock = 0;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-	if (!error) {
-		generic_fillattr(inode, stat);
-		gfs2_glock_dq_uninit(&gh);
+	if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) {
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+		if (error)
+			return error;
+		unlock = 1;
 	}
 
-	return error;
+	generic_fillattr(inode, stat);
+	if (unlock);
+		gfs2_glock_dq_uninit(&gh);
+
+	return 0;
 }
 
 static int gfs2_setxattr(struct dentry *dentry, const char *name,
-- 
cgit v1.2.2


From 2896ee37ccc1f9acb244c9b02becb74a43661009 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 27 Nov 2006 11:31:22 -0600
Subject: [DLM] fix add_requestqueue checking nodes list

Requests that arrive after recovery has started are saved in the
requestqueue and processed after recovery is done.  Some of these requests
are purged during recovery if they are from nodes that have been removed.
We move the purging of the requests (dlm_purge_requestqueue) to later in
the recovery sequence which allows the routine saving requests
(dlm_add_requestqueue) to avoid filtering out requests by nodeid since the
same will be done by the purge.  The current code has add_requestqueue
filtering by nodeid but doesn't hold any locks when accessing the list of
current nodes.  This also means that we need to call the purge routine
when the lockspace is being shut down since the add routine will not be
rejecting requests itself any more.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lockspace.c    |  2 ++
 fs/dlm/recoverd.c     | 16 ++++++++--------
 fs/dlm/requestqueue.c |  7 ++++---
 3 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f8842ca443..791388b25c 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -22,6 +22,7 @@
 #include "memory.h"
 #include "lock.h"
 #include "recover.h"
+#include "requestqueue.h"
 
 #ifdef CONFIG_DLM_DEBUG
 int dlm_create_debug_file(struct dlm_ls *ls);
@@ -684,6 +685,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	 * Free structures on any other lists
 	 */
 
+	dlm_purge_requestqueue(ls);
 	kfree(ls->ls_recover_args);
 	dlm_clear_free_entries(ls);
 	dlm_clear_members(ls);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 8bb895ffd9..9dc2f9156f 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -93,14 +93,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 		goto fail;
 	}
 
-	/*
-	 * Purge directory-related requests that are saved in requestqueue.
-	 * All dir requests from before recovery are invalid now due to the dir
-	 * rebuild and will be resent by the requesting nodes.
-	 */
-
-	dlm_purge_requestqueue(ls);
-
 	/*
 	 * Wait for all nodes to complete directory rebuild.
 	 */
@@ -181,6 +173,14 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_release_root_list(ls);
 
+	/*
+	 * Purge directory-related requests that are saved in requestqueue.
+	 * All dir requests from before recovery are invalid now due to the dir
+	 * rebuild and will be resent by the requesting nodes.
+	 */
+
+	dlm_purge_requestqueue(ls);
+
 	dlm_set_recover_status(ls, DLM_RS_DONE);
 	error = dlm_recover_done_wait(ls);
 	if (error) {
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 0226d2a0a0..65008d79c9 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -36,9 +36,6 @@ int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 	int length = hd->h_length;
 	int rv = 0;
 
-	if (dlm_is_removed(ls, nodeid))
-		return 0;
-
 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory\n");
@@ -133,6 +130,10 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
 {
 	uint32_t type = ms->m_type;
 
+	/* the ls is being cleaned up and freed by release_lockspace */
+	if (!ls->ls_count)
+		return 1;
+
 	if (dlm_is_removed(ls, nodeid))
 		return 1;
 
-- 
cgit v1.2.2


From aed3255f2267e2d1d95b9cf7f2995ce24e6c873b Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <ryusuke@osrg.net>
Date: Tue, 28 Nov 2006 02:53:22 +0900
Subject: [GFS2] fs/gfs2/log.c:log_bmap() fix printk format warning

Fix a printk format warning in fs/gfs2/log.c:
fs/gfs2/log.c:322: warning: format '%llu' expects type 'long long unsigned int', but argument 3 has type 'sector_t'

Signed-off-by: Ryusuke Konishi <ryusuke@osrg.net>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 7713d59186..291415ddfe 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -326,7 +326,8 @@ static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
 	bh_map.b_size = 1 << inode->i_blkbits;
 	error = gfs2_block_map(inode, lbn, 0, &bh_map);
 	if (error || !bh_map.b_blocknr)
-		printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, bh_map.b_blocknr, lbn);
+		printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error,
+		       (unsigned long long)bh_map.b_blocknr, lbn);
 	gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
 
 	return bh_map.b_blocknr;
-- 
cgit v1.2.2


From 1babdb453138f17b8ed3d1d5711089c4e2fa5ace Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 27 Nov 2006 13:18:41 -0600
Subject: [DLM] fix size of STATUS_REPLY message

When the not_ready routine sends a "fake" status reply with blank status
flags, it needs to use the correct size for a normal STATUS_REPLY by
including the size of the would-be config parameters.  We also fill in the
non-existant config parameters with an invalid lvblen value so it's easier
to notice if these invalid paratmers are ever being used.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/rcom.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 87b12f7d3d..6ac195cec0 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -370,9 +370,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
+	struct rcom_config *rf;
 	struct dlm_mhandle *mh;
 	char *mb;
-	int mb_len = sizeof(struct dlm_rcom);
+	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
 
 	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
 	if (!mh)
@@ -391,6 +392,9 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	rc->rc_id = rc_in->rc_id;
 	rc->rc_result = -ESRCH;
 
+	rf = (struct rcom_config *) rc->rc_buf;
+	rf->rf_lvblen = -1;
+
 	dlm_rcom_out(rc);
 	dlm_lowcomms_commit_buffer(mh);
 
-- 
cgit v1.2.2


From 98f176fb32f33795b6d0f83856008b932123ab38 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 27 Nov 2006 13:19:28 -0600
Subject: [DLM] don't accept replies to old recovery messages

We often abort a recovery after sending a status request to a remote node.
We want to ignore any potential status reply we get from the remote node.
If we get one of these unwanted replies, we've often moved on to the next
recovery message and incremented the message sequence counter, so the
reply will be ignored due to the seq number.  In some cases, we've not
moved on to the next message so the seq number of the reply we want to
ignore is still correct, causing the reply to be accepted.  The next
recovery message will then mistake this old reply as a new one.

To fix this, we add the flag RCOM_WAIT to indicate when we can accept a
new reply.  We clear this flag if we abort recovery while waiting for a
reply.  Before the flag is set again (to allow new replies) we know that
any old replies will be rejected due to their sequence number.  We also
initialize the recovery-message sequence number to a random value when a
lockspace is first created.  This makes it clear when messages are being
rejected from an old instance of a lockspace that has since been
recreated.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/dlm_internal.h |  4 +++-
 fs/dlm/lockspace.c    |  2 ++
 fs/dlm/rcom.c         | 44 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1e5cd67e1b..1ee8195e6f 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -471,6 +471,7 @@ struct dlm_ls {
 	char			*ls_recover_buf;
 	int			ls_recover_nodeid; /* for debugging */
 	uint64_t		ls_rcom_seq;
+	spinlock_t		ls_rcom_spin;
 	struct list_head	ls_recover_list;
 	spinlock_t		ls_recover_list_lock;
 	int			ls_recover_list_count;
@@ -488,7 +489,8 @@ struct dlm_ls {
 #define LSFL_RUNNING		1
 #define LSFL_RECOVERY_STOP	2
 #define LSFL_RCOM_READY		3
-#define LSFL_UEVENT_WAIT	4
+#define LSFL_RCOM_WAIT		4
+#define LSFL_UEVENT_WAIT	5
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 791388b25c..59012b089e 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -479,6 +479,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	ls->ls_recoverd_task = NULL;
 	mutex_init(&ls->ls_recoverd_active);
 	spin_lock_init(&ls->ls_recover_lock);
+	spin_lock_init(&ls->ls_rcom_spin);
+	get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
 	ls->ls_recover_status = 0;
 	ls->ls_recover_seq = 0;
 	ls->ls_recover_args = NULL;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6ac195cec0..c42f2db6c4 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -90,13 +90,28 @@ static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
 	return 0;
 }
 
+static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
+{
+	spin_lock(&ls->ls_rcom_spin);
+	*new_seq = ++ls->ls_rcom_seq;
+	set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+	spin_unlock(&ls->ls_rcom_spin);
+}
+
+static void disallow_sync_reply(struct dlm_ls *ls)
+{
+	spin_lock(&ls->ls_rcom_spin);
+	clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
+	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	spin_unlock(&ls->ls_rcom_spin);
+}
+
 int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
 {
 	struct dlm_rcom *rc;
 	struct dlm_mhandle *mh;
 	int error = 0;
 
-	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 	ls->ls_recover_nodeid = nodeid;
 
 	if (nodeid == dlm_our_nodeid()) {
@@ -108,12 +123,14 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
 	error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
 	if (error)
 		goto out;
-	rc->rc_id = ++ls->ls_rcom_seq;
+
+	allow_sync_reply(ls, &rc->rc_id);
+	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 
 	send_rcom(ls, mh, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
-	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	disallow_sync_reply(ls);
 	if (error)
 		goto out;
 
@@ -150,14 +167,20 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 
 static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 {
-	if (rc_in->rc_id != ls->ls_rcom_seq) {
-		log_debug(ls, "reject old reply %d got %llx wanted %llx",
-			  rc_in->rc_type, rc_in->rc_id, ls->ls_rcom_seq);
-		return;
+	spin_lock(&ls->ls_rcom_spin);
+	if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
+	    rc_in->rc_id != ls->ls_rcom_seq) {
+		log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
+			  rc_in->rc_type, rc_in->rc_header.h_nodeid,
+			  rc_in->rc_id, ls->ls_rcom_seq);
+		goto out;
 	}
 	memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
 	set_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
 	wake_up(&ls->ls_wait_general);
+ out:
+	spin_unlock(&ls->ls_rcom_spin);
 }
 
 static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
@@ -171,7 +194,6 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 	struct dlm_mhandle *mh;
 	int error = 0, len = sizeof(struct dlm_rcom);
 
-	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 	ls->ls_recover_nodeid = nodeid;
 
 	if (nodeid == dlm_our_nodeid()) {
@@ -185,12 +207,14 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
 	if (error)
 		goto out;
 	memcpy(rc->rc_buf, last_name, last_len);
-	rc->rc_id = ++ls->ls_rcom_seq;
+
+	allow_sync_reply(ls, &rc->rc_id);
+	memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
 
 	send_rcom(ls, mh, rc);
 
 	error = dlm_wait_function(ls, &rcom_response);
-	clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
+	disallow_sync_reply(ls);
  out:
 	return error;
 }
-- 
cgit v1.2.2


From 0ac230699a0f3f0d15ad4e4ad99446dac5b4a21f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 28 Nov 2006 22:29:19 -0800
Subject: [GFS2] lock function parameter

Fix function parameter typing:
fs/gfs2/glock.c:100: warning: function declaration isn't a prototype

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3c2ff81c84..f130f9894b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -96,7 +96,7 @@ static inline rwlock_t *gl_lock_addr(unsigned int x)
 	return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
 }
 #else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(x)
+static inline rwlock_t *gl_lock_addr(unsigned int x)
 {
 	return NULL;
 }
-- 
cgit v1.2.2


From 57adf7eede38d315e0e328c52484d6a596e9a238 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <ryusuke@osrg.net>
Date: Wed, 29 Nov 2006 09:33:48 -0500
Subject: [DLM] fix format warnings in rcom.c and recoverd.c

This fixes the following gcc warnings generated on
the architectures where uint64_t != unsigned long long (e.g. ppc64).

fs/dlm/rcom.c:154: warning: format '%llx' expects type 'long long unsigned int', but argument 4 has type 'uint64_t'
fs/dlm/rcom.c:154: warning: format '%llx' expects type 'long long unsigned int', but argument 5 has type 'uint64_t'
fs/dlm/recoverd.c:48: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'uint64_t'
fs/dlm/recoverd.c:202: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'uint64_t'
fs/dlm/recoverd.c:210: warning: format '%llx' expects type 'long long unsigned int', but argument 3 has type 'uint64_t'

Signed-off-by: Ryusuke Konishi <ryusuke@osrg.net>
Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/rcom.c     | 3 ++-
 fs/dlm/recoverd.c | 8 +++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index c42f2db6c4..4cc31be9cd 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -172,7 +172,8 @@ static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	    rc_in->rc_id != ls->ls_rcom_seq) {
 		log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
 			  rc_in->rc_type, rc_in->rc_header.h_nodeid,
-			  rc_in->rc_id, ls->ls_rcom_seq);
+			  (unsigned long long)rc_in->rc_id,
+			  (unsigned long long)ls->ls_rcom_seq);
 		goto out;
 	}
 	memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 9dc2f9156f..650536aa51 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -45,7 +45,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 	unsigned long start;
 	int error, neg = 0;
 
-	log_debug(ls, "recover %llx", rv->seq);
+	log_debug(ls, "recover %llx", (unsigned long long)rv->seq);
 
 	mutex_lock(&ls->ls_recoverd_active);
 
@@ -212,7 +212,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_astd_wake();
 
-	log_debug(ls, "recover %llx done: %u ms", rv->seq,
+	log_debug(ls, "recover %llx done: %u ms",
+		  (unsigned long long)rv->seq,
 		  jiffies_to_msecs(jiffies - start));
 	mutex_unlock(&ls->ls_recoverd_active);
 
@@ -220,7 +221,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
  fail:
 	dlm_release_root_list(ls);
-	log_debug(ls, "recover %llx error %d", rv->seq, error);
+	log_debug(ls, "recover %llx error %d",
+		  (unsigned long long)rv->seq, error);
 	mutex_unlock(&ls->ls_recoverd_active);
 	return error;
 }
-- 
cgit v1.2.2


From 77386e1f662f104680da7885d32e068e4b11b882 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Wed, 29 Nov 2006 10:41:49 -0500
Subject: [GFS2] Remove gfs2_check_acl()

As pointed out by Adrian Bunk, the gfs2_check_acl() function is no
longer used. This patch removes it and renamed gfs2_check_acl_locked()
to gfs2_check_acl() since we only need one variant of that function now.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Adrian Bunk <bunk@stusta.de>
---
 fs/gfs2/acl.c       | 19 ++-----------------
 fs/gfs2/acl.h       |  1 -
 fs/gfs2/ops_inode.c |  2 +-
 3 files changed, 3 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3908992b27..6e80844367 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -145,14 +145,14 @@ out:
 }
 
 /**
- * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
+ * gfs2_check_acl - Check an ACL to see if we're allowed to do something
  * @inode: the file we want to do something to
  * @mask: what we want to do
  *
  * Returns: errno
  */
 
-int gfs2_check_acl_locked(struct inode *inode, int mask)
+int gfs2_check_acl(struct inode *inode, int mask)
 {
 	struct posix_acl *acl = NULL;
 	int error;
@@ -170,21 +170,6 @@ int gfs2_check_acl_locked(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
-int gfs2_check_acl(struct inode *inode, int mask)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_holder i_gh;
-	int error;
-
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (!error) {
-		error = gfs2_check_acl_locked(inode, mask);
-		gfs2_glock_dq_uninit(&i_gh);
-	}
-
-	return error;
-}
-
 static int munge_mode(struct gfs2_inode *ip, mode_t mode)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 05c294fe0d..6751930bfb 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -31,7 +31,6 @@ int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
 			  struct gfs2_ea_request *er,
 			  int *remove, mode_t *mode);
 int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
-int gfs2_check_acl_locked(struct inode *inode, int mask);
 int gfs2_check_acl(struct inode *inode, int mask);
 int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
 int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index fbe38c1ab5..636dda4c7d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -856,7 +856,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
 		unlock = 1;
 	}
 
-	error = generic_permission(inode, mask, gfs2_check_acl_locked);
+	error = generic_permission(inode, mask, gfs2_check_acl);
 	if (unlock)
 		gfs2_glock_dq_uninit(&i_gh);
 
-- 
cgit v1.2.2


From 0da3585e1ef650d3224b4d6f9799558d1d99fa1e Mon Sep 17 00:00:00 2001
From: Srinivasa Ds <srinivasa@in.ibm.com>
Date: Thu, 30 Nov 2006 15:04:55 +0530
Subject: [GFS2] Mount problem with the GFS2 code

  While mounting the gfs2 filesystem,our test team had a problem and we
got this error message.
=======================================================

GFS2: fsid=: Trying to join cluster "lock_nolock", "dasde1"
GFS2: fsid=dasde1.0: Joined cluster. Now mounting FS...
GFS2: not a GFS2 filesystem
GFS2: fsid=dasde1.0: can't read superblock: -22

==========================================================================
On debugging further we found that problem is while reading the super
block(gfs2_read_super) and comparing the magic number in it.
When I  replace the submit_bio() call(present in gfs2_read_super) with
the sb_getblk() and ll_rw_block(), mount operation succeded.
On further analysis we found that before calling submit_bio(),
bio->bi_sector was set to "sector" variable. This "sector" variable has
the same value of bh->b_blocknr(block number). Hence there is a need to
multiply this valuwith (blocksize >> 9)(9 because,sector size
2^9,samething happens in ll_rw_block also, before calling submit_bio()).
So I have developed the patch which solves this problem. Please let me
know your comments.
================================================================

Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 1408c5f313..3b22727427 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -199,7 +199,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 		return NULL;
 	}
 
-	bio->bi_sector = sector;
+	bio->bi_sector = sector * (sb->s_blocksize >> 9);
 	bio->bi_bdev = sb->s_bdev;
 	bio_add_page(bio, page, PAGE_SIZE, 0);
 
-- 
cgit v1.2.2


From aac1a3c77a46c2d06f297641760dd740ac2a84af Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 30 Nov 2006 10:02:19 -0500
Subject: [GFS2] Add a comment about reading the super block

The comment explains why we use the bio functions to read
the super block.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Srinivasa Ds <srinivasa@in.ibm.com>
---
 fs/gfs2/super.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3b22727427..43a24f2e59 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -180,6 +180,24 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
 	return 0;
 }
 
+/**
+ * gfs2_read_super - Read the gfs2 super block from disk
+ * @sb: The VFS super block
+ * @sector: The location of the super block
+ *
+ * This uses the bio functions to read the super block from disk
+ * because we want to be 100% sure that we never read cached data.
+ * A super block is read twice only during each GFS2 mount and is
+ * never written to by the filesystem. The first time its read no
+ * locks are held, and the only details which are looked at are those
+ * relating to the locking protocol. Once locking is up and working,
+ * the sb is read again under the lock to establish the location of
+ * the master directory (contains pointers to journals etc) and the
+ * root directory.
+ *
+ * Returns: A page containing the sb or NULL
+ */
+
 struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
 {
 	struct page *page;
-- 
cgit v1.2.2


From 33c3de32872ef3c075e4dac04c0de8f86ac39f6f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 30 Nov 2006 10:14:32 -0500
Subject: [GFS2] Don't flush everything on fdatasync

The gfs2_fsync() function was doing a journal flush on each
and every call. While this is correct, its also a lot of
overhead. This patch means that on fdatasync flushes we
rely on the VFS to flush the data for us and we don't do
a journal flush unless we really need to.

We have to do a journal flush for stuffed files though because
they have the data and the inode metadata in the same block.
Journaled files also need a journal flush too of course.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_file.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index c2be216eb9..7bd971bf7c 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -22,6 +22,7 @@
 #include <linux/ext2_fs.h>
 #include <linux/crc32.h>
 #include <linux/lm_interface.h>
+#include <linux/writeback.h>
 #include <asm/uaccess.h>
 
 #include "gfs2.h"
@@ -503,16 +504,39 @@ static int gfs2_close(struct inode *inode, struct file *file)
  * @file: the file that points to the dentry (we ignore this)
  * @dentry: the dentry that points to the inode to sync
  *
+ * The VFS will flush "normal" data for us. We only need to worry
+ * about metadata here. For journaled data, we just do a log flush
+ * as we can't avoid it. Otherwise we can just bale out if datasync
+ * is set. For stuffed inodes we must flush the log in order to
+ * ensure that all data is on disk.
+ *
  * Returns: errno
  */
 
 static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+	struct inode *inode = dentry->d_inode;
+	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
+	int ret = 0;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0,
+	};
+
+	if (gfs2_is_jdata(GFS2_I(inode))) {
+		gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+		return 0;
+	}
 
-	gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
+	if (sync_state != 0) {
+		if (!datasync)
+			ret = sync_inode(inode, &wbc);
 
-	return 0;
+		if (gfs2_is_stuffed(GFS2_I(inode)))
+			gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
+	}
+
+	return ret;
 }
 
 /**
-- 
cgit v1.2.2


From 887bc5d00c02b32763845247024e8db5243ef857 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 5 Dec 2006 13:34:17 -0500
Subject: [GFS2] Fix indent in recovery.c

As per comments from Andrew Morton and Jan Engelhardt, this fixes the
indent and removes the "static" from a variable declaration since its
not needed in this case (now allocated on the stack of the function
in question).

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Jan Engelhardt <jengelh@linux01.gwdg.de>
Cc: Andrew Morton <akpm@osdl.org>
---
 fs/gfs2/recovery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 4acf238e1d..d0c806b85c 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -136,7 +136,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 {
 	struct buffer_head *bh;
 	struct gfs2_log_header_host lh;
-static const u32 nothing = 0;
+	const u32 nothing = 0;
 	u32 hash;
 	int error;
 
-- 
cgit v1.2.2


From 67fd44fea274a5033ceb90284683bc44df61df54 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 7 Dec 2006 02:14:03 +0100
Subject: [PATCH] x86-64: Implement compat code for SIOCSIFHWBROADCAST

This network ioctl wasn't handled before.

Reported by Alexandra.Kossovsky@oktetlabs.ru (Alexandra Kossovsky)
Signed-off-by: Andi Kleen <ak@suse.de>
---
 fs/compat_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a91f2628c9..47bb78d1c3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2397,6 +2397,7 @@ HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc)
 HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
 HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
 HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
 
 /* ioctls used by appletalk ddp.c */
 HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
-- 
cgit v1.2.2


From 34126f9f41901ca9d7d0031c2b11fc0d6c07b72d Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 7 Dec 2006 09:13:14 -0500
Subject: [GFS2] Change gfs2_fsync() to use write_inode_now()

This is a bit better than the previous version of gfs2_fsync()
although it would be better still if we were able to call a
function which only wrote the inode & metadata. Its no big deal
though that this will potentially write the data as well since
the VFS has already done that before calling gfs2_fsync(). I've
also added a comment to explain whats going on here.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@osdl.org>
---
 fs/gfs2/ops_file.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 7bd971bf7c..b3f1e0349a 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -510,6 +510,11 @@ static int gfs2_close(struct inode *inode, struct file *file)
  * is set. For stuffed inodes we must flush the log in order to
  * ensure that all data is on disk.
  *
+ * The call to write_inode_now() is there to write back metadata and
+ * the inode itself. It does also try and write the data, but thats
+ * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
+ * for us.
+ *
  * Returns: errno
  */
 
@@ -518,10 +523,6 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
 	struct inode *inode = dentry->d_inode;
 	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
 	int ret = 0;
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 0,
-	};
 
 	if (gfs2_is_jdata(GFS2_I(inode))) {
 		gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
@@ -530,7 +531,7 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
 
 	if (sync_state != 0) {
 		if (!datasync)
-			ret = sync_inode(inode, &wbc);
+			ret = write_inode_now(inode, 0);
 
 		if (gfs2_is_stuffed(GFS2_I(inode)))
 			gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
-- 
cgit v1.2.2


From ac33d0710595579e3cfca42dde2257eb0b123f6d Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Wed, 6 Dec 2006 15:10:37 +0000
Subject: [DLM] Clean up lowcomms

This fixes up most of the things pointed out by akpm and Pavel Machek
with comments below indicating why some things have been left:

Andrew Morton wrote:
>
>> +static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
>> +{
>> +	struct nodeinfo *ni;
>> +	int r;
>> +	int n;
>> +
>> +	down_read(&nodeinfo_lock);
>
> Given that this function can sleep, I wonder if `alloc' is useful.
>
> I see lots of callers passing in a literal "0" for `alloc'.  That's in fact
> a secret (GFP_ATOMIC & ~__GFP_HIGH).  I doubt if that's what you really
> meant.  Particularly as the code could at least have used __GFP_WAIT (aka
> GFP_NOIO) which is much, much more reliable than "0".  In fact "0" is the
> least reliable mode possible.
>
> IOW, this is all bollixed up.

When 0 is passed into nodeid2nodeinfo the function does not try to allocate a
new structure at all. it's an indication that the caller only wants the nodeinfo
struct for that nodeid if there actually is one in existance.
I've tidied the function itself so it's more obvious, (and tidier!)

>> +/* Data received from remote end */
>> +static int receive_from_sock(void)
>> +{
>> +	int ret = 0;
>> +	struct msghdr msg;
>> +	struct kvec iov[2];
>> +	unsigned len;
>> +	int r;
>> +	struct sctp_sndrcvinfo *sinfo;
>> +	struct cmsghdr *cmsg;
>> +	struct nodeinfo *ni;
>> +
>> +	/* These two are marginally too big for stack allocation, but this
>> +	 * function is (currently) only called by dlm_recvd so static should be
>> +	 * OK.
>> +	 */
>> +	static struct sockaddr_storage msgname;
>> +	static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> whoa.  This is globally singly-threaded code??

Yes. it is only ever run in the context of dlm_recvd.
>>
>> +static void initiate_association(int nodeid)
>> +{
>> +	struct sockaddr_storage rem_addr;
>> +	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> Another static buffer to worry about.  Globally singly-threaded code?

Yes. Only ever called by dlm_sendd.

>> +
>> +/* Send a message */
>> +static int send_to_sock(struct nodeinfo *ni)
>> +{
>> +	int ret = 0;
>> +	struct writequeue_entry *e;
>> +	int len, offset;
>> +	struct msghdr outmsg;
>> +	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> Singly-threaded?

Yep.

>>
>> +static void dealloc_nodeinfo(void)
>> +{
>> +	int i;
>> +
>> +	for (i=1; i<=max_nodeid; i++) {
>> +		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
>> +		if (ni) {
>> +			idr_remove(&nodeinfo_idr, i);
>
> Didn't that need locking?

Not. it's only ever called at DLM shutdown after all the other threads
have been stopped.

>>
>> +static int write_list_empty(void)
>> +{
>> +	int status;
>> +
>> +	spin_lock_bh(&write_nodes_lock);
>> +	status = list_empty(&write_nodes);
>> +	spin_unlock_bh(&write_nodes_lock);
>> +
>> +	return status;
>> +}
>
> This function's return value is meaningless.  As soon as the lock gets
> dropped, the return value can get out of sync with reality.
>
> Looking at the caller, this _might_ happen to be OK, but it's a nasty and
> dangerous thing.  Really the locking should be moved into the caller.

It's just an optimisation to allow the caller to schedule if there is no work
to do. if something arrives immediately afterwards then it will get picked up
when the process re-awakes (and it will be woken by that arrival).

The 'accepting' atomic has gone completely. as Andrew pointed out it didn't
really achieve much anyway. I suspect it was a plaster over some other
startup or shutdown bug to be honest.


Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Pavel Machek <pavel@ucw.cz>
---
 fs/dlm/lowcomms-sctp.c | 264 ++++++++++++++++++--------------------
 fs/dlm/lowcomms-tcp.c  | 342 +++++++++++++++++++------------------------------
 fs/dlm/lowcomms.h      |   2 -
 fs/dlm/main.c          |  10 +-
 4 files changed, 261 insertions(+), 357 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index 6da6b14d5a..fe158d7a92 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -75,13 +75,13 @@ struct nodeinfo {
 };
 
 static DEFINE_IDR(nodeinfo_idr);
-static struct rw_semaphore	nodeinfo_lock;
-static int			max_nodeid;
+static DECLARE_RWSEM(nodeinfo_lock);
+static int max_nodeid;
 
 struct cbuf {
-	unsigned		base;
-	unsigned		len;
-	unsigned		mask;
+	unsigned int base;
+	unsigned int len;
+	unsigned int mask;
 };
 
 /* Just the one of these, now. But this struct keeps
@@ -90,9 +90,9 @@ struct cbuf {
 #define CF_READ_PENDING 1
 
 struct connection {
-	struct socket          *sock;
+	struct socket           *sock;
 	unsigned long		flags;
-	struct page            *rx_page;
+	struct page             *rx_page;
 	atomic_t		waiting_requests;
 	struct cbuf		cb;
 	int                     eagain_flag;
@@ -102,36 +102,40 @@ struct connection {
 
 struct writequeue_entry {
 	struct list_head	list;
-	struct page            *page;
+	struct page             *page;
 	int			offset;
 	int			len;
 	int			end;
 	int			users;
-	struct nodeinfo        *ni;
+	struct nodeinfo         *ni;
 };
 
-#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
-#define CBUF_EMPTY(cb) ((cb)->len == 0)
-#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
-#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+static void cbuf_add(struct cbuf *cb, int n)
+{
+	cb->len += n;
+}
 
-#define CBUF_INIT(cb, size) \
-do { \
-	(cb)->base = (cb)->len = 0; \
-	(cb)->mask = ((size)-1); \
-} while(0)
+static int cbuf_data(struct cbuf *cb)
+{
+	return ((cb->base + cb->len) & cb->mask);
+}
 
-#define CBUF_EAT(cb, n) \
-do { \
-	(cb)->len  -= (n); \
-	(cb)->base += (n); \
-	(cb)->base &= (cb)->mask; \
-} while(0)
+static void cbuf_init(struct cbuf *cb, int size)
+{
+	cb->base = cb->len = 0;
+	cb->mask = size-1;
+}
 
+static void cbuf_eat(struct cbuf *cb, int n)
+{
+	cb->len  -= n;
+	cb->base += n;
+	cb->base &= cb->mask;
+}
 
 /* List of nodes which have writes pending */
-static struct list_head write_nodes;
-static spinlock_t write_nodes_lock;
+static LIST_HEAD(write_nodes);
+static DEFINE_SPINLOCK(write_nodes_lock);
 
 /* Maximum number of incoming messages to process before
  * doing a schedule()
@@ -141,8 +145,7 @@ static spinlock_t write_nodes_lock;
 /* Manage daemons */
 static struct task_struct *recv_task;
 static struct task_struct *send_task;
-static wait_queue_head_t lowcomms_recv_wait;
-static atomic_t accepting;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
 
 /* The SCTP connection */
 static struct connection sctp_con;
@@ -161,11 +164,11 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 		return error;
 
 	if (dlm_local_addr[0]->ss_family == AF_INET) {
-	        struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
+		struct sockaddr_in *in4  = (struct sockaddr_in *) &addr;
 		struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr;
 		ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
 	} else {
-	        struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
+		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
 		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
 		memcpy(&ret6->sin6_addr, &in6->sin6_addr,
 		       sizeof(in6->sin6_addr));
@@ -174,6 +177,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 	return 0;
 }
 
+/* If alloc is 0 here we will not attempt to allocate a new
+   nodeinfo struct */
 static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
 {
 	struct nodeinfo *ni;
@@ -184,44 +189,45 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
 	ni = idr_find(&nodeinfo_idr, nodeid);
 	up_read(&nodeinfo_lock);
 
-	if (!ni && alloc) {
-		down_write(&nodeinfo_lock);
+	if (ni || !alloc)
+		return ni;
 
-		ni = idr_find(&nodeinfo_idr, nodeid);
-		if (ni)
-			goto out_up;
+	down_write(&nodeinfo_lock);
 
-		r = idr_pre_get(&nodeinfo_idr, alloc);
-		if (!r)
-			goto out_up;
+	ni = idr_find(&nodeinfo_idr, nodeid);
+	if (ni)
+		goto out_up;
 
-		ni = kmalloc(sizeof(struct nodeinfo), alloc);
-		if (!ni)
-			goto out_up;
+	r = idr_pre_get(&nodeinfo_idr, alloc);
+	if (!r)
+		goto out_up;
 
-		r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
-		if (r) {
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		if (n != nodeid) {
-			idr_remove(&nodeinfo_idr, n);
-			kfree(ni);
-			ni = NULL;
-			goto out_up;
-		}
-		memset(ni, 0, sizeof(struct nodeinfo));
-		spin_lock_init(&ni->lock);
-		INIT_LIST_HEAD(&ni->writequeue);
-		spin_lock_init(&ni->writequeue_lock);
-		ni->nodeid = nodeid;
-
-		if (nodeid > max_nodeid)
-			max_nodeid = nodeid;
-	out_up:
-		up_write(&nodeinfo_lock);
+	ni = kmalloc(sizeof(struct nodeinfo), alloc);
+	if (!ni)
+		goto out_up;
+
+	r = idr_get_new_above(&nodeinfo_idr, ni, nodeid, &n);
+	if (r) {
+		kfree(ni);
+		ni = NULL;
+		goto out_up;
 	}
+	if (n != nodeid) {
+		idr_remove(&nodeinfo_idr, n);
+		kfree(ni);
+		ni = NULL;
+		goto out_up;
+	}
+	memset(ni, 0, sizeof(struct nodeinfo));
+	spin_lock_init(&ni->lock);
+	INIT_LIST_HEAD(&ni->writequeue);
+	spin_lock_init(&ni->writequeue_lock);
+	ni->nodeid = nodeid;
+
+	if (nodeid > max_nodeid)
+		max_nodeid = nodeid;
+out_up:
+	up_write(&nodeinfo_lock);
 
 	return ni;
 }
@@ -279,13 +285,13 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 		in4_addr->sin_port = cpu_to_be16(port);
 		memset(&in4_addr->sin_zero, 0, sizeof(in4_addr->sin_zero));
 		memset(in4_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in));
+		       sizeof(struct sockaddr_in));
 		*addr_len = sizeof(struct sockaddr_in);
 	} else {
 		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
 		in6_addr->sin6_port = cpu_to_be16(port);
 		memset(in6_addr+1, 0, sizeof(struct sockaddr_storage) -
-				      sizeof(struct sockaddr_in6));
+		       sizeof(struct sockaddr_in6));
 		*addr_len = sizeof(struct sockaddr_in6);
 	}
 }
@@ -324,7 +330,7 @@ static void send_shutdown(sctp_assoc_t associd)
 	cmsg->cmsg_type = SCTP_SNDRCV;
 	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
 	outmessage.msg_controllen = cmsg->cmsg_len;
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
 
 	sinfo->sinfo_flags |= MSG_EOF;
@@ -387,7 +393,7 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 
 			if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) {
 				log_print("COMM_UP for invalid assoc ID %d",
-					 (int)sn->sn_assoc_change.sac_assoc_id);
+					  (int)sn->sn_assoc_change.sac_assoc_id);
 				init_failed();
 				return;
 			}
@@ -398,15 +404,18 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 			fs = get_fs();
 			set_fs(get_ds());
 			ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
-						IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
-						(char*)&prim, &prim_len);
+							     IPPROTO_SCTP,
+							     SCTP_PRIMARY_ADDR,
+							     (char*)&prim,
+							     &prim_len);
 			set_fs(fs);
 			if (ret < 0) {
 				struct nodeinfo *ni;
 
 				log_print("getsockopt/sctp_primary_addr on "
 					  "new assoc %d failed : %d",
-				    (int)sn->sn_assoc_change.sac_assoc_id, ret);
+					  (int)sn->sn_assoc_change.sac_assoc_id,
+					  ret);
 
 				/* Retry INIT later */
 				ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
@@ -426,12 +435,10 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
 				return;
 
 			/* Save the assoc ID */
-			spin_lock(&ni->lock);
 			ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
-			spin_unlock(&ni->lock);
 
 			log_print("got new/restarted association %d nodeid %d",
-			       (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
+				  (int)sn->sn_assoc_change.sac_assoc_id, nodeid);
 
 			/* Send any pending writes */
 			clear_bit(NI_INIT_PENDING, &ni->flags);
@@ -507,13 +514,12 @@ static int receive_from_sock(void)
 		sctp_con.rx_page = alloc_page(GFP_ATOMIC);
 		if (sctp_con.rx_page == NULL)
 			goto out_resched;
-		CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
+		cbuf_init(&sctp_con.cb, PAGE_CACHE_SIZE);
 	}
 
 	memset(&incmsg, 0, sizeof(incmsg));
 	memset(&msgname, 0, sizeof(msgname));
 
-	memset(incmsg, 0, sizeof(incmsg));
 	msg.msg_name = &msgname;
 	msg.msg_namelen = sizeof(msgname);
 	msg.msg_flags = 0;
@@ -532,17 +538,17 @@ static int receive_from_sock(void)
 	 * iov[0] is the bit of the circular buffer between the current end
 	 * point (cb.base + cb.len) and the end of the buffer.
 	 */
-	iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
+	iov[0].iov_len = sctp_con.cb.base - cbuf_data(&sctp_con.cb);
 	iov[0].iov_base = page_address(sctp_con.rx_page) +
-			  CBUF_DATA(&sctp_con.cb);
+		cbuf_data(&sctp_con.cb);
 	iov[1].iov_len = 0;
 
 	/*
 	 * iov[1] is the bit of the circular buffer between the start of the
 	 * buffer and the start of the currently used section (cb.base)
 	 */
-	if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
-		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
+	if (cbuf_data(&sctp_con.cb) >= sctp_con.cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&sctp_con.cb);
 		iov[1].iov_len = sctp_con.cb.base;
 		iov[1].iov_base = page_address(sctp_con.rx_page);
 		msg.msg_iovlen = 2;
@@ -557,7 +563,7 @@ static int receive_from_sock(void)
 	msg.msg_control = incmsg;
 	msg.msg_controllen = sizeof(incmsg);
 	cmsg = CMSG_FIRSTHDR(&msg);
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 
 	if (msg.msg_flags & MSG_NOTIFICATION) {
 		process_sctp_notification(&msg, page_address(sctp_con.rx_page));
@@ -583,29 +589,29 @@ static int receive_from_sock(void)
 	if (r == 1)
 		return 0;
 
-	CBUF_ADD(&sctp_con.cb, ret);
+	cbuf_add(&sctp_con.cb, ret);
 	ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
 					  page_address(sctp_con.rx_page),
 					  sctp_con.cb.base, sctp_con.cb.len,
 					  PAGE_CACHE_SIZE);
 	if (ret < 0)
 		goto out_close;
-	CBUF_EAT(&sctp_con.cb, ret);
+	cbuf_eat(&sctp_con.cb, ret);
 
-      out:
+out:
 	ret = 0;
 	goto out_ret;
 
-      out_resched:
+out_resched:
 	lowcomms_data_ready(sctp_con.sock->sk, 0);
 	ret = 0;
-	schedule();
+	cond_resched();
 	goto out_ret;
 
-      out_close:
+out_close:
 	if (ret != -EAGAIN)
 		log_print("error reading from sctp socket: %d", ret);
-      out_ret:
+out_ret:
 	return ret;
 }
 
@@ -619,10 +625,12 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
 	set_fs(get_ds());
 	if (num == 1)
 		result = sctp_con.sock->ops->bind(sctp_con.sock,
-					(struct sockaddr *) addr, addr_len);
+						  (struct sockaddr *) addr,
+						  addr_len);
 	else
 		result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
-				SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
+							SCTP_SOCKOPT_BINDX_ADD,
+							(char *)addr, addr_len);
 	set_fs(fs);
 
 	if (result < 0)
@@ -719,10 +727,10 @@ static int init_sock(void)
 
 	return 0;
 
- create_delsock:
+create_delsock:
 	sock_release(sock);
 	sctp_con.sock = NULL;
- out:
+out:
 	return result;
 }
 
@@ -756,16 +764,13 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 	int users = 0;
 	struct nodeinfo *ni;
 
-	if (!atomic_read(&accepting))
-		return NULL;
-
 	ni = nodeid2nodeinfo(nodeid, allocation);
 	if (!ni)
 		return NULL;
 
 	spin_lock(&ni->writequeue_lock);
 	e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
-	if (((struct list_head *) e == &ni->writequeue) ||
+	if ((&e->list == &ni->writequeue) ||
 	    (PAGE_CACHE_SIZE - e->end < len)) {
 		e = NULL;
 	} else {
@@ -776,7 +781,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
 	spin_unlock(&ni->writequeue_lock);
 
 	if (e) {
-	      got_one:
+	got_one:
 		if (users == 0)
 			kmap(e->page);
 		*ppc = page_address(e->page) + offset;
@@ -803,9 +808,6 @@ void dlm_lowcomms_commit_buffer(void *arg)
 	int users;
 	struct nodeinfo *ni = e->ni;
 
-	if (!atomic_read(&accepting))
-		return;
-
 	spin_lock(&ni->writequeue_lock);
 	users = --e->users;
 	if (users)
@@ -822,7 +824,7 @@ void dlm_lowcomms_commit_buffer(void *arg)
 	}
 	return;
 
-      out:
+out:
 	spin_unlock(&ni->writequeue_lock);
 	return;
 }
@@ -878,7 +880,7 @@ static void initiate_association(int nodeid)
 	cmsg->cmsg_level = IPPROTO_SCTP;
 	cmsg->cmsg_type = SCTP_SNDRCV;
 	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
 	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
 
@@ -892,7 +894,7 @@ static void initiate_association(int nodeid)
 }
 
 /* Send a message */
-static int send_to_sock(struct nodeinfo *ni)
+static void send_to_sock(struct nodeinfo *ni)
 {
 	int ret = 0;
 	struct writequeue_entry *e;
@@ -903,13 +905,13 @@ static int send_to_sock(struct nodeinfo *ni)
 	struct sctp_sndrcvinfo *sinfo;
 	struct kvec iov;
 
-        /* See if we need to init an association before we start
+	/* See if we need to init an association before we start
 	   sending precious messages */
 	spin_lock(&ni->lock);
 	if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
 		spin_unlock(&ni->lock);
 		initiate_association(ni->nodeid);
-		return 0;
+		return;
 	}
 	spin_unlock(&ni->lock);
 
@@ -923,7 +925,7 @@ static int send_to_sock(struct nodeinfo *ni)
 	cmsg->cmsg_level = IPPROTO_SCTP;
 	cmsg->cmsg_type = SCTP_SNDRCV;
 	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
-	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+	sinfo = CMSG_DATA(cmsg);
 	memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
 	sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
 	sinfo->sinfo_assoc_id = ni->assoc_id;
@@ -955,7 +957,7 @@ static int send_to_sock(struct nodeinfo *ni)
 				goto send_error;
 		} else {
 			/* Don't starve people filling buffers */
-			schedule();
+			cond_resched();
 		}
 
 		spin_lock(&ni->writequeue_lock);
@@ -964,15 +966,16 @@ static int send_to_sock(struct nodeinfo *ni)
 
 		if (e->len == 0 && e->users == 0) {
 			list_del(&e->list);
+			kunmap(e->page);
 			free_entry(e);
 			continue;
 		}
 	}
 	spin_unlock(&ni->writequeue_lock);
- out:
-	return ret;
+out:
+	return;
 
- send_error:
+send_error:
 	log_print("Error sending to node %d %d", ni->nodeid, ret);
 	spin_lock(&ni->lock);
 	if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
@@ -982,7 +985,7 @@ static int send_to_sock(struct nodeinfo *ni)
 	} else
 		spin_unlock(&ni->lock);
 
-	return ret;
+	return;
 }
 
 /* Try to send any messages that are pending */
@@ -994,7 +997,7 @@ static void process_output_queue(void)
 	spin_lock_bh(&write_nodes_lock);
 	list_for_each_safe(list, temp, &write_nodes) {
 		struct nodeinfo *ni =
-		    list_entry(list, struct nodeinfo, write_list);
+			list_entry(list, struct nodeinfo, write_list);
 		clear_bit(NI_WRITE_PENDING, &ni->flags);
 		list_del(&ni->write_list);
 
@@ -1106,7 +1109,7 @@ static int dlm_recvd(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&lowcomms_recv_wait, &wait);
 		if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
-			schedule();
+			cond_resched();
 		remove_wait_queue(&lowcomms_recv_wait, &wait);
 		set_current_state(TASK_RUNNING);
 
@@ -1118,12 +1121,12 @@ static int dlm_recvd(void *data)
 
 				/* Don't starve out everyone else */
 				if (++count >= MAX_RX_MSG_COUNT) {
-					schedule();
+					cond_resched();
 					count = 0;
 				}
 			} while (!kthread_should_stop() && ret >=0);
 		}
-		schedule();
+		cond_resched();
 	}
 
 	return 0;
@@ -1138,7 +1141,7 @@ static int dlm_sendd(void *data)
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (write_list_empty())
-			schedule();
+			cond_resched();
 		set_current_state(TASK_RUNNING);
 
 		if (sctp_con.eagain_flag) {
@@ -1166,7 +1169,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_recvd %d", error);
 		return error;
 	}
@@ -1174,7 +1177,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_sendd %d", error);
 		kthread_stop(recv_task);
 		return error;
@@ -1197,43 +1200,28 @@ int dlm_lowcomms_start(void)
 	error = daemons_start();
 	if (error)
 		goto fail_sock;
-	atomic_set(&accepting, 1);
 	return 0;
 
- fail_sock:
+fail_sock:
 	close_connection();
 	return error;
 }
 
-/* Set all the activity flags to prevent any socket activity. */
-
 void dlm_lowcomms_stop(void)
 {
-	atomic_set(&accepting, 0);
+	int i;
+
 	sctp_con.flags = 0x7;
 	daemons_stop();
 	clean_writequeues();
 	close_connection();
 	dealloc_nodeinfo();
 	max_nodeid = 0;
-}
 
-int dlm_lowcomms_init(void)
-{
-	init_waitqueue_head(&lowcomms_recv_wait);
-	spin_lock_init(&write_nodes_lock);
-	INIT_LIST_HEAD(&write_nodes);
-	init_rwsem(&nodeinfo_lock);
-	return 0;
-}
-
-void dlm_lowcomms_exit(void)
-{
-	int i;
+	dlm_local_count = 0;
+	dlm_local_nodeid = 0;
 
 	for (i = 0; i < dlm_local_count; i++)
 		kfree(dlm_local_addr[i]);
-	dlm_local_count = 0;
-	dlm_local_nodeid = 0;
 }
 
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 7289e59b4b..8f2791fc84 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -54,44 +54,59 @@
 #include "config.h"
 
 struct cbuf {
-	unsigned base;
-	unsigned len;
-	unsigned mask;
+	unsigned int base;
+	unsigned int len;
+	unsigned int mask;
 };
 
-#ifndef FALSE
-#define FALSE 0
-#define TRUE 1
-#endif
 #define NODE_INCREMENT 32
+static void cbuf_add(struct cbuf *cb, int n)
+{
+	cb->len += n;
+}
 
-#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0)
-#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
-#define CBUF_EMPTY(cb) ((cb)->len == 0)
-#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
-#define CBUF_EAT(cb, n) do { (cb)->len  -= (n); \
-                             (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0)
-#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
+static int cbuf_data(struct cbuf *cb)
+{
+	return ((cb->base + cb->len) & cb->mask);
+}
+
+static void cbuf_init(struct cbuf *cb, int size)
+{
+	cb->base = cb->len = 0;
+	cb->mask = size-1;
+}
+
+static void cbuf_eat(struct cbuf *cb, int n)
+{
+	cb->len  -= n;
+	cb->base += n;
+	cb->base &= cb->mask;
+}
+
+static bool cbuf_empty(struct cbuf *cb)
+{
+	return cb->len == 0;
+}
 
 /* Maximum number of incoming messages to process before
-   doing a schedule()
+   doing a cond_resched()
 */
 #define MAX_RX_MSG_COUNT 25
 
 struct connection {
 	struct socket *sock;	/* NULL if not connected */
 	uint32_t nodeid;	/* So we know who we are in the list */
-	struct rw_semaphore sock_sem;	/* Stop connect races */
-	struct list_head read_list;	/* On this list when ready for reading */
-	struct list_head write_list;	/* On this list when ready for writing */
-	struct list_head state_list;	/* On this list when ready to connect */
+	struct rw_semaphore sock_sem; /* Stop connect races */
+	struct list_head read_list;   /* On this list when ready for reading */
+	struct list_head write_list;  /* On this list when ready for writing */
+	struct list_head state_list;  /* On this list when ready to connect */
 	unsigned long flags;	/* bit 1,2 = We are on the read/write lists */
 #define CF_READ_PENDING 1
 #define CF_WRITE_PENDING 2
 #define CF_CONNECT_PENDING 3
 #define CF_IS_OTHERCON 4
-	struct list_head writequeue;	/* List of outgoing writequeue_entries */
-	struct list_head listenlist;    /* List of allocated listening sockets */
+	struct list_head writequeue;  /* List of outgoing writequeue_entries */
+	struct list_head listenlist;  /* List of allocated listening sockets */
 	spinlock_t writequeue_lock;
 	int (*rx_action) (struct connection *);	/* What to do when active */
 	struct page *rx_page;
@@ -121,28 +136,27 @@ static struct task_struct *recv_task;
 static struct task_struct *send_task;
 
 static wait_queue_t lowcomms_send_waitq_head;
-static wait_queue_head_t lowcomms_send_waitq;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
 static wait_queue_t lowcomms_recv_waitq_head;
-static wait_queue_head_t lowcomms_recv_waitq;
+static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
 
 /* An array of pointers to connections, indexed by NODEID */
 static struct connection **connections;
-static struct semaphore connections_lock;
+static DECLARE_MUTEX(connections_lock);
 static kmem_cache_t *con_cache;
 static int conn_array_size;
-static atomic_t accepting;
 
 /* List of sockets that have reads pending */
-static struct list_head read_sockets;
-static spinlock_t read_sockets_lock;
+static LIST_HEAD(read_sockets);
+static DEFINE_SPINLOCK(read_sockets_lock);
 
 /* List of sockets which have writes pending */
-static struct list_head write_sockets;
-static spinlock_t write_sockets_lock;
+static LIST_HEAD(write_sockets);
+static DEFINE_SPINLOCK(write_sockets_lock);
 
 /* List of sockets which have connects pending */
-static struct list_head state_sockets;
-static spinlock_t state_sockets_lock;
+static LIST_HEAD(state_sockets);
+static DEFINE_SPINLOCK(state_sockets_lock);
 
 static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 {
@@ -153,12 +167,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 		int new_size = nodeid + NODE_INCREMENT;
 		struct connection **new_conns;
 
-		new_conns = kmalloc(sizeof(struct connection *) *
+		new_conns = kzalloc(sizeof(struct connection *) *
 				    new_size, allocation);
 		if (!new_conns)
 			goto finish;
 
-		memset(new_conns, 0, sizeof(struct connection *) * new_size);
 		memcpy(new_conns, connections,  sizeof(struct connection *) * conn_array_size);
 		conn_array_size = new_size;
 		kfree(connections);
@@ -168,11 +181,10 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 
 	con = connections[nodeid];
 	if (con == NULL && allocation) {
-		con = kmem_cache_alloc(con_cache, allocation);
+		con = kmem_cache_zalloc(con_cache, allocation);
 		if (!con)
 			goto finish;
 
-		memset(con, 0, sizeof(*con));
 		con->nodeid = nodeid;
 		init_rwsem(&con->sock_sem);
 		INIT_LIST_HEAD(&con->writequeue);
@@ -181,7 +193,7 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 		connections[nodeid] = con;
 	}
 
- finish:
+finish:
 	up(&connections_lock);
 	return con;
 }
@@ -220,8 +232,6 @@ static inline void lowcomms_connect_sock(struct connection *con)
 {
 	if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
 		return;
-	if (!atomic_read(&accepting))
-		return;
 
 	spin_lock_bh(&state_sockets_lock);
 	list_add_tail(&con->state_list, &state_sockets);
@@ -232,31 +242,8 @@ static inline void lowcomms_connect_sock(struct connection *con)
 
 static void lowcomms_state_change(struct sock *sk)
 {
-/*	struct connection *con = sock2con(sk); */
-
-	switch (sk->sk_state) {
-	case TCP_ESTABLISHED:
+	if (sk->sk_state == TCP_ESTABLISHED)
 		lowcomms_write_space(sk);
-		break;
-
-	case TCP_FIN_WAIT1:
-	case TCP_FIN_WAIT2:
-	case TCP_TIME_WAIT:
-	case TCP_CLOSE:
-	case TCP_CLOSE_WAIT:
-	case TCP_LAST_ACK:
-	case TCP_CLOSING:
-		/* FIXME: I think this causes more trouble than it solves.
-		   lowcomms wil reconnect anyway when there is something to
-		   send. This just attempts reconnection if a node goes down!
-		*/
-		/* lowcomms_connect_sock(con); */
-		break;
-
-	default:
-		printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state);
-		break;
-	}
 }
 
 /* Make a socket active */
@@ -277,13 +264,12 @@ static int add_sock(struct socket *sock, struct connection *con)
 static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 			  int *addr_len)
 {
-        saddr->ss_family =  dlm_local_addr.ss_family;
-        if (saddr->ss_family == AF_INET) {
+	saddr->ss_family =  dlm_local_addr.ss_family;
+	if (saddr->ss_family == AF_INET) {
 		struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
 		in4_addr->sin_port = cpu_to_be16(port);
 		*addr_len = sizeof(struct sockaddr_in);
-	}
-	else {
+	} else {
 		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
 		in6_addr->sin6_port = cpu_to_be16(port);
 		*addr_len = sizeof(struct sockaddr_in6);
@@ -291,7 +277,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 }
 
 /* Close a remote connection and tidy up */
-static void close_connection(struct connection *con, int and_other)
+static void close_connection(struct connection *con, bool and_other)
 {
 	down_write(&con->sock_sem);
 
@@ -300,11 +286,8 @@ static void close_connection(struct connection *con, int and_other)
 		con->sock = NULL;
 	}
 	if (con->othercon && and_other) {
-		/* Argh! recursion in kernel code!
-		   Actually, this isn't a list so it
-		   will only re-enter once.
-		*/
-		close_connection(con->othercon, FALSE);
+		/* Will only re-enter once. */
+		close_connection(con->othercon, false);
 	}
 	if (con->rx_page) {
 		__free_page(con->rx_page);
@@ -337,7 +320,7 @@ static int receive_from_sock(struct connection *con)
 		con->rx_page = alloc_page(GFP_ATOMIC);
 		if (con->rx_page == NULL)
 			goto out_resched;
-		CBUF_INIT(&con->cb, PAGE_CACHE_SIZE);
+		cbuf_init(&con->cb, PAGE_CACHE_SIZE);
 	}
 
 	msg.msg_control = NULL;
@@ -352,16 +335,16 @@ static int receive_from_sock(struct connection *con)
 	 * iov[0] is the bit of the circular buffer between the current end
 	 * point (cb.base + cb.len) and the end of the buffer.
 	 */
-	iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb);
-	iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb);
+	iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
+	iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
 	iov[1].iov_len = 0;
 
 	/*
 	 * iov[1] is the bit of the circular buffer between the start of the
 	 * buffer and the start of the currently used section (cb.base)
 	 */
-	if (CBUF_DATA(&con->cb) >= con->cb.base) {
-		iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb);
+	if (cbuf_data(&con->cb) >= con->cb.base) {
+		iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
 		iov[1].iov_len = con->cb.base;
 		iov[1].iov_base = page_address(con->rx_page);
 		msg.msg_iovlen = 2;
@@ -378,7 +361,7 @@ static int receive_from_sock(struct connection *con)
 		goto out_close;
 	if (ret == len)
 		call_again_soon = 1;
-	CBUF_ADD(&con->cb, ret);
+	cbuf_add(&con->cb, ret);
 	ret = dlm_process_incoming_buffer(con->nodeid,
 					  page_address(con->rx_page),
 					  con->cb.base, con->cb.len,
@@ -391,35 +374,32 @@ static int receive_from_sock(struct connection *con)
 	}
 	if (ret < 0)
 		goto out_close;
-	CBUF_EAT(&con->cb, ret);
+	cbuf_eat(&con->cb, ret);
 
-	if (CBUF_EMPTY(&con->cb) && !call_again_soon) {
+	if (cbuf_empty(&con->cb) && !call_again_soon) {
 		__free_page(con->rx_page);
 		con->rx_page = NULL;
 	}
 
-      out:
+out:
 	if (call_again_soon)
 		goto out_resched;
 	up_read(&con->sock_sem);
-	ret = 0;
-	goto out_ret;
+	return 0;
 
-      out_resched:
+out_resched:
 	lowcomms_data_ready(con->sock->sk, 0);
 	up_read(&con->sock_sem);
-	ret = 0;
-	schedule();
-	goto out_ret;
+	cond_resched();
+	return 0;
 
-      out_close:
+out_close:
 	up_read(&con->sock_sem);
 	if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
-		close_connection(con, FALSE);
+		close_connection(con, false);
 		/* Reconnect when there is something to send */
 	}
 
-      out_ret:
 	return ret;
 }
 
@@ -434,7 +414,8 @@ static int accept_from_sock(struct connection *con)
 	struct connection *newcon;
 
 	memset(&peeraddr, 0, sizeof(peeraddr));
-	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &newsock);
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
+				  IPPROTO_TCP, &newsock);
 	if (result < 0)
 		return -ENOMEM;
 
@@ -462,7 +443,7 @@ static int accept_from_sock(struct connection *con)
 	/* Get the new node's NODEID */
 	make_sockaddr(&peeraddr, 0, &len);
 	if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
-	    	printk("dlm: connect from non cluster node\n");
+		printk("dlm: connect from non cluster node\n");
 		sock_release(newsock);
 		up_read(&con->sock_sem);
 		return -1;
@@ -483,17 +464,16 @@ static int accept_from_sock(struct connection *con)
 	}
 	down_write(&newcon->sock_sem);
 	if (newcon->sock) {
-	        struct connection *othercon = newcon->othercon;
+		struct connection *othercon = newcon->othercon;
 
 		if (!othercon) {
-			othercon = kmem_cache_alloc(con_cache, GFP_KERNEL);
+			othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
 			if (!othercon) {
 				printk("dlm: failed to allocate incoming socket\n");
 				up_write(&newcon->sock_sem);
 				result = -ENOMEM;
 				goto accept_err;
 			}
-			memset(othercon, 0, sizeof(*othercon));
 			othercon->nodeid = nodeid;
 			othercon->rx_action = receive_from_sock;
 			init_rwsem(&othercon->sock_sem);
@@ -523,7 +503,7 @@ static int accept_from_sock(struct connection *con)
 
 	return 0;
 
-      accept_err:
+accept_err:
 	up_read(&con->sock_sem);
 	sock_release(newsock);
 
@@ -533,7 +513,7 @@ static int accept_from_sock(struct connection *con)
 }
 
 /* Connect a new socket to its peer */
-static int connect_to_sock(struct connection *con)
+static void connect_to_sock(struct connection *con)
 {
 	int result = -EHOSTUNREACH;
 	struct sockaddr_storage saddr;
@@ -542,7 +522,7 @@ static int connect_to_sock(struct connection *con)
 
 	if (con->nodeid == 0) {
 		log_print("attempt to connect sock 0 foiled");
-		return 0;
+		return;
 	}
 
 	down_write(&con->sock_sem);
@@ -556,13 +536,14 @@ static int connect_to_sock(struct connection *con)
 	}
 
 	/* Create a socket to communicate with */
-	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
+	result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
+				  IPPROTO_TCP, &sock);
 	if (result < 0)
 		goto out_err;
 
 	memset(&saddr, 0, sizeof(saddr));
 	if (dlm_nodeid_to_addr(con->nodeid, &saddr))
-	        goto out_err;
+		goto out_err;
 
 	sock->sk->sk_user_data = con;
 	con->rx_action = receive_from_sock;
@@ -574,22 +555,13 @@ static int connect_to_sock(struct connection *con)
 	log_print("connecting to %d", con->nodeid);
 	result =
 		sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
-			       O_NONBLOCK);
+				   O_NONBLOCK);
 	if (result == -EINPROGRESS)
 		result = 0;
-	if (result != 0)
-		goto out_err;
-
-      out:
-	up_write(&con->sock_sem);
-	/*
-	 * Returning an error here means we've given up trying to connect to
-	 * a remote node, otherwise we return 0 and reschedule the connetion
-	 * attempt
-	 */
-	return result;
+	if (result == 0)
+		goto out;
 
-      out_err:
+out_err:
 	if (con->sock) {
 		sock_release(con->sock);
 		con->sock = NULL;
@@ -604,12 +576,15 @@ static int connect_to_sock(struct connection *con)
 		lowcomms_connect_sock(con);
 		result = 0;
 	}
-	goto out;
+out:
+	up_write(&con->sock_sem);
+	return;
 }
 
-static struct socket *create_listen_sock(struct connection *con, struct sockaddr_storage *saddr)
+static struct socket *create_listen_sock(struct connection *con,
+					 struct sockaddr_storage *saddr)
 {
-        struct socket *sock = NULL;
+	struct socket *sock = NULL;
 	mm_segment_t fs;
 	int result = 0;
 	int one = 1;
@@ -629,10 +604,12 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
 
 	fs = get_fs();
 	set_fs(get_ds());
-	result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one));
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
 	set_fs(fs);
 	if (result < 0) {
-		printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result);
+		printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",
+		       result);
 	}
 	sock->sk->sk_user_data = con;
 	con->rx_action = accept_from_sock;
@@ -652,7 +629,8 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
 	fs = get_fs();
 	set_fs(get_ds());
 
-	result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one));
+	result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+				 (char *)&one, sizeof(one));
 	set_fs(fs);
 	if (result < 0) {
 		printk("dlm: Set keepalive failed: %d\n", result);
@@ -666,7 +644,7 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
 		goto create_out;
 	}
 
-      create_out:
+create_out:
 	return sock;
 }
 
@@ -679,10 +657,6 @@ static int listen_for_all(void)
 	int result = -EINVAL;
 
 	/* We don't support multi-homed hosts */
-	memset(con, 0, sizeof(*con));
-	init_rwsem(&con->sock_sem);
-	spin_lock_init(&con->writequeue_lock);
-	INIT_LIST_HEAD(&con->writequeue);
 	set_bit(CF_IS_OTHERCON, &con->flags);
 
 	sock = create_listen_sock(con, &dlm_local_addr);
@@ -731,16 +705,12 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
 	int offset = 0;
 	int users = 0;
 
-	if (!atomic_read(&accepting))
-		return NULL;
-
 	con = nodeid2con(nodeid, allocation);
 	if (!con)
 		return NULL;
 
-	spin_lock(&con->writequeue_lock);
 	e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
-	if (((struct list_head *) e == &con->writequeue) ||
+	if ((&e->list == &con->writequeue) ||
 	    (PAGE_CACHE_SIZE - e->end < len)) {
 		e = NULL;
 	} else {
@@ -751,7 +721,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
 	spin_unlock(&con->writequeue_lock);
 
 	if (e) {
-	      got_one:
+	got_one:
 		if (users == 0)
 			kmap(e->page);
 		*ppc = page_address(e->page) + offset;
@@ -777,10 +747,6 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	struct connection *con = e->con;
 	int users;
 
-	if (!atomic_read(&accepting))
-		return;
-
-	spin_lock(&con->writequeue_lock);
 	users = --e->users;
 	if (users)
 		goto out;
@@ -797,7 +763,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
 	}
 	return;
 
-      out:
+out:
 	spin_unlock(&con->writequeue_lock);
 	return;
 }
@@ -809,7 +775,7 @@ static void free_entry(struct writequeue_entry *e)
 }
 
 /* Send a message */
-static int send_to_sock(struct connection *con)
+static void send_to_sock(struct connection *con)
 {
 	int ret = 0;
 	ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
@@ -846,7 +812,7 @@ static int send_to_sock(struct connection *con)
 		}
 		else {
 			/* Don't starve people filling buffers */
-			schedule();
+			cond_resched();
 		}
 
 		spin_lock(&con->writequeue_lock);
@@ -855,25 +821,26 @@ static int send_to_sock(struct connection *con)
 
 		if (e->len == 0 && e->users == 0) {
 			list_del(&e->list);
+			kunmap(e->page);
 			free_entry(e);
 			continue;
 		}
 	}
 	spin_unlock(&con->writequeue_lock);
-      out:
+out:
 	up_read(&con->sock_sem);
-	return ret;
+	return;
 
-      send_error:
+send_error:
 	up_read(&con->sock_sem);
-	close_connection(con, FALSE);
+	close_connection(con, false);
 	lowcomms_connect_sock(con);
-	return ret;
+	return;
 
-      out_connect:
+out_connect:
 	up_read(&con->sock_sem);
 	lowcomms_connect_sock(con);
-	return 0;
+	return;
 }
 
 static void clean_one_writequeue(struct connection *con)
@@ -904,12 +871,12 @@ int dlm_lowcomms_close(int nodeid)
 	con = nodeid2con(nodeid, 0);
 	if (con) {
 		clean_one_writequeue(con);
-		close_connection(con, TRUE);
+		close_connection(con, true);
 		atomic_set(&con->waiting_requests, 0);
 	}
 	return 0;
 
-      out:
+out:
 	return -1;
 }
 
@@ -940,7 +907,7 @@ static void process_sockets(void)
 	list_for_each_safe(list, temp, &read_sockets) {
 
 		struct connection *con =
-		    list_entry(list, struct connection, read_list);
+			list_entry(list, struct connection, read_list);
 		list_del(&con->read_list);
 		clear_bit(CF_READ_PENDING, &con->flags);
 
@@ -959,7 +926,7 @@ static void process_sockets(void)
 
 			/* Don't starve out everyone else */
 			if (++count >= MAX_RX_MSG_COUNT) {
-				schedule();
+				cond_resched();
 				count = 0;
 			}
 
@@ -977,20 +944,16 @@ static void process_output_queue(void)
 {
 	struct list_head *list;
 	struct list_head *temp;
-	int ret;
 
 	spin_lock_bh(&write_sockets_lock);
 	list_for_each_safe(list, temp, &write_sockets) {
 		struct connection *con =
-		    list_entry(list, struct connection, write_list);
+			list_entry(list, struct connection, write_list);
 		clear_bit(CF_WRITE_PENDING, &con->flags);
 		list_del(&con->write_list);
 
 		spin_unlock_bh(&write_sockets_lock);
-
-		ret = send_to_sock(con);
-		if (ret < 0) {
-		}
+		send_to_sock(con);
 		spin_lock_bh(&write_sockets_lock);
 	}
 	spin_unlock_bh(&write_sockets_lock);
@@ -1000,19 +963,16 @@ static void process_state_queue(void)
 {
 	struct list_head *list;
 	struct list_head *temp;
-	int ret;
 
 	spin_lock_bh(&state_sockets_lock);
 	list_for_each_safe(list, temp, &state_sockets) {
 		struct connection *con =
-		    list_entry(list, struct connection, state_list);
+			list_entry(list, struct connection, state_list);
 		list_del(&con->state_list);
 		clear_bit(CF_CONNECT_PENDING, &con->flags);
 		spin_unlock_bh(&state_sockets_lock);
 
-		ret = connect_to_sock(con);
-		if (ret < 0) {
-		}
+		connect_to_sock(con);
 		spin_lock_bh(&state_sockets_lock);
 	}
 	spin_unlock_bh(&state_sockets_lock);
@@ -1046,14 +1006,13 @@ static int read_list_empty(void)
 /* DLM Transport comms receive daemon */
 static int dlm_recvd(void *data)
 {
-	init_waitqueue_head(&lowcomms_recv_waitq);
 	init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
 	add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
 
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (read_list_empty())
-			schedule();
+			cond_resched();
 		set_current_state(TASK_RUNNING);
 
 		process_sockets();
@@ -1081,14 +1040,13 @@ static int write_and_state_lists_empty(void)
 /* DLM Transport send daemon */
 static int dlm_sendd(void *data)
 {
-	init_waitqueue_head(&lowcomms_send_waitq);
 	init_waitqueue_entry(&lowcomms_send_waitq_head, current);
 	add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
 
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (write_and_state_lists_empty())
-			schedule();
+			cond_resched();
 		set_current_state(TASK_RUNNING);
 
 		process_state_queue();
@@ -1111,7 +1069,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_recvd %d", error);
 		return error;
 	}
@@ -1119,7 +1077,7 @@ static int daemons_start(void)
 
 	p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
 	error = IS_ERR(p);
-       	if (error) {
+	if (error) {
 		log_print("can't start dlm_sendd %d", error);
 		kthread_stop(recv_task);
 		return error;
@@ -1141,21 +1099,20 @@ void dlm_lowcomms_stop(void)
 {
 	int i;
 
-	atomic_set(&accepting, 0);
-
-	/* Set all the activity flags to prevent any
+	/* Set all the flags to prevent any
 	   socket activity.
 	*/
 	for (i = 0; i < conn_array_size; i++) {
 		if (connections[i])
-			connections[i]->flags |= 0x7;
+			connections[i]->flags |= 0xFF;
 	}
+
 	daemons_stop();
 	clean_writequeues();
 
 	for (i = 0; i < conn_array_size; i++) {
 		if (connections[i]) {
-			close_connection(connections[i], TRUE);
+			close_connection(connections[i], true);
 			if (connections[i]->othercon)
 				kmem_cache_free(con_cache, connections[i]->othercon);
 			kmem_cache_free(con_cache, connections[i]);
@@ -1173,24 +1130,12 @@ int dlm_lowcomms_start(void)
 {
 	int error = 0;
 
-	error = -ENOTCONN;
-
-	/*
-	 * Temporarily initialise the waitq head so that lowcomms_send_message
-	 * doesn't crash if it gets called before the thread is fully
-	 * initialised
-	 */
-	init_waitqueue_head(&lowcomms_send_waitq);
-
 	error = -ENOMEM;
-	connections = kmalloc(sizeof(struct connection *) *
+	connections = kzalloc(sizeof(struct connection *) *
 			      NODE_INCREMENT, GFP_KERNEL);
 	if (!connections)
 		goto out;
 
-	memset(connections, 0,
-	       sizeof(struct connection *) * NODE_INCREMENT);
-
 	conn_array_size = NODE_INCREMENT;
 
 	if (dlm_our_addr(&dlm_local_addr, 0)) {
@@ -1203,7 +1148,8 @@ int dlm_lowcomms_start(void)
 	}
 
 	con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
-				      __alignof__(struct connection), 0, NULL, NULL);
+				      __alignof__(struct connection), 0,
+				      NULL, NULL);
 	if (!con_cache)
 		goto fail_free_conn;
 
@@ -1217,40 +1163,20 @@ int dlm_lowcomms_start(void)
 	if (error)
 		goto fail_unlisten;
 
-	atomic_set(&accepting, 1);
-
 	return 0;
 
-      fail_unlisten:
-	close_connection(connections[0], 0);
+fail_unlisten:
+	close_connection(connections[0], false);
 	kmem_cache_free(con_cache, connections[0]);
 	kmem_cache_destroy(con_cache);
 
-      fail_free_conn:
+fail_free_conn:
 	kfree(connections);
 
-      out:
+out:
 	return error;
 }
 
-int dlm_lowcomms_init(void)
-{
-	INIT_LIST_HEAD(&read_sockets);
-	INIT_LIST_HEAD(&write_sockets);
-	INIT_LIST_HEAD(&state_sockets);
-
-	spin_lock_init(&read_sockets_lock);
-	spin_lock_init(&write_sockets_lock);
-	spin_lock_init(&state_sockets_lock);
-	init_MUTEX(&connections_lock);
-
-	return 0;
-}
-
-void dlm_lowcomms_exit(void)
-{
-}
-
 /*
  * Overrides for Emacs so that we follow Linus's tabbing style.
  * Emacs will notice this stuff at the end of the file and automatically
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 2d045e0daa..a9a9618c0d 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -14,8 +14,6 @@
 #ifndef __LOWCOMMS_DOT_H__
 #define __LOWCOMMS_DOT_H__
 
-int dlm_lowcomms_init(void);
-void dlm_lowcomms_exit(void);
 int dlm_lowcomms_start(void);
 void dlm_lowcomms_stop(void);
 int dlm_lowcomms_close(int nodeid);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index a8da8dc36b..162fbae58f 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -16,7 +16,6 @@
 #include "lock.h"
 #include "user.h"
 #include "memory.h"
-#include "lowcomms.h"
 #include "config.h"
 
 #ifdef CONFIG_DLM_DEBUG
@@ -47,20 +46,14 @@ static int __init init_dlm(void)
 	if (error)
 		goto out_config;
 
-	error = dlm_lowcomms_init();
-	if (error)
-		goto out_debug;
-
 	error = dlm_user_init();
 	if (error)
-		goto out_lowcomms;
+		goto out_debug;
 
 	printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
 
 	return 0;
 
- out_lowcomms:
-	dlm_lowcomms_exit();
  out_debug:
 	dlm_unregister_debugfs();
  out_config:
@@ -76,7 +69,6 @@ static int __init init_dlm(void)
 static void __exit exit_dlm(void)
 {
 	dlm_user_exit();
-	dlm_lowcomms_exit();
 	dlm_config_exit();
 	dlm_memory_exit();
 	dlm_lockspace_exit();
-- 
cgit v1.2.2


From 822191a2fa1584a29c3224ab328507adcaeac1ab Mon Sep 17 00:00:00 2001
From: Andrey Mirkin <amirkin@openvz.org>
Date: Wed, 6 Dec 2006 20:31:35 -0800
Subject: [PATCH] skip data conversion in compat_sys_mount when data_page is
 NULL

OpenVZ Linux kernel team has found a problem with mounting in compat mode.

Simple command "mount -t smbfs ..." on Fedora Core 5 distro in 32-bit mode
leads to oops:

  Unable to handle kernel NULL pointer dereference at 0000000000000000 RIP: compat_sys_mount+0xd6/0x290
  Process mount (pid: 14656, veid=300, threadinfo ffff810034d30000, task ffff810034c86bc0)
  Call Trace: ia32_sysret+0x0/0xa

The problem is that data_page pointer can be NULL, so we should skip data
conversion in this case.

Signed-off-by: Andrey Mirkin <amirkin@openvz.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 06dad665b8..7aef5412f4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -871,7 +871,7 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
 
 	retval = -EINVAL;
 
-	if (type_page) {
+	if (type_page && data_page) {
 		if (!strcmp((char *)type_page, SMBFS_NAME)) {
 			do_smb_super_data_conv((void *)data_page);
 		} else if (!strcmp((char *)type_page, NCPFS_NAME)) {
-- 
cgit v1.2.2


From 8fb4fc68ca391862b061b3d358a288ccf6abed39 Mon Sep 17 00:00:00 2001
From: Guillem Jover <guillem.jover@nokia.com>
Date: Wed, 6 Dec 2006 20:32:24 -0800
Subject: [PATCH] Allow user processes to raise their oom_adj value

Currently a user process cannot rise its own oom_adj value (i.e.
unprotecting itself from the OOM killer).  As this value is stored in the
task structure it gets inherited and the unprivileged childs will be unable
to rise it.

The EPERM will be handled by the generic proc fs layer, as only processes
with the proper caps or the owner of the process will be able to write to
the file.  So we allow only the processes with CAP_SYS_RESOURCE to lower
the value, otherwise it will get an EACCES which seems more appropriate
than EPERM.

Signed-off-by: Guillem Jover <guillem.jover@nokia.com>
Acked-by: Andrea Arcangeli <andrea@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 795319c54f..05ace70d05 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -683,8 +683,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	char buffer[PROC_NUMBUF], *end;
 	int oom_adjust;
 
-	if (!capable(CAP_SYS_RESOURCE))
-		return -EPERM;
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
@@ -699,6 +697,10 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	task = get_proc_task(file->f_dentry->d_inode);
 	if (!task)
 		return -ESRCH;
+	if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) {
+		put_task_struct(task);
+		return -EACCES;
+	}
 	task->oomkilladj = oom_adjust;
 	put_task_struct(task);
 	if (end - buffer == 0)
-- 
cgit v1.2.2


From e6b4f8da3a88457148038bc952043e99a7fdba64 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:14 -0800
Subject: [PATCH] slab: remove SLAB_NOFS

SLAB_NOFS is an alias of GFP_NOFS.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/misc.c       | 4 ++--
 fs/cifs/transport.c  | 2 +-
 fs/dquot.c           | 2 +-
 fs/ext3/super.c      | 2 +-
 fs/ext4/super.c      | 2 +-
 fs/hpfs/super.c      | 2 +-
 fs/nfs/read.c        | 2 +-
 fs/nfs/write.c       | 4 ++--
 fs/ntfs/attrib.c     | 2 +-
 fs/ntfs/index.c      | 2 +-
 fs/ntfs/inode.c      | 4 ++--
 fs/ntfs/unistr.c     | 2 +-
 fs/ocfs2/dlm/dlmfs.c | 2 +-
 fs/ocfs2/super.c     | 2 +-
 14 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index bbc9cd34b6..8355daff50 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -153,7 +153,7 @@ cifs_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | SLAB_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | GFP_NOFS);
 
 	/* clear the first few header bytes */
 	/* for most paths, more is cleared in header_assemble */
@@ -192,7 +192,7 @@ cifs_small_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | SLAB_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | GFP_NOFS);
 	if (ret_buf) {
 	/* No need to clear memory here, cleared in header assemble */
 	/*	memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 48d47b46b1..7514237cf3 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 	}
 	
 	temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,
-						    SLAB_KERNEL | SLAB_NOFS);
+						    SLAB_KERNEL | GFP_NOFS);
 	if (temp == NULL)
 		return temp;
 	else {
diff --git a/fs/dquot.c b/fs/dquot.c
index 9af789567e..c6ae6c0e0c 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -600,7 +600,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
 {
 	struct dquot *dquot;
 
-	dquot = kmem_cache_alloc(dquot_cachep, SLAB_NOFS);
+	dquot = kmem_cache_alloc(dquot_cachep, GFP_NOFS);
 	if(!dquot)
 		return NODQUOT;
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index afc2d4f42d..0cf633f0cf 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -445,7 +445,7 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 {
 	struct ext3_inode_info *ei;
 
-	ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS);
+	ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 #ifdef CONFIG_EXT3_FS_POSIX_ACL
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b4b022aa2b..c730cbc840 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -495,7 +495,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 {
 	struct ext4_inode_info *ei;
 
-	ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS);
+	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 450b5e0b47..46ceadd6f1 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -165,7 +165,7 @@ static kmem_cache_t * hpfs_inode_cachep;
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
 	struct hpfs_inode_info *ei;
-	ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, SLAB_NOFS);
+	ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c2e49c397a..56f66f0ccb 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -46,7 +46,7 @@ static mempool_t *nfs_rdata_mempool;
 struct nfs_read_data *nfs_readdata_alloc(size_t len)
 {
 	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 883dd4a1c1..f7dd0d0059 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -93,7 +93,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
 struct nfs_write_data *nfs_commit_alloc(void)
 {
-	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
@@ -112,7 +112,7 @@ void nfs_commit_free(struct nfs_write_data *p)
 struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
 	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
 	if (p) {
 		memset(p, 0, sizeof(*p));
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 9f08e851cf..c577d8e1bd 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1272,7 +1272,7 @@ ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec)
 {
 	ntfs_attr_search_ctx *ctx;
 
-	ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS);
+	ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS);
 	if (ctx)
 		ntfs_attr_init_search_ctx(ctx, ni, mrec);
 	return ctx;
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index e32cde4863..2194eff497 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -38,7 +38,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni)
 {
 	ntfs_index_context *ictx;
 
-	ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS);
+	ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS);
 	if (ictx)
 		*ictx = (ntfs_index_context){ .idx_ni = idx_ni };
 	return ictx;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 2d3de9c898..247989891b 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -324,7 +324,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
 	ntfs_inode *ni;
 
 	ntfs_debug("Entering.");
-	ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS);
+	ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS);
 	if (likely(ni != NULL)) {
 		ni->state = 0;
 		return VFS_I(ni);
@@ -349,7 +349,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void)
 	ntfs_inode *ni;
 
 	ntfs_debug("Entering.");
-	ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS);
+	ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
 	if (likely(ni != NULL)) {
 		ni->state = 0;
 		return ni;
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 6a495f7369..005ca4b0f1 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -266,7 +266,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
 
 	/* We do not trust outside sources. */
 	if (likely(ins)) {
-		ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
+		ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
 		if (likely(ucs)) {
 			for (i = o = 0; i < ins_len; i += wc_len) {
 				wc_len = nls->char2uni(ins + i, ins_len - i,
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 16b8d1ba70..01f91501f7 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -276,7 +276,7 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
 {
 	struct dlmfs_inode_private *ip;
 
-	ip = kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS);
+	ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS);
 	if (!ip)
 		return NULL;
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d9b4214a12..7574d26ee0 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -303,7 +303,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
 {
 	struct ocfs2_inode_info *oi;
 
-	oi = kmem_cache_alloc(ocfs2_inode_cachep, SLAB_NOFS);
+	oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS);
 	if (!oi)
 		return NULL;
 
-- 
cgit v1.2.2


From f7267c0c0721fd02ad3dc37c3d6dd24ccd81d4d6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:15 -0800
Subject: [PATCH] slab: remove SLAB_USER

SLAB_USER is an alias of GFP_USER

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/crypto.c | 4 ++--
 fs/ecryptfs/inode.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f63a7755fe..776b2eed37 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1334,7 +1334,7 @@ int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
 		goto out;
 	}
 	/* Released in this function */
-	page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER);
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, GFP_USER);
 	if (!page_virt) {
 		ecryptfs_printk(KERN_ERR, "Out of memory\n");
 		rc = -ENOMEM;
@@ -1493,7 +1493,7 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
 	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
 
 	/* Read the first page from the underlying file */
-	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER);
+	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER);
 	if (!page_virt) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index dfcc68484f..7091141204 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -404,7 +404,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	/* Released in this function */
 	page_virt =
 	    (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
-				     SLAB_USER);
+				     GFP_USER);
 	if (!page_virt) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR,
-- 
cgit v1.2.2


From e94b1766097d53e6f3ccfb36c8baa562ffeda3fc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:17 -0800
Subject: [PATCH] slab: remove SLAB_KERNEL

SLAB_KERNEL is an alias of GFP_KERNEL.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c          | 2 +-
 fs/affs/super.c          | 2 +-
 fs/afs/super.c           | 2 +-
 fs/befs/linuxvfs.c       | 2 +-
 fs/bfs/inode.c           | 2 +-
 fs/block_dev.c           | 2 +-
 fs/cifs/cifsfs.c         | 2 +-
 fs/cifs/misc.c           | 4 ++--
 fs/cifs/transport.c      | 4 ++--
 fs/coda/inode.c          | 2 +-
 fs/dnotify.c             | 2 +-
 fs/ecryptfs/crypto.c     | 2 +-
 fs/ecryptfs/file.c       | 2 +-
 fs/ecryptfs/inode.c      | 4 ++--
 fs/ecryptfs/keystore.c   | 2 +-
 fs/ecryptfs/main.c       | 4 ++--
 fs/ecryptfs/super.c      | 2 +-
 fs/efs/super.c           | 2 +-
 fs/eventpoll.c           | 4 ++--
 fs/exec.c                | 2 +-
 fs/ext2/super.c          | 2 +-
 fs/fat/cache.c           | 2 +-
 fs/fat/inode.c           | 2 +-
 fs/fcntl.c               | 2 +-
 fs/freevxfs/vxfs_inode.c | 4 ++--
 fs/fuse/dev.c            | 2 +-
 fs/fuse/inode.c          | 2 +-
 fs/hfs/super.c           | 2 +-
 fs/hfsplus/super.c       | 2 +-
 fs/hugetlbfs/inode.c     | 2 +-
 fs/inode.c               | 2 +-
 fs/isofs/inode.c         | 2 +-
 fs/jffs2/super.c         | 2 +-
 fs/locks.c               | 2 +-
 fs/minix/inode.c         | 2 +-
 fs/ncpfs/inode.c         | 2 +-
 fs/nfs/direct.c          | 2 +-
 fs/nfs/inode.c           | 2 +-
 fs/nfs/pagelist.c        | 2 +-
 fs/openpromfs/inode.c    | 2 +-
 fs/proc/inode.c          | 2 +-
 fs/qnx4/inode.c          | 2 +-
 fs/reiserfs/super.c      | 2 +-
 fs/romfs/inode.c         | 2 +-
 fs/smbfs/inode.c         | 2 +-
 fs/smbfs/request.c       | 2 +-
 fs/sysv/inode.c          | 2 +-
 fs/udf/super.c           | 2 +-
 fs/ufs/super.c           | 2 +-
 49 files changed, 55 insertions(+), 55 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9ade139086..52eb10ca65 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -217,7 +217,7 @@ static kmem_cache_t *adfs_inode_cachep;
 static struct inode *adfs_alloc_inode(struct super_block *sb)
 {
 	struct adfs_inode_info *ei;
-	ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, SLAB_KERNEL);
+	ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5ea72c3a16..81c73ec09f 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -71,7 +71,7 @@ static kmem_cache_t * affs_inode_cachep;
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
 	struct affs_inode_info *ei;
-	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, SLAB_KERNEL);
+	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 67d1f5c819..c6ead009bf 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -412,7 +412,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	struct afs_vnode *vnode;
 
 	vnode = (struct afs_vnode *)
-		kmem_cache_alloc(afs_inode_cachep, SLAB_KERNEL);
+		kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
 	if (!vnode)
 		return NULL;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 07f7144f0e..995348df94 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -277,7 +277,7 @@ befs_alloc_inode(struct super_block *sb)
 {
         struct befs_inode_info *bi;
         bi = (struct befs_inode_info *)kmem_cache_alloc(befs_inode_cachep,
-							SLAB_KERNEL);
+							GFP_KERNEL);
         if (!bi)
                 return NULL;
         return &bi->vfs_inode;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index ed27ffb345..2e45123c8f 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -233,7 +233,7 @@ static kmem_cache_t * bfs_inode_cachep;
 static struct inode *bfs_alloc_inode(struct super_block *sb)
 {
 	struct bfs_inode_info *bi;
-	bi = kmem_cache_alloc(bfs_inode_cachep, SLAB_KERNEL);
+	bi = kmem_cache_alloc(bfs_inode_cachep, GFP_KERNEL);
 	if (!bi)
 		return NULL;
 	return &bi->vfs_inode;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 36c0e7af9d..063506705f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -239,7 +239,7 @@ static kmem_cache_t * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
-	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL);
+	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84976cdbe7..84168629ce 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -245,7 +245,7 @@ static struct inode *
 cifs_alloc_inode(struct super_block *sb)
 {
 	struct cifsInodeInfo *cifs_inode;
-	cifs_inode = kmem_cache_alloc(cifs_inode_cachep, SLAB_KERNEL);
+	cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL);
 	if (!cifs_inode)
 		return NULL;
 	cifs_inode->cifsAttrs = 0x20;	/* default */
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 8355daff50..aedf683f01 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -153,7 +153,7 @@ cifs_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | GFP_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_req_poolp, GFP_KERNEL | GFP_NOFS);
 
 	/* clear the first few header bytes */
 	/* for most paths, more is cleared in header_assemble */
@@ -192,7 +192,7 @@ cifs_small_buf_get(void)
    albeit slightly larger than necessary and maxbuffersize 
    defaults to this and can not be bigger */
 	ret_buf =
-	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | GFP_NOFS);
+	    (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, GFP_KERNEL | GFP_NOFS);
 	if (ret_buf) {
 	/* No need to clear memory here, cleared in header assemble */
 	/*	memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 7514237cf3..1f727765a8 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 	}
 	
 	temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,
-						    SLAB_KERNEL | GFP_NOFS);
+						    GFP_KERNEL | GFP_NOFS);
 	if (temp == NULL)
 		return temp;
 	else {
@@ -118,7 +118,7 @@ AllocOplockQEntry(struct inode * pinode, __u16 fid, struct cifsTconInfo * tcon)
 		return NULL;
 	}
 	temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep,
-						       SLAB_KERNEL);
+						       GFP_KERNEL);
 	if (temp == NULL)
 		return temp;
 	else {
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 88d1233211..50cedd2617 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -43,7 +43,7 @@ static kmem_cache_t * coda_inode_cachep;
 static struct inode *coda_alloc_inode(struct super_block *sb)
 {
 	struct coda_inode_info *ei;
-	ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, SLAB_KERNEL);
+	ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	memset(&ei->c_fid, 0, sizeof(struct CodaFid));
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 2b0442db67..e778b1737b 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -77,7 +77,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	inode = filp->f_dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, SLAB_KERNEL);
+	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
 	if (dn == NULL)
 		return -ENOMEM;
 	spin_lock(&inode->i_lock);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 776b2eed37..7196f50fe1 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -628,7 +628,7 @@ int ecryptfs_decrypt_page(struct file *file, struct page *page)
 	num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
 	base_extent = (page->index * num_extents_per_page);
 	lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
-					   SLAB_KERNEL);
+					   GFP_KERNEL);
 	if (!lower_page_virt) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index a92ef05eff..42099e779a 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -250,7 +250,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	int lower_flags;
 
 	/* Released in ecryptfs_release or end of function if failure */
-	file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL);
+	file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL);
 	ecryptfs_set_file_private(file, file_info);
 	if (!file_info) {
 		ecryptfs_printk(KERN_ERR,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 7091141204..8a1945a84c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -369,7 +369,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	BUG_ON(!atomic_read(&lower_dentry->d_count));
 	ecryptfs_set_dentry_private(dentry,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
-						     SLAB_KERNEL));
+						     GFP_KERNEL));
 	if (!ecryptfs_dentry_to_private(dentry)) {
 		rc = -ENOMEM;
 		ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting "
@@ -795,7 +795,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	/* Released at out_free: label */
 	ecryptfs_set_file_private(&fake_ecryptfs_file,
 				  kmem_cache_alloc(ecryptfs_file_info_cache,
-						   SLAB_KERNEL));
+						   GFP_KERNEL));
 	if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index c3746f56d1..745c0f1bfb 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -207,7 +207,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 	/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
 	 * at end of function upon failure */
 	auth_tok_list_item =
-	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL);
+	    kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
 	if (!auth_tok_list_item) {
 		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
 		rc = -ENOMEM;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a78d87d14b..a2c6ccbce3 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -378,7 +378,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 	/* Released in ecryptfs_put_super() */
 	ecryptfs_set_superblock_private(sb,
 					kmem_cache_alloc(ecryptfs_sb_info_cache,
-							 SLAB_KERNEL));
+							 GFP_KERNEL));
 	if (!ecryptfs_superblock_to_private(sb)) {
 		ecryptfs_printk(KERN_WARNING, "Out of memory\n");
 		rc = -ENOMEM;
@@ -402,7 +402,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
 	/* through deactivate_super(sb) from get_sb_nodev() */
 	ecryptfs_set_dentry_private(sb->s_root,
 				    kmem_cache_alloc(ecryptfs_dentry_info_cache,
-						     SLAB_KERNEL));
+						     GFP_KERNEL));
 	if (!ecryptfs_dentry_to_private(sb->s_root)) {
 		ecryptfs_printk(KERN_ERR,
 				"dentry_info_cache alloc failed\n");
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 825757ae48..eaa5daaf10 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -50,7 +50,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
 	struct inode *inode = NULL;
 
 	ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache,
-					  SLAB_KERNEL);
+					  GFP_KERNEL);
 	if (unlikely(!ecryptfs_inode))
 		goto out;
 	ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index b3f50651eb..69b15a996c 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -57,7 +57,7 @@ static kmem_cache_t * efs_inode_cachep;
 static struct inode *efs_alloc_inode(struct super_block *sb)
 {
 	struct efs_inode_info *ei;
-	ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, SLAB_KERNEL);
+	ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ae228ec54e..f5c88435c6 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -961,7 +961,7 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
 
-	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
+	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) {
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
@@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	struct ep_pqueue epq;
 
 	error = -ENOMEM;
-	if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
+	if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
 		goto eexit_1;
 
 	/* Item initialization follow here ... */
diff --git a/fs/exec.c b/fs/exec.c
index d993ea1a81..2092bd2074 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -404,7 +404,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 		bprm->loader += stack_base;
 	bprm->exec += stack_base;
 
-	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!mpnt)
 		return -ENOMEM;
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d8b9abd95d..85c237e738 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -140,7 +140,7 @@ static kmem_cache_t * ext2_inode_cachep;
 static struct inode *ext2_alloc_inode(struct super_block *sb)
 {
 	struct ext2_inode_info *ei;
-	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL);
+	ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 #ifdef CONFIG_EXT2_FS_POSIX_ACL
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 82cc4f59e3..8c27227845 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -63,7 +63,7 @@ void fat_cache_destroy(void)
 
 static inline struct fat_cache *fat_cache_alloc(struct inode *inode)
 {
-	return kmem_cache_alloc(fat_cache_cachep, SLAB_KERNEL);
+	return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL);
 }
 
 static inline void fat_cache_free(struct fat_cache *cache)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 78945b53b0..b58fd0c9f3 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -482,7 +482,7 @@ static kmem_cache_t *fat_inode_cachep;
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
 	struct msdos_inode_info *ei;
-	ei = kmem_cache_alloc(fat_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index e4f26165f1..c03dc9cb21 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -567,7 +567,7 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
 	int result = 0;
 
 	if (on) {
-		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
+		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
 		if (!new)
 			return -ENOMEM;
 	}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 4786d51ad3..d2dd0d7000 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -103,7 +103,7 @@ vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino)
 		struct vxfs_inode_info	*vip;
 		struct vxfs_dinode	*dip;
 
-		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL)))
+		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
 			goto fail;
 		dip = (struct vxfs_dinode *)(bp->b_data + offset);
 		memcpy(vip, dip, sizeof(*vip));
@@ -145,7 +145,7 @@ __vxfs_iget(ino_t ino, struct inode *ilistp)
 		struct vxfs_dinode	*dip;
 		caddr_t			kaddr = (char *)page_address(pp);
 
-		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL)))
+		if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
 			goto fail;
 		dip = (struct vxfs_dinode *)(kaddr + offset);
 		memcpy(vip, dip, sizeof(*vip));
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 66571eafbb..8c15139f27 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -41,7 +41,7 @@ static void fuse_request_init(struct fuse_req *req)
 
 struct fuse_req *fuse_request_alloc(void)
 {
-	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+	struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
 	if (req)
 		fuse_request_init(req);
 	return req;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fc42035703..e039e2047c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -46,7 +46,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	struct inode *inode;
 	struct fuse_inode *fi;
 
-	inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
+	inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
 	if (!inode)
 		return NULL;
 
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 85b17b3fa4..ffc6409132 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -145,7 +145,7 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
 {
 	struct hfs_inode_info *i;
 
-	i = kmem_cache_alloc(hfs_inode_cachep, SLAB_KERNEL);
+	i = kmem_cache_alloc(hfs_inode_cachep, GFP_KERNEL);
 	return i ? &i->vfs_inode : NULL;
 }
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 194eede52f..4a0c70c76c 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -440,7 +440,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 {
 	struct hfsplus_inode_info *i;
 
-	i = kmem_cache_alloc(hfsplus_inode_cachep, SLAB_KERNEL);
+	i = kmem_cache_alloc(hfsplus_inode_cachep, GFP_KERNEL);
 	return i ? &i->vfs_inode : NULL;
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7f4756963d..36e52173a5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -522,7 +522,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 
 	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
 		return NULL;
-	p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL);
+	p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
 	if (unlikely(!p)) {
 		hugetlbfs_inc_free_inodes(sbinfo);
 		return NULL;
diff --git a/fs/inode.c b/fs/inode.c
index 26cdb115ce..dd15984d51 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -109,7 +109,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 	if (sb->s_op->alloc_inode)
 		inode = sb->s_op->alloc_inode(sb);
 	else
-		inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
+		inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);
 
 	if (inode) {
 		struct address_space * const mapping = &inode->i_data;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index c34b862cdb..4b6381cd2c 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -62,7 +62,7 @@ static kmem_cache_t *isofs_inode_cachep;
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
 	struct iso_inode_info *ei;
-	ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(isofs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bc4b8106a4..77be534ce4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -33,7 +33,7 @@ static kmem_cache_t *jffs2_inode_cachep;
 static struct inode *jffs2_alloc_inode(struct super_block *sb)
 {
 	struct jffs2_inode_info *ei;
-	ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, SLAB_KERNEL);
+	ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/locks.c b/fs/locks.c
index e0b6a80649..a7b97d50c1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -147,7 +147,7 @@ static kmem_cache_t *filelock_cache __read_mostly;
 /* Allocate an empty lock structure. */
 static struct file_lock *locks_alloc_lock(void)
 {
-	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
+	return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
 }
 
 static void locks_release_private(struct file_lock *fl)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 1e36bae4d0..ce532c2ded 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -56,7 +56,7 @@ static kmem_cache_t * minix_inode_cachep;
 static struct inode *minix_alloc_inode(struct super_block *sb)
 {
 	struct minix_inode_info *ei;
-	ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, SLAB_KERNEL);
+	ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 72dad552aa..ed84d89922 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -45,7 +45,7 @@ static kmem_cache_t * ncp_inode_cachep;
 static struct inode *ncp_alloc_inode(struct super_block *sb)
 {
 	struct ncp_inode_info *ei;
-	ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, SLAB_KERNEL);
+	ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bdfabf854a..769fd0a0c7 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -143,7 +143,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
 	struct nfs_direct_req *dreq;
 
-	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL);
 	if (!dreq)
 		return NULL;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 08cc4c5919..6b53aae4ed 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1080,7 +1080,7 @@ void nfs4_clear_inode(struct inode *inode)
 struct inode *nfs_alloc_inode(struct super_block *sb)
 {
 	struct nfs_inode *nfsi;
-	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL);
+	nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
 	if (!nfsi)
 		return NULL;
 	nfsi->flags = 0UL;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 829af323f2..a1561a820a 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,7 +26,7 @@ static inline struct nfs_page *
 nfs_page_alloc(void)
 {
 	struct nfs_page	*p;
-	p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL);
+	p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL);
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->wb_list);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 592a6402e8..911d1bcfc5 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -336,7 +336,7 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
 {
 	struct op_inode_info *oi;
 
-	oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL);
+	oi = kmem_cache_alloc(op_inode_cachep, GFP_KERNEL);
 	if (!oi)
 		return NULL;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 49dfb2ab78..b24cdb2f17 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -88,7 +88,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 	struct proc_inode *ei;
 	struct inode *inode;
 
-	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL);
+	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->pid = NULL;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5a41db2a21..5b943eb11d 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -520,7 +520,7 @@ static kmem_cache_t *qnx4_inode_cachep;
 static struct inode *qnx4_alloc_inode(struct super_block *sb)
 {
 	struct qnx4_inode_info *ei;
-	ei = kmem_cache_alloc(qnx4_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(qnx4_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1724999411..32332516d6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -496,7 +496,7 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 {
 	struct reiserfs_inode_info *ei;
 	ei = (struct reiserfs_inode_info *)
-	    kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL);
+	    kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index ddcd9e1ef2..d1b455f9b6 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -555,7 +555,7 @@ static kmem_cache_t * romfs_inode_cachep;
 static struct inode *romfs_alloc_inode(struct super_block *sb)
 {
 	struct romfs_inode_info *ei;
-	ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, SLAB_KERNEL);
+	ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 2c122ee83a..2216171034 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -55,7 +55,7 @@ static kmem_cache_t *smb_inode_cachep;
 static struct inode *smb_alloc_inode(struct super_block *sb)
 {
 	struct smb_inode_info *ei;
-	ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, SLAB_KERNEL);
+	ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 0fb74697ab..3eb1402191 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
 	struct smb_request *req;
 	unsigned char *buf = NULL;
 
-	req = kmem_cache_alloc(req_cachep, SLAB_KERNEL);
+	req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
 	VERBOSE("allocating request: %p\n", req);
 	if (!req)
 		goto out;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index d63c5e48b0..a6ca12b747 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -307,7 +307,7 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
 {
 	struct sysv_inode_info *si;
 
-	si = kmem_cache_alloc(sysv_inode_cachep, SLAB_KERNEL);
+	si = kmem_cache_alloc(sysv_inode_cachep, GFP_KERNEL);
 	if (!si)
 		return NULL;
 	return &si->vfs_inode;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1aea6a4f9a..e50f24221d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -112,7 +112,7 @@ static kmem_cache_t * udf_inode_cachep;
 static struct inode *udf_alloc_inode(struct super_block *sb)
 {
 	struct udf_inode_info *ei;
-	ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL);
+	ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ec79e3091d..85a88c0c5e 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1209,7 +1209,7 @@ static kmem_cache_t * ufs_inode_cachep;
 static struct inode *ufs_alloc_inode(struct super_block *sb)
 {
 	struct ufs_inode_info *ei;
-	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, SLAB_KERNEL);
+	ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
 	ei->vfs_inode.i_version = 1;
-- 
cgit v1.2.2


From e18b890bb0881bbab6f4f1a6cd20d9c60d66b003 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Wed, 6 Dec 2006 20:33:20 -0800
Subject: [PATCH] slab: remove kmem_cache_t

Replace all uses of kmem_cache_t with struct kmem_cache.

The patch was generated using the following script:

	#!/bin/sh
	#
	# Replace one string by another in all the kernel sources.
	#

	set -e

	for file in `find * -name "*.c" -o -name "*.h"|xargs grep -l $1`; do
		quilt add $file
		sed -e "1,\$s/$1/$2/g" $file >/tmp/$$
		mv /tmp/$$ $file
		quilt refresh
	done

The script was run like this

	sh replace kmem_cache_t "struct kmem_cache"

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c                 |  4 ++--
 fs/affs/super.c                 |  4 ++--
 fs/afs/super.c                  |  6 +++---
 fs/aio.c                        |  4 ++--
 fs/befs/linuxvfs.c              |  4 ++--
 fs/bfs/inode.c                  |  4 ++--
 fs/bio.c                        |  4 ++--
 fs/block_dev.c                  |  4 ++--
 fs/buffer.c                     |  4 ++--
 fs/cifs/cifsfs.c                | 14 +++++++-------
 fs/cifs/transport.c             |  2 +-
 fs/coda/inode.c                 |  4 ++--
 fs/configfs/configfs_internal.h |  2 +-
 fs/configfs/mount.c             |  2 +-
 fs/dcache.c                     |  6 +++---
 fs/dcookies.c                   |  2 +-
 fs/dlm/memory.c                 |  2 +-
 fs/dnotify.c                    |  2 +-
 fs/dquot.c                      |  2 +-
 fs/ecryptfs/main.c              |  2 +-
 fs/efs/super.c                  |  4 ++--
 fs/eventpoll.c                  |  4 ++--
 fs/ext2/super.c                 |  4 ++--
 fs/ext3/super.c                 |  4 ++--
 fs/ext4/super.c                 |  4 ++--
 fs/fat/cache.c                  |  4 ++--
 fs/fat/inode.c                  |  4 ++--
 fs/fcntl.c                      |  2 +-
 fs/freevxfs/vxfs_inode.c        |  2 +-
 fs/fuse/dev.c                   |  2 +-
 fs/fuse/inode.c                 |  4 ++--
 fs/gfs2/main.c                  |  4 ++--
 fs/gfs2/util.c                  |  6 +++---
 fs/gfs2/util.h                  |  6 +++---
 fs/hfs/super.c                  |  4 ++--
 fs/hfsplus/super.c              |  4 ++--
 fs/hpfs/super.c                 |  4 ++--
 fs/hugetlbfs/inode.c            |  4 ++--
 fs/inode.c                      |  4 ++--
 fs/inotify_user.c               |  4 ++--
 fs/isofs/inode.c                |  4 ++--
 fs/jbd/journal.c                |  6 +++---
 fs/jbd/revoke.c                 |  4 ++--
 fs/jbd2/journal.c               |  6 +++---
 fs/jbd2/revoke.c                |  4 ++--
 fs/jffs/inode-v23.c             |  4 ++--
 fs/jffs/jffs_fm.c               |  4 ++--
 fs/jffs2/malloc.c               | 18 +++++++++---------
 fs/jffs2/super.c                |  4 ++--
 fs/jfs/jfs_metapage.c           |  4 ++--
 fs/jfs/super.c                  |  4 ++--
 fs/locks.c                      |  4 ++--
 fs/mbcache.c                    |  2 +-
 fs/minix/inode.c                |  4 ++--
 fs/namespace.c                  |  2 +-
 fs/ncpfs/inode.c                |  4 ++--
 fs/nfs/direct.c                 |  2 +-
 fs/nfs/inode.c                  |  4 ++--
 fs/nfs/pagelist.c               |  2 +-
 fs/nfs/read.c                   |  2 +-
 fs/nfs/write.c                  |  2 +-
 fs/nfsd/nfs4state.c             | 10 +++++-----
 fs/ocfs2/dlm/dlmfs.c            |  4 ++--
 fs/ocfs2/dlm/dlmmaster.c        |  2 +-
 fs/ocfs2/extent_map.c           |  2 +-
 fs/ocfs2/inode.h                |  2 +-
 fs/ocfs2/super.c                |  4 ++--
 fs/ocfs2/uptodate.c             |  2 +-
 fs/openpromfs/inode.c           |  4 ++--
 fs/proc/inode.c                 |  4 ++--
 fs/qnx4/inode.c                 |  4 ++--
 fs/reiserfs/super.c             |  4 ++--
 fs/romfs/inode.c                |  4 ++--
 fs/smbfs/inode.c                |  4 ++--
 fs/smbfs/request.c              |  2 +-
 fs/sysfs/mount.c                |  2 +-
 fs/sysfs/sysfs.h                |  2 +-
 fs/sysv/inode.c                 |  4 ++--
 fs/udf/super.c                  |  4 ++--
 fs/ufs/super.c                  |  4 ++--
 80 files changed, 157 insertions(+), 157 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 52eb10ca65..e5a205c9f4 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -212,7 +212,7 @@ static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t *adfs_inode_cachep;
+static struct kmem_cache *adfs_inode_cachep;
 
 static struct inode *adfs_alloc_inode(struct super_block *sb)
 {
@@ -228,7 +228,7 @@ static void adfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 81c73ec09f..3de93e7999 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -66,7 +66,7 @@ affs_write_super(struct super_block *sb)
 	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
 }
 
-static kmem_cache_t * affs_inode_cachep;
+static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
@@ -83,7 +83,7 @@ static void affs_destroy_inode(struct inode *inode)
 	kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct affs_inode_info *ei = (struct affs_inode_info *) foo;
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index c6ead009bf..9a351c4c75 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -35,7 +35,7 @@ struct afs_mount_params {
 	struct afs_volume	*volume;
 };
 
-static void afs_i_init_once(void *foo, kmem_cache_t *cachep,
+static void afs_i_init_once(void *foo, struct kmem_cache *cachep,
 			    unsigned long flags);
 
 static int afs_get_sb(struct file_system_type *fs_type,
@@ -65,7 +65,7 @@ static struct super_operations afs_super_ops = {
 	.put_super	= afs_put_super,
 };
 
-static kmem_cache_t *afs_inode_cachep;
+static struct kmem_cache *afs_inode_cachep;
 static atomic_t afs_count_active_inodes;
 
 /*****************************************************************************/
@@ -384,7 +384,7 @@ static void afs_put_super(struct super_block *sb)
 /*
  * initialise an inode cache slab element prior to any use
  */
-static void afs_i_init_once(void *_vnode, kmem_cache_t *cachep,
+static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 			    unsigned long flags)
 {
 	struct afs_vnode *vnode = (struct afs_vnode *) _vnode;
diff --git a/fs/aio.c b/fs/aio.c
index 287a1bc7a1..13aa9298ab 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -47,8 +47,8 @@ unsigned long aio_nr;		/* current system wide number of aio requests */
 unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
 /*----end sysctl variables---*/
 
-static kmem_cache_t	*kiocb_cachep;
-static kmem_cache_t	*kioctx_cachep;
+static struct kmem_cache	*kiocb_cachep;
+static struct kmem_cache	*kioctx_cachep;
 
 static struct workqueue_struct *aio_wq;
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 995348df94..bce402eee5 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -61,7 +61,7 @@ static const struct super_operations befs_sops = {
 };
 
 /* slab cache for befs_inode_info objects */
-static kmem_cache_t *befs_inode_cachep;
+static struct kmem_cache *befs_inode_cachep;
 
 static const struct file_operations befs_dir_operations = {
 	.read		= generic_read_dir,
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
         kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
         struct befs_inode_info *bi = (struct befs_inode_info *) foo;
 	
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 2e45123c8f..eac175ed9f 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -228,7 +228,7 @@ static void bfs_write_super(struct super_block *s)
 	unlock_kernel();
 }
 
-static kmem_cache_t * bfs_inode_cachep;
+static struct kmem_cache * bfs_inode_cachep;
 
 static struct inode *bfs_alloc_inode(struct super_block *sb)
 {
@@ -244,7 +244,7 @@ static void bfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct bfs_inode_info *bi = foo;
 
diff --git a/fs/bio.c b/fs/bio.c
index 50c40ce2ce..7ec737eda7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
 
 #define BIO_POOL_SIZE 256
 
-static kmem_cache_t *bio_slab __read_mostly;
+static struct kmem_cache *bio_slab __read_mostly;
 
 #define BIOVEC_NR_POOLS 6
 
@@ -44,7 +44,7 @@ mempool_t *bio_split_pool __read_mostly;
 struct biovec_slab {
 	int nr_vecs;
 	char *name; 
-	kmem_cache_t *slab;
+	struct kmem_cache *slab;
 };
 
 /*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 063506705f..13816b4d76 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -235,7 +235,7 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
  */
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
-static kmem_cache_t * bdev_cachep __read_mostly;
+static struct kmem_cache * bdev_cachep __read_mostly;
 
 static struct inode *bdev_alloc_inode(struct super_block *sb)
 {
@@ -253,7 +253,7 @@ static void bdev_destroy_inode(struct inode *inode)
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct bdev_inode *ei = (struct bdev_inode *) foo;
 	struct block_device *bdev = &ei->bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index 35527dca1d..a8ca0ac214 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2908,7 +2908,7 @@ asmlinkage long sys_bdflush(int func, long data)
 /*
  * Buffer-head allocation
  */
-static kmem_cache_t *bh_cachep;
+static struct kmem_cache *bh_cachep;
 
 /*
  * Once the number of bh's in the machine exceeds this level, we start
@@ -2961,7 +2961,7 @@ void free_buffer_head(struct buffer_head *bh)
 EXPORT_SYMBOL(free_buffer_head);
 
 static void
-init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
+init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
 {
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 			    SLAB_CTOR_CONSTRUCTOR) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84168629ce..e6b5866e50 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -81,7 +81,7 @@ extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
-extern kmem_cache_t *cifs_oplock_cachep;
+extern struct kmem_cache *cifs_oplock_cachep;
 
 static int
 cifs_read_super(struct super_block *sb, void *data,
@@ -232,11 +232,11 @@ static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd)
 		return generic_permission(inode, mask, NULL);
 }
 
-static kmem_cache_t *cifs_inode_cachep;
-static kmem_cache_t *cifs_req_cachep;
-static kmem_cache_t *cifs_mid_cachep;
-kmem_cache_t *cifs_oplock_cachep;
-static kmem_cache_t *cifs_sm_req_cachep;
+static struct kmem_cache *cifs_inode_cachep;
+static struct kmem_cache *cifs_req_cachep;
+static struct kmem_cache *cifs_mid_cachep;
+struct kmem_cache *cifs_oplock_cachep;
+static struct kmem_cache *cifs_sm_req_cachep;
 mempool_t *cifs_sm_req_poolp;
 mempool_t *cifs_req_poolp;
 mempool_t *cifs_mid_poolp;
@@ -668,7 +668,7 @@ const struct file_operations cifs_dir_ops = {
 };
 
 static void
-cifs_init_once(void *inode, kmem_cache_t * cachep, unsigned long flags)
+cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct cifsInodeInfo *cifsi = inode;
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 1f727765a8..f80007eaeb 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -34,7 +34,7 @@
 #include "cifs_debug.h"
   
 extern mempool_t *cifs_mid_poolp;
-extern kmem_cache_t *cifs_oplock_cachep;
+extern struct kmem_cache *cifs_oplock_cachep;
 
 static struct mid_q_entry *
 AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 50cedd2617..b64659fa82 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -38,7 +38,7 @@ static void coda_clear_inode(struct inode *);
 static void coda_put_super(struct super_block *);
 static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
 
-static kmem_cache_t * coda_inode_cachep;
+static struct kmem_cache * coda_inode_cachep;
 
 static struct inode *coda_alloc_inode(struct super_block *sb)
 {
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
 	kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct coda_inode_info *ei = (struct coda_inode_info *) foo;
 
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 3f4ff7a242..f92cd303d2 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,7 +49,7 @@ struct configfs_dirent {
 #define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
 
 extern struct vfsmount * configfs_mount;
-extern kmem_cache_t *configfs_dir_cachep;
+extern struct kmem_cache *configfs_dir_cachep;
 
 extern int configfs_is_root(struct config_item *item);
 
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 68bd5c93ca..ed678529eb 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -38,7 +38,7 @@
 
 struct vfsmount * configfs_mount = NULL;
 struct super_block * configfs_sb = NULL;
-kmem_cache_t *configfs_dir_cachep;
+struct kmem_cache *configfs_dir_cachep;
 static int configfs_mnt_count = 0;
 
 static struct super_operations configfs_ops = {
diff --git a/fs/dcache.c b/fs/dcache.c
index fd4a428998..a7c67cee5d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -43,7 +43,7 @@ static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
-static kmem_cache_t *dentry_cache __read_mostly;
+static struct kmem_cache *dentry_cache __read_mostly;
 
 #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
 
@@ -2072,10 +2072,10 @@ static void __init dcache_init(unsigned long mempages)
 }
 
 /* SLAB cache for __getname() consumers */
-kmem_cache_t *names_cachep __read_mostly;
+struct kmem_cache *names_cachep __read_mostly;
 
 /* SLAB cache for file structures */
-kmem_cache_t *filp_cachep __read_mostly;
+struct kmem_cache *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
diff --git a/fs/dcookies.c b/fs/dcookies.c
index 0c4b067485..21af1629f9 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -37,7 +37,7 @@ struct dcookie_struct {
 
 static LIST_HEAD(dcookie_users);
 static DEFINE_MUTEX(dcookie_mutex);
-static kmem_cache_t *dcookie_cache __read_mostly;
+static struct kmem_cache *dcookie_cache __read_mostly;
 static struct list_head *dcookie_hashtable __read_mostly;
 static size_t hash_size __read_mostly;
 
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 989b608fd8..5352b03ff5 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -15,7 +15,7 @@
 #include "config.h"
 #include "memory.h"
 
-static kmem_cache_t *lkb_cache;
+static struct kmem_cache *lkb_cache;
 
 
 int dlm_memory_init(void)
diff --git a/fs/dnotify.c b/fs/dnotify.c
index e778b1737b..1f26a2b9ee 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -23,7 +23,7 @@
 
 int dir_notify_enable __read_mostly = 1;
 
-static kmem_cache_t *dn_cache __read_mostly;
+static struct kmem_cache *dn_cache __read_mostly;
 
 static void redo_inode_mask(struct inode *inode)
 {
diff --git a/fs/dquot.c b/fs/dquot.c
index c6ae6c0e0c..f9cd5e23eb 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -131,7 +131,7 @@ static struct quota_format_type *quota_formats;	/* List of registered formats */
 static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
 
 /* SLAB cache for dquot structures */
-static kmem_cache_t *dquot_cachep;
+static struct kmem_cache *dquot_cachep;
 
 int register_quota_format(struct quota_format_type *fmt)
 {
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a2c6ccbce3..306f8fbd1a 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -546,7 +546,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
 }
 
 static struct ecryptfs_cache_info {
-	kmem_cache_t **cache;
+	struct kmem_cache **cache;
 	const char *name;
 	size_t size;
 	void (*ctor)(void*, struct kmem_cache *, unsigned long);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 69b15a996c..dfebf21289 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -52,7 +52,7 @@ static struct pt_types sgi_pt_types[] = {
 };
 
 
-static kmem_cache_t * efs_inode_cachep;
+static struct kmem_cache * efs_inode_cachep;
 
 static struct inode *efs_alloc_inode(struct super_block *sb)
 {
@@ -68,7 +68,7 @@ static void efs_destroy_inode(struct inode *inode)
 	kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct efs_inode_info *ei = (struct efs_inode_info *) foo;
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f5c88435c6..88a6f8d0b8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -283,10 +283,10 @@ static struct mutex epmutex;
 static struct poll_safewake psw;
 
 /* Slab cache used to allocate "struct epitem" */
-static kmem_cache_t *epi_cache __read_mostly;
+static struct kmem_cache *epi_cache __read_mostly;
 
 /* Slab cache used to allocate "struct eppoll_entry" */
-static kmem_cache_t *pwq_cache __read_mostly;
+static struct kmem_cache *pwq_cache __read_mostly;
 
 /* Virtual fs used to allocate inodes for eventpoll files */
 static struct vfsmount *eventpoll_mnt __read_mostly;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 85c237e738..3aafb1dd91 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -135,7 +135,7 @@ static void ext2_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t * ext2_inode_cachep;
+static struct kmem_cache * ext2_inode_cachep;
 
 static struct inode *ext2_alloc_inode(struct super_block *sb)
 {
@@ -156,7 +156,7 @@ static void ext2_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0cf633f0cf..9856565d4d 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -436,7 +436,7 @@ static void ext3_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t *ext3_inode_cachep;
+static struct kmem_cache *ext3_inode_cachep;
 
 /*
  * Called inside transaction, so use GFP_NOFS
@@ -462,7 +462,7 @@ static void ext3_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c730cbc840..f2e8c4aa0f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -486,7 +486,7 @@ static void ext4_put_super (struct super_block * sb)
 	return;
 }
 
-static kmem_cache_t *ext4_inode_cachep;
+static struct kmem_cache *ext4_inode_cachep;
 
 /*
  * Called inside transaction, so use GFP_NOFS
@@ -513,7 +513,7 @@ static void ext4_destroy_inode(struct inode *inode)
 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
 
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 8c27227845..05c2941c74 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -34,9 +34,9 @@ static inline int fat_max_cache(struct inode *inode)
 	return FAT_MAX_CACHE;
 }
 
-static kmem_cache_t *fat_cache_cachep;
+static struct kmem_cache *fat_cache_cachep;
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct fat_cache *cache = (struct fat_cache *)foo;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index b58fd0c9f3..a9e4688582 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -477,7 +477,7 @@ static void fat_put_super(struct super_block *sb)
 	kfree(sbi);
 }
 
-static kmem_cache_t *fat_inode_cachep;
+static struct kmem_cache *fat_inode_cachep;
 
 static struct inode *fat_alloc_inode(struct super_block *sb)
 {
@@ -493,7 +493,7 @@ static void fat_destroy_inode(struct inode *inode)
 	kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
 
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c03dc9cb21..4740d35e52 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -553,7 +553,7 @@ int send_sigurg(struct fown_struct *fown)
 }
 
 static DEFINE_RWLOCK(fasync_lock);
-static kmem_cache_t *fasync_cache __read_mostly;
+static struct kmem_cache *fasync_cache __read_mostly;
 
 /*
  * fasync_helper() is used by some character device drivers (mainly mice)
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index d2dd0d7000..0b7ae897cb 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -46,7 +46,7 @@ extern const struct address_space_operations vxfs_immed_aops;
 
 extern struct inode_operations vxfs_immed_symlink_iops;
 
-kmem_cache_t		*vxfs_inode_cachep;
+struct kmem_cache		*vxfs_inode_cachep;
 
 
 #ifdef DIAGNOSTIC
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8c15139f27..357764d85f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,7 @@
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 
-static kmem_cache_t *fuse_req_cachep;
+static struct kmem_cache *fuse_req_cachep;
 
 static struct fuse_conn *fuse_get_conn(struct file *file)
 {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e039e2047c..2bdc652b8b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -22,7 +22,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
 MODULE_DESCRIPTION("Filesystem in Userspace");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t *fuse_inode_cachep;
+static struct kmem_cache *fuse_inode_cachep;
 struct list_head fuse_conn_list;
 DEFINE_MUTEX(fuse_mutex);
 
@@ -601,7 +601,7 @@ static struct file_system_type fuse_fs_type = {
 static decl_subsys(fuse, NULL, NULL);
 static decl_subsys(connections, NULL, NULL);
 
-static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
+static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep,
 				 unsigned long flags)
 {
 	struct inode * inode = foo;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 9889c1eace..7c1a9e22a5 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -25,7 +25,7 @@
 #include "util.h"
 #include "glock.h"
 
-static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct gfs2_inode *ip = foo;
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
@@ -37,7 +37,7 @@ static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long
 	}
 }
 
-static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct gfs2_glock *gl = foo;
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 196c604faa..e5707a9f78 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -23,9 +23,9 @@
 #include "lm.h"
 #include "util.h"
 
-kmem_cache_t *gfs2_glock_cachep __read_mostly;
-kmem_cache_t *gfs2_inode_cachep __read_mostly;
-kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
+struct kmem_cache *gfs2_glock_cachep __read_mostly;
+struct kmem_cache *gfs2_inode_cachep __read_mostly;
+struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
 
 void gfs2_assert_i(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 76a50899fe..7984dcf89a 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -146,9 +146,9 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__);
 
 
-extern kmem_cache_t *gfs2_glock_cachep;
-extern kmem_cache_t *gfs2_inode_cachep;
-extern kmem_cache_t *gfs2_bufdata_cachep;
+extern struct kmem_cache *gfs2_glock_cachep;
+extern struct kmem_cache *gfs2_inode_cachep;
+extern struct kmem_cache *gfs2_bufdata_cachep;
 
 static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
 					   unsigned int *p)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index ffc6409132..a369879660 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -24,7 +24,7 @@
 #include "hfs_fs.h"
 #include "btree.h"
 
-static kmem_cache_t *hfs_inode_cachep;
+static struct kmem_cache *hfs_inode_cachep;
 
 MODULE_LICENSE("GPL");
 
@@ -430,7 +430,7 @@ static struct file_system_type hfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfs_init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hfs_inode_info *i = p;
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4a0c70c76c..0f513c6bf8 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -434,7 +434,7 @@ MODULE_AUTHOR("Brad Boyer");
 MODULE_DESCRIPTION("Extended Macintosh Filesystem");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t *hfsplus_inode_cachep;
+static struct kmem_cache *hfsplus_inode_cachep;
 
 static struct inode *hfsplus_alloc_inode(struct super_block *sb)
 {
@@ -467,7 +467,7 @@ static struct file_system_type hfsplus_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void hfsplus_init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hfsplus_inode_info *i = p;
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 46ceadd6f1..34d68e2117 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -160,7 +160,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t * hpfs_inode_cachep;
+static struct kmem_cache * hpfs_inode_cachep;
 
 static struct inode *hpfs_alloc_inode(struct super_block *sb)
 {
@@ -177,7 +177,7 @@ static void hpfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 36e52173a5..0706f5aac6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -513,7 +513,7 @@ static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
 }
 
 
-static kmem_cache_t *hugetlbfs_inode_cachep;
+static struct kmem_cache *hugetlbfs_inode_cachep;
 
 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 {
@@ -545,7 +545,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 };
 
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
 
diff --git a/fs/inode.c b/fs/inode.c
index dd15984d51..699aa4f7aa 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -97,7 +97,7 @@ static DEFINE_MUTEX(iprune_mutex);
  */
 struct inodes_stat_t inodes_stat;
 
-static kmem_cache_t * inode_cachep __read_mostly;
+static struct kmem_cache * inode_cachep __read_mostly;
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
@@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode)
 
 EXPORT_SYMBOL(inode_init_once);
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct inode * inode = (struct inode *) foo;
 
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 017cb0f134..e1956e6f11 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -34,8 +34,8 @@
 
 #include <asm/ioctls.h>
 
-static kmem_cache_t *watch_cachep __read_mostly;
-static kmem_cache_t *event_cachep __read_mostly;
+static struct kmem_cache *watch_cachep __read_mostly;
+static struct kmem_cache *event_cachep __read_mostly;
 
 static struct vfsmount *inotify_mnt __read_mostly;
 
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 4b6381cd2c..ea55b6c469 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -57,7 +57,7 @@ static void isofs_put_super(struct super_block *sb)
 static void isofs_read_inode(struct inode *);
 static int isofs_statfs (struct dentry *, struct kstatfs *);
 
-static kmem_cache_t *isofs_inode_cachep;
+static struct kmem_cache *isofs_inode_cachep;
 
 static struct inode *isofs_alloc_inode(struct super_block *sb)
 {
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
 	kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct iso_inode_info *ei = foo;
 
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b85c686b60..a8774bed20 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1630,7 +1630,7 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size)  (size >> 11)
 
-static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
 };
@@ -1693,7 +1693,7 @@ void jbd_slab_free(void *ptr,  size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *journal_head_cache;
+static struct kmem_cache *journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
@@ -1996,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void)
 
 #endif
 
-kmem_cache_t *jbd_handle_cache;
+struct kmem_cache *jbd_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index c532429d8d..d204ab394f 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -70,8 +70,8 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *revoke_record_cache;
-static kmem_cache_t *revoke_table_cache;
+static struct kmem_cache *revoke_record_cache;
+static struct kmem_cache *revoke_table_cache;
 
 /* Each revoke record represents one single revoked block.  During
    journal replay, this involves recording the transaction ID of the
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c60f378b0f..50356019ae 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1641,7 +1641,7 @@ void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size)  (size >> 11)
 
-static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static struct kmem_cache *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k"
 };
@@ -1704,7 +1704,7 @@ void jbd2_slab_free(void *ptr,  size_t size)
 /*
  * Journal_head storage management
  */
-static kmem_cache_t *jbd2_journal_head_cache;
+static struct kmem_cache *jbd2_journal_head_cache;
 #ifdef CONFIG_JBD_DEBUG
 static atomic_t nr_journal_heads = ATOMIC_INIT(0);
 #endif
@@ -2007,7 +2007,7 @@ static void __exit jbd2_remove_jbd_proc_entry(void)
 
 #endif
 
-kmem_cache_t *jbd2_handle_cache;
+struct kmem_cache *jbd2_handle_cache;
 
 static int __init journal_init_handle_cache(void)
 {
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 380d19917f..f506646ad0 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -70,8 +70,8 @@
 #include <linux/init.h>
 #endif
 
-static kmem_cache_t *jbd2_revoke_record_cache;
-static kmem_cache_t *jbd2_revoke_table_cache;
+static struct kmem_cache *jbd2_revoke_record_cache;
+static struct kmem_cache *jbd2_revoke_table_cache;
 
 /* Each revoke record represents one single revoked block.  During
    journal replay, this involves recording the transaction ID of the
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 3f7899ea4c..9f15bce920 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -61,8 +61,8 @@ static const struct file_operations jffs_dir_operations;
 static struct inode_operations jffs_dir_inode_operations;
 static const struct address_space_operations jffs_address_operations;
 
-kmem_cache_t     *node_cache = NULL;
-kmem_cache_t     *fm_cache = NULL;
+struct kmem_cache     *node_cache = NULL;
+struct kmem_cache     *fm_cache = NULL;
 
 /* Called by the VFS at mount time to initialize the whole file system.  */
 static int jffs_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
index 29b68d939b..077258b210 100644
--- a/fs/jffs/jffs_fm.c
+++ b/fs/jffs/jffs_fm.c
@@ -29,8 +29,8 @@ static int jffs_mark_obsolete(struct jffs_fmcontrol *fmc, __u32 fm_offset);
 static struct jffs_fm *jffs_alloc_fm(void);
 static void jffs_free_fm(struct jffs_fm *n);
 
-extern kmem_cache_t     *fm_cache;
-extern kmem_cache_t     *node_cache;
+extern struct kmem_cache     *fm_cache;
+extern struct kmem_cache     *node_cache;
 
 #if CONFIG_JFFS_FS_VERBOSE > 0
 void
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 33f2910050..83f9881ec4 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -19,16 +19,16 @@
 
 /* These are initialised to NULL in the kernel startup code.
    If you're porting to other operating systems, beware */
-static kmem_cache_t *full_dnode_slab;
-static kmem_cache_t *raw_dirent_slab;
-static kmem_cache_t *raw_inode_slab;
-static kmem_cache_t *tmp_dnode_info_slab;
-static kmem_cache_t *raw_node_ref_slab;
-static kmem_cache_t *node_frag_slab;
-static kmem_cache_t *inode_cache_slab;
+static struct kmem_cache *full_dnode_slab;
+static struct kmem_cache *raw_dirent_slab;
+static struct kmem_cache *raw_inode_slab;
+static struct kmem_cache *tmp_dnode_info_slab;
+static struct kmem_cache *raw_node_ref_slab;
+static struct kmem_cache *node_frag_slab;
+static struct kmem_cache *inode_cache_slab;
 #ifdef CONFIG_JFFS2_FS_XATTR
-static kmem_cache_t *xattr_datum_cache;
-static kmem_cache_t *xattr_ref_cache;
+static struct kmem_cache *xattr_datum_cache;
+static struct kmem_cache *xattr_ref_cache;
 #endif
 
 int __init jffs2_create_slab_caches(void)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 77be534ce4..7deb782540 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -28,7 +28,7 @@
 
 static void jffs2_put_super(struct super_block *);
 
-static kmem_cache_t *jffs2_inode_cachep;
+static struct kmem_cache *jffs2_inode_cachep;
 
 static struct inode *jffs2_alloc_inode(struct super_block *sb)
 {
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
 	kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
 
-static void jffs2_i_init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo;
 
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 0cccd1c39d..b1a1c72960 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -74,7 +74,7 @@ static inline void lock_metapage(struct metapage *mp)
 }
 
 #define METAPOOL_MIN_PAGES 32
-static kmem_cache_t *metapage_cache;
+static struct kmem_cache *metapage_cache;
 static mempool_t *metapage_mempool;
 
 #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
@@ -180,7 +180,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
 
 #endif
 
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct metapage *mp = (struct metapage *)foo;
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 9c1c6e0e63..ca3e191285 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -44,7 +44,7 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
 MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
 MODULE_LICENSE("GPL");
 
-static kmem_cache_t * jfs_inode_cachep;
+static struct kmem_cache * jfs_inode_cachep;
 
 static struct super_operations jfs_super_operations;
 static struct export_operations jfs_export_operations;
@@ -748,7 +748,7 @@ static struct file_system_type jfs_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
 
diff --git a/fs/locks.c b/fs/locks.c
index a7b97d50c1..1cb0c57fed 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -142,7 +142,7 @@ int lease_break_time = 45;
 static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
-static kmem_cache_t *filelock_cache __read_mostly;
+static struct kmem_cache *filelock_cache __read_mostly;
 
 /* Allocate an empty lock structure. */
 static struct file_lock *locks_alloc_lock(void)
@@ -199,7 +199,7 @@ EXPORT_SYMBOL(locks_init_lock);
  * Initialises the fields of the file lock which are invariant for
  * free file_locks.
  */
-static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags)
 {
 	struct file_lock *lock = (struct file_lock *) foo;
 
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 0ff71256e6..deeb9dc062 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -85,7 +85,7 @@ struct mb_cache {
 #ifndef MB_CACHE_INDEXES_COUNT
 	int				c_indexes_count;
 #endif
-	kmem_cache_t			*c_entry_cache;
+	struct kmem_cache			*c_entry_cache;
 	struct list_head		*c_block_hash;
 	struct list_head		*c_indexes_hash[0];
 };
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index ce532c2ded..629e09b38c 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -51,7 +51,7 @@ static void minix_put_super(struct super_block *sb)
 	return;
 }
 
-static kmem_cache_t * minix_inode_cachep;
+static struct kmem_cache * minix_inode_cachep;
 
 static struct inode *minix_alloc_inode(struct super_block *sb)
 {
@@ -67,7 +67,7 @@ static void minix_destroy_inode(struct inode *inode)
 	kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct minix_inode_info *ei = (struct minix_inode_info *) foo;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 55442a6cf2..b00ac84ebb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -36,7 +36,7 @@ static int event;
 
 static struct list_head *mount_hashtable __read_mostly;
 static int hash_mask __read_mostly, hash_bits __read_mostly;
-static kmem_cache_t *mnt_cache __read_mostly;
+static struct kmem_cache *mnt_cache __read_mostly;
 static struct rw_semaphore namespace_sem;
 
 /* /sys/fs */
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index ed84d89922..fae53243bb 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -40,7 +40,7 @@ static void ncp_delete_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
 static int  ncp_statfs(struct dentry *, struct kstatfs *);
 
-static kmem_cache_t * ncp_inode_cachep;
+static struct kmem_cache * ncp_inode_cachep;
 
 static struct inode *ncp_alloc_inode(struct super_block *sb)
 {
@@ -56,7 +56,7 @@ static void ncp_destroy_inode(struct inode *inode)
 	kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 769fd0a0c7..2f488e1d9b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -58,7 +58,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-static kmem_cache_t *nfs_direct_cachep;
+static struct kmem_cache *nfs_direct_cachep;
 
 /*
  * This represents a set of asynchronous requests that we're waiting on
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6b53aae4ed..15afa460e6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -55,7 +55,7 @@ static int nfs_update_inode(struct inode *, struct nfs_fattr *);
 
 static void nfs_zap_acl_cache(struct inode *);
 
-static kmem_cache_t * nfs_inode_cachep;
+static struct kmem_cache * nfs_inode_cachep;
 
 static inline unsigned long
 nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
@@ -1111,7 +1111,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 #endif
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
 
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a1561a820a..3fbfc2f033 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,7 +20,7 @@
 
 #define NFS_PARANOIA 1
 
-static kmem_cache_t *nfs_page_cachep;
+static struct kmem_cache *nfs_page_cachep;
 
 static inline struct nfs_page *
 nfs_page_alloc(void)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 56f66f0ccb..244a8c45b6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -38,7 +38,7 @@ static int nfs_pagein_one(struct list_head *, struct inode *);
 static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
-static kmem_cache_t *nfs_rdata_cachep;
+static struct kmem_cache *nfs_rdata_cachep;
 static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f7dd0d0059..41b07288f9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 
-static kmem_cache_t *nfs_wdata_cachep;
+static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e431e93ab5..640c92b2a9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -84,10 +84,10 @@ static void nfs4_set_recdir(char *recdir);
  */
 static DEFINE_MUTEX(client_mutex);
 
-static kmem_cache_t *stateowner_slab = NULL;
-static kmem_cache_t *file_slab = NULL;
-static kmem_cache_t *stateid_slab = NULL;
-static kmem_cache_t *deleg_slab = NULL;
+static struct kmem_cache *stateowner_slab = NULL;
+static struct kmem_cache *file_slab = NULL;
+static struct kmem_cache *stateid_slab = NULL;
+static struct kmem_cache *deleg_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -1003,7 +1003,7 @@ alloc_init_file(struct inode *ino)
 }
 
 static void
-nfsd4_free_slab(kmem_cache_t **slab)
+nfsd4_free_slab(struct kmem_cache **slab)
 {
 	if (*slab == NULL)
 		return;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 01f91501f7..941acf14e6 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -66,7 +66,7 @@ static struct file_operations dlmfs_file_operations;
 static struct inode_operations dlmfs_dir_inode_operations;
 static struct inode_operations dlmfs_root_inode_operations;
 static struct inode_operations dlmfs_file_inode_operations;
-static kmem_cache_t *dlmfs_inode_cache;
+static struct kmem_cache *dlmfs_inode_cache;
 
 struct workqueue_struct *user_dlm_worker;
 
@@ -257,7 +257,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
 }
 
 static void dlmfs_init_once(void *foo,
-			    kmem_cache_t *cachep,
+			    struct kmem_cache *cachep,
 			    unsigned long flags)
 {
 	struct dlmfs_inode_private *ip =
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f784177b62..856012b4fa 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dlm_dump_all_mles);
 #endif  /*  0  */
 
 
-static kmem_cache_t *dlm_mle_cache = NULL;
+static struct kmem_cache *dlm_mle_cache = NULL;
 
 
 static void dlm_mle_release(struct kref *kref);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index fcd4475d1f..80ac69f11d 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -61,7 +61,7 @@ struct ocfs2_em_insert_context {
 	struct ocfs2_extent_map_entry *right_ent;
 };
 
-static kmem_cache_t *ocfs2_em_ent_cachep = NULL;
+static struct kmem_cache *ocfs2_em_ent_cachep = NULL;
 
 
 static struct ocfs2_extent_map_entry *
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 46a378fbc4..1a7dd2945b 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -106,7 +106,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 #define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL)
 #define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL)
 
-extern kmem_cache_t *ocfs2_inode_cache;
+extern struct kmem_cache *ocfs2_inode_cache;
 
 extern const struct address_space_operations ocfs2_aops;
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7574d26ee0..0524f8a04a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -68,7 +68,7 @@
 
 #include "buffer_head_io.h"
 
-static kmem_cache_t *ocfs2_inode_cachep = NULL;
+static struct kmem_cache *ocfs2_inode_cachep = NULL;
 
 /* OCFS2 needs to schedule several differnt types of work which
  * require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -914,7 +914,7 @@ bail:
 }
 
 static void ocfs2_inode_init_once(void *data,
-				  kmem_cache_t *cachep,
+				  struct kmem_cache *cachep,
 				  unsigned long flags)
 {
 	struct ocfs2_inode_info *oi = data;
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 9707ed7a32..39814b900f 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -69,7 +69,7 @@ struct ocfs2_meta_cache_item {
 	sector_t	c_block;
 };
 
-static kmem_cache_t *ocfs2_uptodate_cachep = NULL;
+static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
 
 void ocfs2_metadata_cache_init(struct inode *inode)
 {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911d1bcfc5..26f44e0074 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -330,7 +330,7 @@ out:
 	return 0;
 }
 
-static kmem_cache_t *op_inode_cachep;
+static struct kmem_cache *op_inode_cachep;
 
 static struct inode *openprom_alloc_inode(struct super_block *sb)
 {
@@ -415,7 +415,7 @@ static struct file_system_type openprom_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
-static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags)
+static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct op_inode_info *oi = (struct op_inode_info *) data;
 
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b24cdb2f17..e26945ba68 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -81,7 +81,7 @@ static void proc_read_inode(struct inode * inode)
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 }
 
-static kmem_cache_t * proc_inode_cachep;
+static struct kmem_cache * proc_inode_cachep;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
@@ -105,7 +105,7 @@ static void proc_destroy_inode(struct inode *inode)
 	kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct proc_inode *ei = (struct proc_inode *) foo;
 
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 5b943eb11d..c047dc654d 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -515,7 +515,7 @@ static void qnx4_read_inode(struct inode *inode)
 	brelse(bh);
 }
 
-static kmem_cache_t *qnx4_inode_cachep;
+static struct kmem_cache *qnx4_inode_cachep;
 
 static struct inode *qnx4_alloc_inode(struct super_block *sb)
 {
@@ -531,7 +531,7 @@ static void qnx4_destroy_inode(struct inode *inode)
 	kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep,
+static void init_once(void *foo, struct kmem_cache * cachep,
 		      unsigned long flags)
 {
 	struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 32332516d6..745bc714ba 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -490,7 +490,7 @@ static void reiserfs_put_super(struct super_block *s)
 	return;
 }
 
-static kmem_cache_t *reiserfs_inode_cachep;
+static struct kmem_cache *reiserfs_inode_cachep;
 
 static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 {
@@ -507,7 +507,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
 
-static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
 
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index d1b455f9b6..c5af088d4a 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -550,7 +550,7 @@ romfs_read_inode(struct inode *i)
 	}
 }
 
-static kmem_cache_t * romfs_inode_cachep;
+static struct kmem_cache * romfs_inode_cachep;
 
 static struct inode *romfs_alloc_inode(struct super_block *sb)
 {
@@ -566,7 +566,7 @@ static void romfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct romfs_inode_info *ei = (struct romfs_inode_info *) foo;
 
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 2216171034..4af4cd729a 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -50,7 +50,7 @@ static void smb_put_super(struct super_block *);
 static int  smb_statfs(struct dentry *, struct kstatfs *);
 static int  smb_show_options(struct seq_file *, struct vfsmount *);
 
-static kmem_cache_t *smb_inode_cachep;
+static struct kmem_cache *smb_inode_cachep;
 
 static struct inode *smb_alloc_inode(struct super_block *sb)
 {
@@ -66,7 +66,7 @@ static void smb_destroy_inode(struct inode *inode)
 	kmem_cache_free(smb_inode_cachep, SMB_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct smb_inode_info *ei = (struct smb_inode_info *) foo;
 	unsigned long flagmask = SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 3eb1402191..a4bcae8a9a 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -25,7 +25,7 @@
 #define ROUND_UP(x) (((x)+3) & ~3)
 
 /* cache for request structures */
-static kmem_cache_t *req_cachep;
+static struct kmem_cache *req_cachep;
 
 static int smb_request_send_req(struct smb_request *req);
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 20551a1b8a..e503f858fb 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -16,7 +16,7 @@
 
 struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
-kmem_cache_t *sysfs_dir_cachep;
+struct kmem_cache *sysfs_dir_cachep;
 
 static struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6f3d6bd528..bd7cec295d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,6 +1,6 @@
 
 extern struct vfsmount * sysfs_mount;
-extern kmem_cache_t *sysfs_dir_cachep;
+extern struct kmem_cache *sysfs_dir_cachep;
 
 extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
 extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index a6ca12b747..ead9864567 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -301,7 +301,7 @@ static void sysv_delete_inode(struct inode *inode)
 	unlock_kernel();
 }
 
-static kmem_cache_t *sysv_inode_cachep;
+static struct kmem_cache *sysv_inode_cachep;
 
 static struct inode *sysv_alloc_inode(struct super_block *sb)
 {
@@ -318,7 +318,7 @@ static void sysv_destroy_inode(struct inode *inode)
 	kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
 
-static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
 {
 	struct sysv_inode_info *si = (struct sysv_inode_info *)p;
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e50f24221d..397f54aaf6 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,7 +107,7 @@ static struct file_system_type udf_fstype = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static kmem_cache_t * udf_inode_cachep;
+static struct kmem_cache * udf_inode_cachep;
 
 static struct inode *udf_alloc_inode(struct super_block *sb)
 {
@@ -130,7 +130,7 @@ static void udf_destroy_inode(struct inode *inode)
 	kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct udf_inode_info *ei = (struct udf_inode_info *) foo;
 
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85a88c0c5e..0b18d2cc2e 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1204,7 +1204,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static kmem_cache_t * ufs_inode_cachep;
+static struct kmem_cache * ufs_inode_cachep;
 
 static struct inode *ufs_alloc_inode(struct super_block *sb)
 {
@@ -1221,7 +1221,7 @@ static void ufs_destroy_inode(struct inode *inode)
 	kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
 {
 	struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
 
-- 
cgit v1.2.2


From 7dfb71030f7636a0d65200158113c37764552f93 Mon Sep 17 00:00:00 2001
From: Nigel Cunningham <ncunningham@linuxmail.org>
Date: Wed, 6 Dec 2006 20:34:23 -0800
Subject: [PATCH] Add include/linux/freezer.h and move definitions from sched.h

Move process freezing functions from include/linux/sched.h to freezer.h, so
that modifications to the freezer or the kernel configuration don't require
recompiling just about everything.

[akpm@osdl.org: fix ueagle driver]
Signed-off-by: Nigel Cunningham <nigel@suspend2.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/kafsasyncd.c          | 1 +
 fs/afs/kafstimod.c           | 1 +
 fs/cifs/cifsfs.c             | 1 +
 fs/cifs/connect.c            | 1 +
 fs/jbd/journal.c             | 2 +-
 fs/jbd2/journal.c            | 2 +-
 fs/jffs/intrep.c             | 1 +
 fs/jffs2/background.c        | 1 +
 fs/jfs/jfs_logmgr.c          | 2 +-
 fs/jfs/jfs_txnmgr.c          | 2 +-
 fs/lockd/clntproc.c          | 1 +
 fs/xfs/linux-2.6/xfs_buf.c   | 1 +
 fs/xfs/linux-2.6/xfs_super.c | 1 +
 13 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index f09a794f24..615df2407c 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/freezer.h>
 #include "cell.h"
 #include "server.h"
 #include "volume.h"
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 65bc05ab81..694344e4d3 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
+#include <linux/freezer.h>
 #include "cell.h"
 #include "volume.h"
 #include "kafstimod.h"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e6b5866e50..71bc87a37f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -34,6 +34,7 @@
 #include <linux/mempool.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #define DECLARE_GLOBALS_HERE
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71f77914ce..2caca06b4b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -31,6 +31,7 @@
 #include <linux/delay.h>
 #include <linux/completion.h>
 #include <linux/pagevec.h>
+#include <linux/freezer.h>
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include "cifspdu.h"
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a8774bed20..10fff94439 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -31,7 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/poison.h>
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 50356019ae..44fc32bfd7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -31,7 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/poison.h>
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 4a543e1149..478a74e2e9 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -66,6 +66,7 @@
 #include <linux/smp_lock.h>
 #include <linux/time.h>
 #include <linux/ctype.h>
+#include <linux/freezer.h>
 
 #include "intrep.h"
 #include "jffs_fm.h"
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index ff2a872e80..6eb3daebd5 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -16,6 +16,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/completion.h>
 #include <linux/sched.h>
+#include <linux/freezer.h>
 #include "nodelist.h"
 
 
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index b89c9aba04..5065baa530 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -67,7 +67,7 @@
 #include <linux/kthread.h>
 #include <linux/buffer_head.h>		/* for sync_blockdev() */
 #include <linux/bio.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include "jfs_incore.h"
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 81f6f04af1..d558e51b0d 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -46,7 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/smp_lock.h>
 #include <linux/completion.h>
-#include <linux/suspend.h>
+#include <linux/freezer.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 3d84f600b6..50643b6a55 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -13,6 +13,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/utsname.h>
 #include <linux/smp_lock.h>
+#include <linux/freezer.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index eef4a0ba11..b971237c5a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -32,6 +32,7 @@
 #include <linux/kthread.h>
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
+#include <linux/freezer.h>
 
 STATIC kmem_zone_t *xfs_buf_zone;
 STATIC kmem_shaker_t xfs_buf_shake;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index de05abbbe7..b93265b7c7 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -56,6 +56,7 @@
 #include <linux/mempool.h>
 #include <linux/writeback.h>
 #include <linux/kthread.h>
+#include <linux/freezer.h>
 
 STATIC struct quotactl_ops xfs_quotactl_operations;
 STATIC struct super_operations xfs_super_operations;
-- 
cgit v1.2.2


From 58e14b148ddb56f0bf999965d6279932ed4a00bc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 6 Dec 2006 20:34:50 -0800
Subject: [PATCH] Use freezeable workqueues in XFS

Make the workqueues used by XFS freezeable, so their worker threads don't
submit any I/O after the suspend image has been created.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: Nigel Cunningham <nigel@suspend2.net>
Cc: David Chinner <dgc@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/xfs/linux-2.6/xfs_buf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b971237c5a..4fb01ffdfd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1827,11 +1827,11 @@ xfs_buf_init(void)
 	if (!xfs_buf_zone)
 		goto out_free_trace_buf;
 
-	xfslogd_workqueue = create_workqueue("xfslogd");
+	xfslogd_workqueue = create_freezeable_workqueue("xfslogd");
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-	xfsdatad_workqueue = create_workqueue("xfsdatad");
+	xfsdatad_workqueue = create_freezeable_workqueue("xfsdatad");
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
-- 
cgit v1.2.2


From 5127d002f9769ba6b1691de78dd3a5c14635e183 Mon Sep 17 00:00:00 2001
From: Suzuki Kp <suzuki@in.ibm.com>
Date: Wed, 6 Dec 2006 20:35:14 -0800
Subject: [PATCH] fix rescan_partitions to return errors properly

The current rescan_partition implementation ignores the errors that comes from
the lower layer.  It reports success for unknown partitions as well as I/O
error cases while reading the partition information.

The unknown partition is not (and will not be) considered as an error in the
kernel, since there are legal users of it (e.g, members of a RAID5 MD Device
or a new disk which is not partitioned at all ).  Changing this behaviour
would scare the user about a serious problem with their disk and is not
recommended.  Thus for both "unknown partitions" to the Linux (eg., DEC
VMS,Novell Netware) and the legal users of NULL partition, would still be
reported as "SUCCESS".

The patch attached here, scares the user about something which he does need to
worry about.  i.e, returning -EIO on disk I/O errors while reading the
partition information.

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: Erik Mouw <erik@harddisk-recovery.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6fb4b6150d..0b6113ba3b 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -177,7 +177,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
 	else if (warn_no_part)
 		printk(" unable to read partition table\n");
 	kfree(state);
-	return NULL;
+	return ERR_PTR(res);
 }
 
 /*
@@ -494,6 +494,8 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 		disk->fops->revalidate_disk(disk);
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
+	if (IS_ERR(state))	/* I/O error reading the partition table */
+		return PTR_ERR(state);
 	for (p = 1; p < state->limit; p++) {
 		sector_t size = state->parts[p].size;
 		sector_t from = state->parts[p].from;
-- 
cgit v1.2.2


From 57881dd9df40b76dc7fc6a0d13fd75f337accb32 Mon Sep 17 00:00:00 2001
From: Suzuki K P <suzuki@in.ibm.com>
Date: Wed, 6 Dec 2006 20:35:16 -0800
Subject: [PATCH] Fix check_partition routines

check_partition() stops its probe once it hits an I/O error from the
partition checkers.  This would prevent the actual partition checker
getting a chance to verify the partition.

So this patch lets check_partition() continue probing untill it hits a
success while recording the I/O error which might have been reported by the
checking routines.

Also, it does some cleanup of the partition methods for ibm, atari and
amiga to return -1 upon hitting an I/O error.

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/amiga.c |  2 ++
 fs/partitions/atari.c |  2 +-
 fs/partitions/check.c | 15 +++++++++++++--
 fs/partitions/ibm.c   | 29 +++++++++++++++++------------
 4 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c
index 3068528890..9917a8c360 100644
--- a/fs/partitions/amiga.c
+++ b/fs/partitions/amiga.c
@@ -43,6 +43,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
 			if (warn_no_part)
 				printk("Dev %s: unable to read RDB block %d\n",
 				       bdevname(bdev, b), blk);
+			res = -1;
 			goto rdb_done;
 		}
 		if (*(__be32 *)data != cpu_to_be32(IDNAME_RIGIDDISK))
@@ -79,6 +80,7 @@ amiga_partition(struct parsed_partitions *state, struct block_device *bdev)
 			if (warn_no_part)
 				printk("Dev %s: unable to read partition block %d\n",
 				       bdevname(bdev, b), blk);
+			res = -1;
 			goto rdb_done;
 		}
 		pb  = (struct PartitionBlock *)data;
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c
index 192a6adfde..1f3572d5b7 100644
--- a/fs/partitions/atari.c
+++ b/fs/partitions/atari.c
@@ -88,7 +88,7 @@ int atari_partition(struct parsed_partitions *state, struct block_device *bdev)
 			if (!xrs) {
 				printk (" block %ld read failed\n", partsect);
 				put_dev_sector(sect);
-				return 0;
+				return -1;
 			}
 
 			/* ++roman: sanity check: bit 0 of flg field must be set */
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0b6113ba3b..1901137f4e 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -153,7 +153,7 @@ static struct parsed_partitions *
 check_partition(struct gendisk *hd, struct block_device *bdev)
 {
 	struct parsed_partitions *state;
-	int i, res;
+	int i, res, err;
 
 	state = kmalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
 	if (!state)
@@ -165,13 +165,24 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
 		sprintf(state->name, "p");
 
 	state->limit = hd->minors;
-	i = res = 0;
+	i = res = err = 0;
 	while (!res && check_part[i]) {
 		memset(&state->parts, 0, sizeof(state->parts));
 		res = check_part[i++](state, bdev);
+		if (res < 0) {
+			/* We have hit an I/O error which we don't report now.
+		 	* But record it, and let the others do their job.
+		 	*/
+			err = res;
+			res = 0;
+		}
+
 	}
 	if (res > 0)
 		return state;
+	if (!err)
+	/* The partition is unrecognized. So report I/O errors if there were any */
+		res = err;
 	if (!res)
 		printk(" unknown partition table\n");
 	else if (warn_no_part)
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index d352a7381f..9f7ad4244f 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -43,7 +43,7 @@ cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
 int
 ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
-	int blocksize, offset, size;
+	int blocksize, offset, size,res;
 	loff_t i_size;
 	dasd_information_t *info;
 	struct hd_geometry *geo;
@@ -56,15 +56,16 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	unsigned char *data;
 	Sector sect;
 
+	res = 0;
 	blocksize = bdev_hardsect_size(bdev);
 	if (blocksize <= 0)
-		return 0;
+		goto out_exit;
 	i_size = i_size_read(bdev->bd_inode);
 	if (i_size == 0)
-		return 0;
+		goto out_exit;
 
 	if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL)
-		goto out_noinfo;
+		goto out_exit;
 	if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL)
 		goto out_nogeo;
 	if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL)
@@ -72,7 +73,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 
 	if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 ||
 	    ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
-		goto out_noioctl;
+		goto out_freeall;
 
 	/*
 	 * Get volume label, extract name and type.
@@ -92,6 +93,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	EBCASC(type, 4);
 	EBCASC(name, 6);
 
+	res = 1;
+
 	/*
 	 * Three different types: CMS1, VOL1 and LNX1/unlabeled
 	 */
@@ -156,6 +159,9 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 			counter++;
 			blk++;
 		}
+		if (!data)
+		/* Are we not supposed to report this ? */
+			goto out_readerr;
 	} else {
 		/*
 		 * Old style LNX1 or unlabeled disk
@@ -171,18 +177,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 	}
 
 	printk("\n");
-	kfree(label);
-	kfree(geo);
-	kfree(info);
-	return 1;
+	goto out_freeall;
+
 
 out_readerr:
-out_noioctl:
+	res = -1;
+out_freeall:
 	kfree(label);
 out_nolab:
 	kfree(geo);
 out_nogeo:
 	kfree(info);
-out_noinfo:
-	return 0;
+out_exit:
+	return res;
 }
-- 
cgit v1.2.2


From 317a40ac2237732aba531eee2c7b5e39dd40e959 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp@aknet.ru>
Date: Wed, 6 Dec 2006 20:35:25 -0800
Subject: [PATCH] honour MNT_NOEXEC for access()

Make access(X_OK) take the "noexec" mount option into account.

Signed-off-by: Stas Sergeev <stsp@aknet.ru>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 28d49b301d..61f99c1967 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -249,9 +249,11 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 
 	/*
 	 * MAY_EXEC on regular files requires special handling: We override
-	 * filesystem execute permissions if the mode bits aren't set.
+	 * filesystem execute permissions if the mode bits aren't set or
+	 * the fs is mounted with the "noexec" flag.
 	 */
-	if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+	if ((mask & MAY_EXEC) && S_ISREG(mode) && (!(mode & S_IXUGO) ||
+			(nd && nd->mnt && (nd->mnt->mnt_flags & MNT_NOEXEC))))
 		return -EACCES;
 
 	/* Ordinary permission routines do not understand MAY_APPEND. */
-- 
cgit v1.2.2


From e4fca01ea2b41c41a82f4ca3537f6ebc237adde5 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 6 Dec 2006 20:35:27 -0800
Subject: [PATCH] ext2: fsid for statvfs

Update ext2_statfs to return an FSID that is a 64 bit XOR of the 128 bit
filesystem UUID as suggested by Andreas Dilger.  See the following Bugzilla
entry for details:

  http://bugzilla.kernel.org/show_bug.cgi?id=136

Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/super.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3aafb1dd91..255cef5f74 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1090,8 +1090,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 {
 	struct super_block *sb = dentry->d_sb;
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct ext2_super_block *es = sbi->s_es;
 	unsigned long overhead;
 	int i;
+	u64 fsid;
 
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
@@ -1104,7 +1106,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 		 * All of the blocks before first_data_block are
 		 * overhead
 		 */
-		overhead = le32_to_cpu(sbi->s_es->s_first_data_block);
+		overhead = le32_to_cpu(es->s_first_data_block);
 
 		/*
 		 * Add the overhead attributed to the superblock and
@@ -1125,14 +1127,18 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 
 	buf->f_type = EXT2_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
-	buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead;
+	buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
 	buf->f_bfree = ext2_count_free_blocks(sb);
-	buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count);
-	if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count))
+	buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
+	if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
 		buf->f_bavail = 0;
-	buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
-	buf->f_ffree = ext2_count_free_inodes (sb);
+	buf->f_files = le32_to_cpu(es->s_inodes_count);
+	buf->f_ffree = ext2_count_free_inodes(sb);
 	buf->f_namelen = EXT2_NAME_LEN;
+	fsid = le64_to_cpup((void *)es->s_uuid) ^
+	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 	return 0;
 }
 
-- 
cgit v1.2.2


From 50ee0a32b192902e32a2b596df7ec3496c4bf485 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 6 Dec 2006 20:35:28 -0800
Subject: [PATCH] ext3: fsid for statvfs

Update ext3_statfs to return an FSID that is a 64 bit XOR of the 128 bit
filesystem UUID as suggested by Andreas Dilger.  See the following Bugzilla
entry for details:

  http://bugzilla.kernel.org/show_bug.cgi?id=136

Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9856565d4d..8ab1981333 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2387,6 +2387,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct ext3_super_block *es = sbi->s_es;
 	ext3_fsblk_t overhead;
 	int i;
+	u64 fsid;
 
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
@@ -2433,6 +2434,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
 	buf->f_namelen = EXT3_NAME_LEN;
+	fsid = le64_to_cpup((void *)es->s_uuid) ^
+	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 	return 0;
 }
 
-- 
cgit v1.2.2


From 960cc398a7a2acfe455b2ec33c64dc6018c83aab Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Wed, 6 Dec 2006 20:35:29 -0800
Subject: [PATCH] ext4: fsid for statvfs

Update ext4_statfs to return an FSID that is a 64 bit XOR of the 128 bit
filesystem UUID as suggested by Andreas Dilger.  See the following Bugzilla
entry for details:

  http://bugzilla.kernel.org/show_bug.cgi?id=136

Cc: Andreas Dilger <adilger@clusterfs.com>
Cc: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/super.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f2e8c4aa0f..2ede7e2c70 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2460,6 +2460,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct ext4_super_block *es = sbi->s_es;
 	ext4_fsblk_t overhead;
 	int i;
+	u64 fsid;
 
 	if (test_opt (sb, MINIX_DF))
 		overhead = 0;
@@ -2506,6 +2507,10 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
 	buf->f_files = le32_to_cpu(es->s_inodes_count);
 	buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter);
 	buf->f_namelen = EXT4_NAME_LEN;
+	fsid = le64_to_cpup((void *)es->s_uuid) ^
+	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
+	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
+	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
 	return 0;
 }
 
-- 
cgit v1.2.2


From bdcf25080438ba71bb24b885e7c102de72c25c9d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:41 -0800
Subject: [PATCH] fuse: minor cleanup in fuse_dentry_revalidate

Remove unneeded code from fuse_dentry_revalidate().  This made some sense
while the validity time could wrap around, but now it's a very obvious no-op.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c71a6c092a..677f3ed7f9 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -141,9 +141,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 		struct fuse_req *forget_req;
 		struct dentry *parent;
 
-		/* Doesn't hurt to "reset" the validity timeout */
-		fuse_invalidate_entry_cache(entry);
-
 		/* For negative dentries, always do a fresh lookup */
 		if (!inode)
 			return 0;
-- 
cgit v1.2.2


From d6392f873f1d09974d5c92c52715fa422ad7c625 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:44 -0800
Subject: [PATCH] fuse: add support for block device based filesystems

I never intended this, but people started using fuse to implement block device
based "real" filesystems (ntfs-3g, zfs).

The following four patches add better support for these kinds of filesystems.
Unlike "normal" fuse filesystems, using this feature should require superuser
privileges (enforced by the fusermount utility).

Thanks to Szabolcs Szakacsits for the input and testing.

This patch adds a 'fuseblk' filesystem type, which is only different from the
'fuse' filesystem type in how the 'dev_name' mount argument is interpreted.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 48 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2bdc652b8b..38cf97d646 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -591,6 +591,14 @@ static int fuse_get_sb(struct file_system_type *fs_type,
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
+static int fuse_get_sb_blk(struct file_system_type *fs_type,
+			   int flags, const char *dev_name,
+			   void *raw_data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
+			   mnt);
+}
+
 static struct file_system_type fuse_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuse",
@@ -598,6 +606,14 @@ static struct file_system_type fuse_fs_type = {
 	.kill_sb	= kill_anon_super,
 };
 
+static struct file_system_type fuseblk_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuseblk",
+	.get_sb		= fuse_get_sb_blk,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+
 static decl_subsys(fuse, NULL, NULL);
 static decl_subsys(connections, NULL, NULL);
 
@@ -617,24 +633,34 @@ static int __init fuse_fs_init(void)
 
 	err = register_filesystem(&fuse_fs_type);
 	if (err)
-		printk("fuse: failed to register filesystem\n");
-	else {
-		fuse_inode_cachep = kmem_cache_create("fuse_inode",
-						      sizeof(struct fuse_inode),
-						      0, SLAB_HWCACHE_ALIGN,
-						      fuse_inode_init_once, NULL);
-		if (!fuse_inode_cachep) {
-			unregister_filesystem(&fuse_fs_type);
-			err = -ENOMEM;
-		}
-	}
+		goto out;
+
+	err = register_filesystem(&fuseblk_fs_type);
+	if (err)
+		goto out_unreg;
+
+	fuse_inode_cachep = kmem_cache_create("fuse_inode",
+					      sizeof(struct fuse_inode),
+					      0, SLAB_HWCACHE_ALIGN,
+					      fuse_inode_init_once, NULL);
+	err = -ENOMEM;
+	if (!fuse_inode_cachep)
+		goto out_unreg2;
+
+	return 0;
 
+ out_unreg2:
+	unregister_filesystem(&fuseblk_fs_type);
+ out_unreg:
+	unregister_filesystem(&fuse_fs_type);
+ out:
 	return err;
 }
 
 static void fuse_fs_cleanup(void)
 {
 	unregister_filesystem(&fuse_fs_type);
+	unregister_filesystem(&fuseblk_fs_type);
 	kmem_cache_destroy(fuse_inode_cachep);
 }
 
-- 
cgit v1.2.2


From d809161402e9f99aefe8848c4e701597ac367269 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:48 -0800
Subject: [PATCH] fuse: add blksize option

Add 'blksize' option for block device based filesystems.  During
initialization this is used to set the block size on the device and the super
block.  The default block size is 512bytes.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 38cf97d646..1baaaeb2e8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -39,6 +39,7 @@ struct fuse_mount_data {
 	unsigned group_id_present : 1;
 	unsigned flags;
 	unsigned max_read;
+	unsigned blksize;
 };
 
 static struct inode *fuse_alloc_inode(struct super_block *sb)
@@ -274,6 +275,7 @@ enum {
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
 	OPT_MAX_READ,
+	OPT_BLKSIZE,
 	OPT_ERR
 };
 
@@ -285,14 +287,16 @@ static match_table_t tokens = {
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
 	{OPT_MAX_READ,			"max_read=%u"},
+	{OPT_BLKSIZE,			"blksize=%u"},
 	{OPT_ERR,			NULL}
 };
 
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 {
 	char *p;
 	memset(d, 0, sizeof(struct fuse_mount_data));
 	d->max_read = ~0;
+	d->blksize = 512;
 
 	while ((p = strsep(&opt, ",")) != NULL) {
 		int token;
@@ -345,6 +349,12 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
 			d->max_read = value;
 			break;
 
+		case OPT_BLKSIZE:
+			if (!is_bdev || match_int(&args[0], &value))
+				return 0;
+			d->blksize = value;
+			break;
+
 		default:
 			return 0;
 		}
@@ -500,15 +510,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	struct dentry *root_dentry;
 	struct fuse_req *init_req;
 	int err;
+	int is_bdev = sb->s_bdev != NULL;
 
 	if (sb->s_flags & MS_MANDLOCK)
 		return -EINVAL;
 
-	if (!parse_fuse_opt((char *) data, &d))
+	if (!parse_fuse_opt((char *) data, &d, is_bdev))
 		return -EINVAL;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	if (is_bdev) {
+		if (!sb_set_blocksize(sb, d.blksize))
+			return -EINVAL;
+	} else {
+		sb->s_blocksize = PAGE_CACHE_SIZE;
+		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	}
 	sb->s_magic = FUSE_SUPER_MAGIC;
 	sb->s_op = &fuse_super_operations;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
-- 
cgit v1.2.2


From b2d2272fae1e1df26ec8f93a6d5baea891dcce37 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:51 -0800
Subject: [PATCH] fuse: add bmap support

Add support for the BMAP operation for block device based filesystems.  This
is needed to support swap-files and lilo.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    |  2 ++
 fs/fuse/file.c   | 37 +++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h |  3 +++
 3 files changed, 42 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 677f3ed7f9..1cabdb229a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1024,6 +1024,8 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 	if (attr->ia_valid & ATTR_SIZE) {
 		unsigned long limit;
 		is_truncate = 1;
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
 		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
 		if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
 			send_sig(SIGXFSZ, current, 0);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 763a50daf1..128f79c408 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -754,6 +754,42 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
 	return err;
 }
 
+static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_bmap_in inarg;
+	struct fuse_bmap_out outarg;
+	int err;
+
+	if (!inode->i_sb->s_bdev || fc->no_bmap)
+		return 0;
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req))
+		return 0;
+
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.block = block;
+	inarg.blocksize = inode->i_sb->s_blocksize;
+	req->in.h.opcode = FUSE_BMAP;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS)
+		fc->no_bmap = 1;
+
+	return err ? 0 : outarg.block;
+}
+
 static const struct file_operations fuse_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -787,6 +823,7 @@ static const struct address_space_operations fuse_file_aops  = {
 	.commit_write	= fuse_commit_write,
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= fuse_set_page_dirty,
+	.bmap		= fuse_bmap,
 };
 
 void fuse_init_file_inode(struct inode *inode)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 91edb8932d..58d482d9f6 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -339,6 +339,9 @@ struct fuse_conn {
 	/** Is interrupt not implemented by fs? */
 	unsigned no_interrupt : 1;
 
+	/** Is bmap not implemented by fs? */
+	unsigned no_bmap : 1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
-- 
cgit v1.2.2


From 0ec7ca41f6f0f74a394a7d686bc0ee8afef84887 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:52 -0800
Subject: [PATCH] fuse: add DESTROY operation

Add a DESTROY operation for block device based filesystems.  With the help of
this operation, such a filesystem can flush dirty data to the device
synchronously before the umount returns.

This is needed in situations where the filesystem is assumed to be clean
immediately after unmount (e.g.  ejecting removable media).

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/fuse_i.h |  6 ++++++
 fs/fuse/inode.c  | 22 ++++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 58d482d9f6..b98b20de74 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -298,6 +298,9 @@ struct fuse_conn {
 	    reply, before any other request, and never cleared */
 	unsigned conn_error : 1;
 
+	/** Connection successful.  Only set in INIT */
+	unsigned conn_init : 1;
+
 	/** Do readpages asynchronously?  Only set in INIT */
 	unsigned async_read : 1;
 
@@ -368,6 +371,9 @@ struct fuse_conn {
 
 	/** Key for lock owner ID scrambling */
 	u32 scramble_key[4];
+
+	/** Reserved request for the DESTROY message */
+	struct fuse_req *destroy_req;
 };
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1baaaeb2e8..437d61c652 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -206,10 +206,23 @@ static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags)
 		fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb));
 }
 
+static void fuse_send_destroy(struct fuse_conn *fc)
+{
+	struct fuse_req *req = fc->destroy_req;
+	if (req && fc->conn_init) {
+		fc->destroy_req = NULL;
+		req->in.h.opcode = FUSE_DESTROY;
+		req->force = 1;
+		request_send(fc, req);
+		fuse_put_request(fc, req);
+	}
+}
+
 static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_conn *fc = get_fuse_conn_super(sb);
 
+	fuse_send_destroy(fc);
 	spin_lock(&fc->lock);
 	fc->connected = 0;
 	fc->blocked = 0;
@@ -410,6 +423,8 @@ static struct fuse_conn *new_conn(void)
 void fuse_conn_put(struct fuse_conn *fc)
 {
 	if (atomic_dec_and_test(&fc->count)) {
+		if (fc->destroy_req)
+			fuse_request_free(fc->destroy_req);
 		mutex_destroy(&fc->inst_mutex);
 		kfree(fc);
 	}
@@ -466,6 +481,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 		fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
 		fc->minor = arg->minor;
 		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
+		fc->conn_init = 1;
 	}
 	fuse_put_request(fc, req);
 	fc->blocked = 0;
@@ -563,6 +579,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	if (!init_req)
 		goto err_put_root;
 
+	if (is_bdev) {
+		fc->destroy_req = fuse_request_alloc();
+		if (!fc->destroy_req)
+			goto err_put_root;
+	}
+
 	mutex_lock(&fuse_mutex);
 	err = -EINVAL;
 	if (file->private_data)
-- 
cgit v1.2.2


From 875d95ec9eb69ffb334116fb44d04d9a64dcbfbb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Wed, 6 Dec 2006 20:35:54 -0800
Subject: [PATCH] fuse: fix compile without CONFIG_BLOCK

Randy Dunlap wote:
> Should FUSE depend on BLOCK?  Without that and with BLOCK=n, I get:
>
> inode.c:(.text+0x3acc5): undefined reference to `sb_set_blocksize'
> inode.c:(.text+0x3a393): undefined reference to `get_sb_bdev'
> fs/built-in.o:(.data+0xd718): undefined reference to `kill_block_super

Most fuse filesystems work fine without block device support, so I
think a better solution is to disable the 'fuseblk' filesystem type if
BLOCK=n.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c | 43 +++++++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 437d61c652..12450d2b32 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -535,8 +535,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 		return -EINVAL;
 
 	if (is_bdev) {
+#ifdef CONFIG_BLOCK
 		if (!sb_set_blocksize(sb, d.blksize))
 			return -EINVAL;
+#endif
 	} else {
 		sb->s_blocksize = PAGE_CACHE_SIZE;
 		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -629,6 +631,14 @@ static int fuse_get_sb(struct file_system_type *fs_type,
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
 
+static struct file_system_type fuse_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuse",
+	.get_sb		= fuse_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+
+#ifdef CONFIG_BLOCK
 static int fuse_get_sb_blk(struct file_system_type *fs_type,
 			   int flags, const char *dev_name,
 			   void *raw_data, struct vfsmount *mnt)
@@ -637,13 +647,6 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
 			   mnt);
 }
 
-static struct file_system_type fuse_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "fuse",
-	.get_sb		= fuse_get_sb,
-	.kill_sb	= kill_anon_super,
-};
-
 static struct file_system_type fuseblk_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuseblk",
@@ -652,6 +655,26 @@ static struct file_system_type fuseblk_fs_type = {
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
+static inline int register_fuseblk(void)
+{
+	return register_filesystem(&fuseblk_fs_type);
+}
+
+static inline void unregister_fuseblk(void)
+{
+	unregister_filesystem(&fuseblk_fs_type);
+}
+#else
+static inline int register_fuseblk(void)
+{
+	return 0;
+}
+
+static inline void unregister_fuseblk(void)
+{
+}
+#endif
+
 static decl_subsys(fuse, NULL, NULL);
 static decl_subsys(connections, NULL, NULL);
 
@@ -673,7 +696,7 @@ static int __init fuse_fs_init(void)
 	if (err)
 		goto out;
 
-	err = register_filesystem(&fuseblk_fs_type);
+	err = register_fuseblk();
 	if (err)
 		goto out_unreg;
 
@@ -688,7 +711,7 @@ static int __init fuse_fs_init(void)
 	return 0;
 
  out_unreg2:
-	unregister_filesystem(&fuseblk_fs_type);
+	unregister_fuseblk();
  out_unreg:
 	unregister_filesystem(&fuse_fs_type);
  out:
@@ -698,7 +721,7 @@ static int __init fuse_fs_init(void)
 static void fuse_fs_cleanup(void)
 {
 	unregister_filesystem(&fuse_fs_type);
-	unregister_filesystem(&fuseblk_fs_type);
+	unregister_fuseblk();
 	kmem_cache_destroy(fuse_inode_cachep);
 }
 
-- 
cgit v1.2.2


From 593be07ae8f6f4a1b1b98813fabb155328f8bc0c Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Wed, 6 Dec 2006 20:36:01 -0800
Subject: [PATCH] file: kill unnecessary timer in fdtable_defer

free_fdtable_rc() schedules timer to reschedule fddef->wq if
schedule_work() on it returns 0.  However, schedule_work() guarantees that
the target work is executed at least once after the scheduling regardless
of its return value.  0 return simply means that the work was already
pending and thus no further action was required.

Another problem is that it used contant '5' as @expires argument to
mod_timer().

Kill unnecessary fddef->timer.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 29 ++---------------------------
 1 file changed, 2 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 3787e82f54..51aef67547 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -21,7 +21,6 @@
 struct fdtable_defer {
 	spinlock_t lock;
 	struct work_struct wq;
-	struct timer_list timer;
 	struct fdtable *next;
 };
 
@@ -75,22 +74,6 @@ static void __free_fdtable(struct fdtable *fdt)
 	kfree(fdt);
 }
 
-static void fdtable_timer(unsigned long data)
-{
-	struct fdtable_defer *fddef = (struct fdtable_defer *)data;
-
-	spin_lock(&fddef->lock);
-	/*
-	 * If someone already emptied the queue return.
-	 */
-	if (!fddef->next)
-		goto out;
-	if (!schedule_work(&fddef->wq))
-		mod_timer(&fddef->timer, 5);
-out:
-	spin_unlock(&fddef->lock);
-}
-
 static void free_fdtable_work(struct work_struct *work)
 {
 	struct fdtable_defer *f =
@@ -144,13 +127,8 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
 		spin_lock(&fddef->lock);
 		fdt->next = fddef->next;
 		fddef->next = fdt;
-		/*
-		 * vmallocs are handled from the workqueue context.
-		 * If the per-cpu workqueue is running, then we
-		 * defer work scheduling through a timer.
-		 */
-		if (!schedule_work(&fddef->wq))
-			mod_timer(&fddef->timer, 5);
+		/* vmallocs are handled from the workqueue context */
+		schedule_work(&fddef->wq);
 		spin_unlock(&fddef->lock);
 		put_cpu_var(fdtable_defer_list);
 	}
@@ -354,9 +332,6 @@ static void __devinit fdtable_defer_list_init(int cpu)
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
 	spin_lock_init(&fddef->lock);
 	INIT_WORK(&fddef->wq, free_fdtable_work);
-	init_timer(&fddef->timer);
-	fddef->timer.data = (unsigned long)fddef;
-	fddef->timer.function = fdtable_timer;
 	fddef->next = NULL;
 }
 
-- 
cgit v1.2.2


From 87b4126f10cce2d49687df227f6228fa5a9ac6c6 Mon Sep 17 00:00:00 2001
From: Suzuki K P <suzuki@in.ibm.com>
Date: Wed, 6 Dec 2006 20:36:10 -0800
Subject: [PATCH] fix reiserfs bad path release panic

One of our test team hit a reiserfs_panic while running fsstress tests on
2.6.19-rc1.  The message looks like :

  REISERFS: panic(device Null superblock):
  reiserfs[5676]: assertion !(p->path_length != 1 ) failed at
  fs/reiserfs/stree.c:397:reiserfs_check_path: path not properly relsed.

The backtrace looked :

  kernel BUG in reiserfs_panic at fs/reiserfs/prints.c:361!
	.reiserfs_check_path+0x58/0x74
	.reiserfs_get_block+0x1444/0x1508
	.__block_prepare_write+0x1c8/0x558
	.block_prepare_write+0x34/0x64
	.reiserfs_prepare_write+0x118/0x1d0
	.generic_file_buffered_write+0x314/0x82c
	.__generic_file_aio_write_nolock+0x350/0x3e0
	.__generic_file_write_nolock+0x78/0xb0
	.generic_file_write+0x60/0xf0
	.reiserfs_file_write+0x198/0x2038
	.vfs_write+0xd0/0x1b4
	.sys_write+0x4c/0x8c
	syscall_exit+0x0/0x4

Upon debugging I found that the restart_transaction was not releasing
the path if the th->refcount was > 1.

/*static*/
int restart_transaction(struct reiserfs_transaction_handle *th,
                           			struct inode *inode, struct path *path)
{
	[...]

         /* we cannot restart while nested */
         if (th->t_refcount > 1) { <<- Path is not released in this case!
                 return 0;
         }

         pathrelse(path); <<- Path released here.
	[...]

This could happen in such a situation :

In reiserfs/inode.c: reiserfs_get_block() ::

      if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
          /* restart the transaction to give the journal a chance to free
           ** some blocks.  releases the path, so we have to go back to
           ** research if we succeed on the second try
           */
          SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;

        -->>  retval = restart_transaction(th, inode, &path); <<--

  We are supposed to release the path, no matter we succeed or fail. But
if the th->refcount is > 1, the path is still valid. And,

          if (retval)
                   goto failure;
          repeat =
              _allocate_block(th, block, inode,
                             &allocated_block_nr, NULL, create);

If the above allocate_block fails with NO_DISK_SPACE or QUOTA_EXCEEDED,
we would have path which is not released.

         if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
                   goto research;
         }
         if (repeat == QUOTA_EXCEEDED)
                   retval = -EDQUOT;
         else
                   retval = -ENOSPC;
         goto failure;
	[...]

       failure:
	[...]
         reiserfs_check_path(&path); << Panics here !

Attached here is a patch which could fix the issue.

fix reiserfs/inode.c : restart_transaction() to release the path in all
cases.

The restart_transaction() doesn't release the path when the the journal
handle has a refcount > 1.  This would trigger a reiserfs_panic() if we
encounter an -ENOSPC / -EDQUOT in reiserfs_get_block().

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: "Vladimir V. Saveliev" <vs@namesys.com>
Cc: <reiserfs-dev@namesys.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9c69bcacad..a625688de4 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -216,11 +216,12 @@ static int file_capable(struct inode *inode, long block)
 	BUG_ON(!th->t_trans_id);
 	BUG_ON(!th->t_refcount);
 
+	pathrelse(path);
+
 	/* we cannot restart while nested */
 	if (th->t_refcount > 1) {
 		return 0;
 	}
-	pathrelse(path);
 	reiserfs_update_sd(th, inode);
 	err = journal_end(th, s, len);
 	if (!err) {
-- 
cgit v1.2.2


From ed2908f31398049c4371de9b100700e80704e95f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 6 Dec 2006 20:36:16 -0800
Subject: [PATCH] Remove superfluous lock_super() in extN xattr code

lock_super() is unnecessary for setting super-block feature flags.  Use the
provided *_SET_COMPAT_FEATURE() macros as well.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/xattr.c | 5 +----
 fs/ext3/xattr.c | 5 +----
 fs/ext4/xattr.c | 5 +----
 3 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index af52a7f8b2..247efd0b51 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -342,12 +342,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
 	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
-	lock_super(sb);
-	EXT2_SB(sb)->s_es->s_feature_compat |=
-		cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR);
+	EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
 	sb->s_dirt = 1;
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
-	unlock_super(sb);
 }
 
 /*
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index f86f2482f0..99857a400f 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -459,14 +459,11 @@ static void ext3_xattr_update_super_block(handle_t *handle,
 	if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
-	lock_super(sb);
 	if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
-		EXT3_SB(sb)->s_es->s_feature_compat |=
-			cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
+		EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
 		sb->s_dirt = 1;
 		ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	}
-	unlock_super(sb);
 }
 
 /*
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 63233cd946..dc969c357a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -459,14 +459,11 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
-	lock_super(sb);
 	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
-		EXT4_SB(sb)->s_es->s_feature_compat |=
-			cpu_to_le32(EXT4_FEATURE_COMPAT_EXT_ATTR);
+		EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
 		sb->s_dirt = 1;
 		ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
 	}
-	unlock_super(sb);
 }
 
 /*
-- 
cgit v1.2.2


From 59287c0913cc9a6c75712a775f6c1c1ef418ef3b Mon Sep 17 00:00:00 2001
From: Marcus Meissner <meissner@suse.de>
Date: Wed, 6 Dec 2006 20:36:24 -0800
Subject: [PATCH] binfmt_elf: randomize PIE binaries (2nd try)

Randomizes -pie compiled binaries from 64k (0x10000) up to ELF_ET_DYN_BASE.

0 -> 64k is excluded to allow NULL ptr accesses to fail.

Signed-off-by: Marcus Meissner <meissner@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index cc72bb4306..b2efbaead6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -856,7 +856,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * default mmap base, as well as whatever program they
 			 * might try to exec.  This is because the brk will
 			 * follow the loader, and is not movable.  */
-			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
+			if (current->flags & PF_RANDOMIZE)
+				load_bias = randomize_range(0x10000,
+							    ELF_ET_DYN_BASE,
+							    0);
+			else
+				load_bias = ELF_ET_DYN_BASE;
+			load_bias = ELF_PAGESTART(load_bias - vaddr);
 		}
 
 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
-- 
cgit v1.2.2


From 40b851348fe9bf49c26025b34261d25142269b60 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 6 Dec 2006 20:36:26 -0800
Subject: [PATCH] handle ext3 directory corruption better

I've been using Steve Grubb's purely evil "fsfuzzer" tool, at
http://people.redhat.com/sgrubb/files/fsfuzzer-0.4.tar.gz

Basically it makes a filesystem, splats some random bits over it, then
tries to mount it and do some simple filesystem actions.

At best, the filesystem catches the corruption gracefully.  At worst,
things spin out of control.

As you might guess, we found a couple places in ext3 where things spin out
of control :)

First, we had a corrupted directory that was never checked for
consistency...  it was corrupt, and pointed to another bad "entry" of
length 0.  The for() loop looped forever, since the length of
ext3_next_entry(de) was 0, and we kept looking at the same pointer over and
over and over and over...  I modeled this check and subsequent action on
what is done for other directory types in ext3_readdir...

(adding this check adds some computational expense; I am testing a followup
patch to reduce the number of times we check and re-check these directory
entries, in all cases.  Thanks for the idea, Andreas).

Next we had a root directory inode which had a corrupted size, claimed to
be > 200M on a 4M filesystem.  There was only really 1 block in the
directory, but because the size was so large, readdir kept coming back for
more, spewing thousands of printk's along the way.

Per Andreas' suggestion, if we're in this read error condition and we're
trying to read an offset which is greater than i_blocks worth of bytes,
stop trying, and break out of the loop.

With these two changes fsfuzz test survives quite well on ext3.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/dir.c   | 3 +++
 fs/ext3/namei.c | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index d0b54f30b9..5a9313ecd4 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -154,6 +154,9 @@ static int ext3_readdir(struct file * filp,
 			ext3_error (sb, "ext3_readdir",
 				"directory #%lu contains a hole at offset %lu",
 				inode->i_ino, (unsigned long)filp->f_pos);
+			/* corrupt size?  Maybe no more blocks to read */
+			if (filp->f_pos > inode->i_blocks << 9)
+				break;
 			filp->f_pos += sb->s_blocksize - offset;
 			continue;
 		}
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 906731a20f..60d2f9dbdb 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -552,6 +552,15 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 					   dir->i_sb->s_blocksize -
 					   EXT3_DIR_REC_LEN(0));
 	for (; de < top; de = ext3_next_entry(de)) {
+		if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+					(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+						+((char *)de - bh->b_data))) {
+			/* On error, skip the f_pos to the next block. */
+			dir_file->f_pos = (dir_file->f_pos |
+					(dir->i_sb->s_blocksize - 1)) + 1;
+			brelse (bh);
+			return count;
+		}
 		ext3fs_dirhash(de->name, de->name_len, hinfo);
 		if ((hinfo->hash < start_hash) ||
 		    ((hinfo->hash == start_hash) &&
-- 
cgit v1.2.2


From e6c4021190c828d7fa24a464db589f86c6708341 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 6 Dec 2006 20:36:28 -0800
Subject: [PATCH] handle ext4 directory corruption better

I've been using Steve Grubb's purely evil "fsfuzzer" tool, at
http://people.redhat.com/sgrubb/files/fsfuzzer-0.4.tar.gz

Basically it makes a filesystem, splats some random bits over it, then
tries to mount it and do some simple filesystem actions.

At best, the filesystem catches the corruption gracefully.  At worst,
things spin out of control.

As you might guess, we found a couple places in ext4 where things spin out
of control :)

First, we had a corrupted directory that was never checked for
consistency...  it was corrupt, and pointed to another bad "entry" of
length 0.  The for() loop looped forever, since the length of
ext4_next_entry(de) was 0, and we kept looking at the same pointer over and
over and over and over...  I modeled this check and subsequent action on
what is done for other directory types in ext4_readdir...

(adding this check adds some computational expense; I am testing a followup
patch to reduce the number of times we check and re-check these directory
entries, in all cases.  Thanks for the idea, Andreas).

Next we had a root directory inode which had a corrupted size, claimed to
be > 200M on a 4M filesystem.  There was only really 1 block in the
directory, but because the size was so large, readdir kept coming back for
more, spewing thousands of printk's along the way.

Per Andreas' suggestion, if we're in this read error condition and we're
trying to read an offset which is greater than i_blocks worth of bytes,
stop trying, and break out of the loop.

With these two changes fsfuzz test survives quite well on ext4.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/dir.c   | 3 +++
 fs/ext4/namei.c | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index f8595787a7..f2ed3e7fb9 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -153,6 +153,9 @@ static int ext4_readdir(struct file * filp,
 			ext4_error (sb, "ext4_readdir",
 				"directory #%lu contains a hole at offset %lu",
 				inode->i_ino, (unsigned long)filp->f_pos);
+			/* corrupt size?  Maybe no more blocks to read */
+			if (filp->f_pos > inode->i_blocks << 9)
+				break;
 			filp->f_pos += sb->s_blocksize - offset;
 			continue;
 		}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8b1bd03d20..859990eac5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -552,6 +552,15 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 					   dir->i_sb->s_blocksize -
 					   EXT4_DIR_REC_LEN(0));
 	for (; de < top; de = ext4_next_entry(de)) {
+		if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+					(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+						+((char *)de - bh->b_data))) {
+			/* On error, skip the f_pos to the next block. */
+			dir_file->f_pos = (dir_file->f_pos |
+					(dir->i_sb->s_blocksize - 1)) + 1;
+			brelse (bh);
+			return count;
+		}
 		ext4fs_dirhash(de->name, de->name_len, hinfo);
 		if ((hinfo->hash < start_hash) ||
 		    ((hinfo->hash == start_hash) &&
-- 
cgit v1.2.2


From 736c4b8572ac24b1e6fd58d00872305a120ac700 Mon Sep 17 00:00:00 2001
From: Mika Kukkonen <mikukkon@gmail.com>
Date: Wed, 6 Dec 2006 20:36:29 -0800
Subject: [PATCH] Function v9fs_get_idpool returns int, not u32 as called twice
 in fs/9p/vfs_inode.c

Function v9fs_get_idpool returns int, not u32.  Actually it returns -1 on
errors, and these two callers check if the value is smaller than 0, which
was caught by gcc with extra warning flags.  Compile tested only but should
be OK, as the value computed in v9fs_get_idpool() is also int.

Signed-of-by: Mika Kukkonen <mikukkon@iki.fi>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@lanl.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 5241c600ce..18f26cdfd8 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -256,7 +256,7 @@ static int
 v9fs_create(struct v9fs_session_info *v9ses, u32 pfid, char *name, u32 perm,
 	u8 mode, char *extension, u32 *fidp, struct v9fs_qid *qid, u32 *iounit)
 {
-	u32 fid;
+	int fid;
 	int err;
 	struct v9fs_fcall *fcall;
 
@@ -310,7 +310,7 @@ static struct v9fs_fid*
 v9fs_clone_walk(struct v9fs_session_info *v9ses, u32 fid, struct dentry *dentry)
 {
 	int err;
-	u32 nfid;
+	int nfid;
 	struct v9fs_fid *ret;
 	struct v9fs_fcall *fcall;
 
-- 
cgit v1.2.2


From 841d5fb7c75260f76ae682648b28a3dca724940d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 6 Dec 2006 20:36:35 -0800
Subject: [PATCH] binfmt: fix uaccess handling

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b2efbaead6..68e20d5bfe 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -243,8 +243,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	if (interp_aout) {
 		argv = sp + 2;
 		envp = argv + argc + 1;
-		__put_user((elf_addr_t)(unsigned long)argv, sp++);
-		__put_user((elf_addr_t)(unsigned long)envp, sp++);
+		if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
+		    __put_user((elf_addr_t)(unsigned long)envp, sp++))
+			return -EFAULT;
 	} else {
 		argv = sp;
 		envp = argv + argc + 1;
@@ -254,7 +255,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	p = current->mm->arg_end = current->mm->arg_start;
 	while (argc-- > 0) {
 		size_t len;
-		__put_user((elf_addr_t)p, argv++);
+		if (__put_user((elf_addr_t)p, argv++))
+			return -EFAULT;
 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 			return 0;
@@ -265,7 +267,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	current->mm->arg_end = current->mm->env_start = p;
 	while (envc-- > 0) {
 		size_t len;
-		__put_user((elf_addr_t)p, envp++);
+		if (__put_user((elf_addr_t)p, envp++))
+			return -EFAULT;
 		len = strnlen_user((void __user *)p, PAGE_SIZE*MAX_ARG_PAGES);
 		if (!len || len > PAGE_SIZE*MAX_ARG_PAGES)
 			return 0;
-- 
cgit v1.2.2


From 7116e994b47f3988389be4ceee67dac64b56e0d0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 6 Dec 2006 20:36:36 -0800
Subject: [PATCH] compat: fix uaccess handling

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/compat.c       | 35 +++++++++++++++++++----------------
 fs/compat_ioctl.c | 33 ++++++++++++++++++++-------------
 2 files changed, 39 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 7aef5412f4..a7e3f162fb 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1144,7 +1144,9 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
 	lastdirent = buf.previous;
 	if (lastdirent) {
 		typeof(lastdirent->d_off) d_off = file->f_pos;
-		__put_user_unaligned(d_off, &lastdirent->d_off);
+		error = -EFAULT;
+		if (__put_user_unaligned(d_off, &lastdirent->d_off))
+			goto out_putf;
 		error = count - buf.count;
 	}
 
@@ -1611,14 +1613,14 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 		nr &= ~1UL;
 		while (nr) {
 			unsigned long h, l;
-			__get_user(l, ufdset);
-			__get_user(h, ufdset+1);
+			if (__get_user(l, ufdset) || __get_user(h, ufdset+1))
+				return -EFAULT;
 			ufdset += 2;
 			*fdset++ = h << 32 | l;
 			nr -= 2;
 		}
-		if (odd)
-			__get_user(*fdset, ufdset);
+		if (odd && __get_user(*fdset, ufdset))
+			return -EFAULT;
 	} else {
 		/* Tricky, must clear full unsigned long in the
 		 * kernel fdset at the end, this makes sure that
@@ -1630,14 +1632,14 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 }
 
 static
-void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
-			unsigned long *fdset)
+int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
+		      unsigned long *fdset)
 {
 	unsigned long odd;
 	nr = ROUND_UP(nr, __COMPAT_NFDBITS);
 
 	if (!ufdset)
-		return;
+		return 0;
 
 	odd = nr & 1UL;
 	nr &= ~1UL;
@@ -1645,13 +1647,14 @@ void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 		unsigned long h, l;
 		l = *fdset++;
 		h = l >> 32;
-		__put_user(l, ufdset);
-		__put_user(h, ufdset+1);
+		if (__put_user(l, ufdset) || __put_user(h, ufdset+1))
+			return -EFAULT;
 		ufdset += 2;
 		nr -= 2;
 	}
-	if (odd)
-		__put_user(*fdset, ufdset);
+	if (odd && __put_user(*fdset, ufdset))
+		return -EFAULT;
+	return 0;
 }
 
 
@@ -1726,10 +1729,10 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 		ret = 0;
 	}
 
-	compat_set_fd_set(n, inp, fds.res_in);
-	compat_set_fd_set(n, outp, fds.res_out);
-	compat_set_fd_set(n, exp, fds.res_ex);
-
+	if (compat_set_fd_set(n, inp, fds.res_in) ||
+	    compat_set_fd_set(n, outp, fds.res_out) ||
+	    compat_set_fd_set(n, exp, fds.res_ex))
+		ret = -EFAULT;
 out:
 	kfree(bits);
 out_nofds:
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a91f2628c9..6e31776855 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -211,8 +211,10 @@ static int do_video_stillpicture(unsigned int fd, unsigned int cmd, unsigned lon
 	up_native =
 		compat_alloc_user_space(sizeof(struct video_still_picture));
 
-	put_user(compat_ptr(fp), &up_native->iFrame);
-	put_user(size, &up_native->size);
+	err =  put_user(compat_ptr(fp), &up_native->iFrame);
+	err |= put_user(size, &up_native->size);
+	if (err)
+		return -EFAULT;
 
 	err = sys_ioctl(fd, cmd, (unsigned long) up_native);
 
@@ -236,8 +238,10 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned
 	err |= get_user(length, &up->length);
 
 	up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
-	put_user(compat_ptr(palp), &up_native->palette);
-	put_user(length, &up_native->length);
+	err  = put_user(compat_ptr(palp), &up_native->palette);
+	err |= put_user(length, &up_native->length);
+	if (err)
+		return -EFAULT;
 
 	err = sys_ioctl(fd, cmd, (unsigned long) up_native);
 
@@ -2043,16 +2047,19 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
         struct serial_struct ss;
         mm_segment_t oldseg = get_fs();
         __u32 udata;
+	unsigned int base;
 
         if (cmd == TIOCSSERIAL) {
                 if (!access_ok(VERIFY_READ, ss32, sizeof(SS32)))
                         return -EFAULT;
                 if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base)))
 			return -EFAULT;
-                __get_user(udata, &ss32->iomem_base);
+                if (__get_user(udata, &ss32->iomem_base))
+			return -EFAULT;
                 ss.iomem_base = compat_ptr(udata);
-                __get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
-                __get_user(ss.port_high, &ss32->port_high);
+                if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+		    __get_user(ss.port_high, &ss32->port_high))
+			return -EFAULT;
                 ss.iomap_base = 0UL;
         }
         set_fs(KERNEL_DS);
@@ -2063,12 +2070,12 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
                         return -EFAULT;
                 if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base)))
 			return -EFAULT;
-                __put_user((unsigned long)ss.iomem_base  >> 32 ?
-                            0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
-                            &ss32->iomem_base);
-                __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift);
-                __put_user(ss.port_high, &ss32->port_high);
-
+		base = (unsigned long)ss.iomem_base  >> 32 ?
+			0xffffffff : (unsigned)(unsigned long)ss.iomem_base;
+		if (__put_user(base, &ss32->iomem_base) ||
+		    __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
+		    __put_user(ss.port_high, &ss32->port_high))
+			return -EFAULT;
         }
         return err;
 }
-- 
cgit v1.2.2


From 4a6e617a4bec9fb2ee4a16cf59565b2af5049e12 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 6 Dec 2006 20:37:04 -0800
Subject: [PATCH] fs/*: trivial vsnprintf() conversion

It would very lame to get buffer overflow via one of the following.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/adfs/super.c    | 2 +-
 fs/affs/amigaffs.c | 4 ++--
 fs/jfs/super.c     | 2 +-
 fs/ocfs2/super.c   | 4 ++--
 fs/udf/super.c     | 4 ++--
 fs/ufs/super.c     | 6 +++---
 6 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index e5a205c9f4..5023351a7a 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -36,7 +36,7 @@ void __adfs_error(struct super_block *sb, const char *function, const char *fmt,
 	va_list args;
 
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 
 	printk(KERN_CRIT "ADFS-fs error (device %s)%s%s: %s\n",
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index ccd624ef42..f4de4b9800 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -445,7 +445,7 @@ affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
 	va_list	 args;
 
 	va_start(args,fmt);
-	vsprintf(ErrorBuffer,fmt,args);
+	vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
 	va_end(args);
 
 	printk(KERN_CRIT "AFFS error (device %s): %s(): %s\n", sb->s_id,
@@ -461,7 +461,7 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
 	va_list	 args;
 
 	va_start(args,fmt);
-	vsprintf(ErrorBuffer,fmt,args);
+	vsnprintf(ErrorBuffer,sizeof(ErrorBuffer),fmt,args);
 	va_end(args);
 
 	printk(KERN_WARNING "AFFS warning (device %s): %s(): %s\n", sb->s_id,
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index ca3e191285..846ac8f345 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -93,7 +93,7 @@ void jfs_error(struct super_block *sb, const char * function, ...)
 	va_list args;
 
 	va_start(args, function);
-	vsprintf(error_buf, function, args);
+	vsnprintf(error_buf, sizeof(error_buf), function, args);
 	va_end(args);
 
 	printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0524f8a04a..4bf39540e6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1674,7 +1674,7 @@ void __ocfs2_error(struct super_block *sb,
 	va_list args;
 
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 
 	/* Not using mlog here because we want to show the actual
@@ -1695,7 +1695,7 @@ void __ocfs2_abort(struct super_block* sb,
 	va_list args;
 
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 
 	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 397f54aaf6..1dbc2955f0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1709,7 +1709,7 @@ void udf_error(struct super_block *sb, const char *function,
 		sb->s_dirt = 1;
 	}
 	va_start(args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 	printk (KERN_CRIT "UDF-fs error (device %s): %s: %s\n",
 		sb->s_id, function, error_buf);
@@ -1721,7 +1721,7 @@ void udf_warning(struct super_block *sb, const char *function,
 	va_list args;
 
 	va_start (args, fmt);
-	vsprintf(error_buf, fmt, args);
+	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
 	va_end(args);
 	printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n",
 		sb->s_id, function, error_buf);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 0b18d2cc2e..8a8e9382ec 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -224,7 +224,7 @@ void ufs_error (struct super_block * sb, const char * function,
 		sb->s_flags |= MS_RDONLY;
 	}
 	va_start (args, fmt);
-	vsprintf (error_buf, fmt, args);
+	vsnprintf (error_buf, sizeof(error_buf), fmt, args);
 	va_end (args);
 	switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
 	case UFS_MOUNT_ONERROR_PANIC:
@@ -255,7 +255,7 @@ void ufs_panic (struct super_block * sb, const char * function,
 		sb->s_dirt = 1;
 	}
 	va_start (args, fmt);
-	vsprintf (error_buf, fmt, args);
+	vsnprintf (error_buf, sizeof(error_buf), fmt, args);
 	va_end (args);
 	sb->s_flags |= MS_RDONLY;
 	printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n",
@@ -268,7 +268,7 @@ void ufs_warning (struct super_block * sb, const char * function,
 	va_list args;
 
 	va_start (args, fmt);
-	vsprintf (error_buf, fmt, args);
+	vsnprintf (error_buf, sizeof(error_buf), fmt, args);
 	va_end (args);
 	printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n",
 		sb->s_id, function, error_buf);
-- 
cgit v1.2.2


From 352d94d040053d93bf1cf4acb4be9635e69d9200 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 6 Dec 2006 20:37:04 -0800
Subject: [PATCH] hpfs: bring hpfs_error() into shape

 - switch to error message buffer in .bss
 - missing va_end() (htf it worked before?)
 - use vsnprintf()
 - rename variables to understandable "fmt", "args".
 - "const char *fmt", yes.
 - add __attribute__((format ...

Still, put that coffee down before reading more.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hpfs/hpfs_fn.h |  3 ++-
 fs/hpfs/super.c   | 23 +++++++++--------------
 2 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 32ab51e42b..1c07aa82d3 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -317,7 +317,8 @@ static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb)
 
 /* super.c */
 
-void hpfs_error(struct super_block *, char *, ...);
+void hpfs_error(struct super_block *, const char *, ...)
+	__attribute__((format (printf, 2, 3)));
 int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
 unsigned hpfs_count_one_bitmap(struct super_block *, secno);
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 34d68e2117..d4abc1a1d5 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -46,21 +46,17 @@ static void unmark_dirty(struct super_block *s)
 }
 
 /* Filesystem error... */
+static char err_buf[1024];
 
-#define ERR_BUF_SIZE 1024
-
-void hpfs_error(struct super_block *s, char *m,...)
+void hpfs_error(struct super_block *s, const char *fmt, ...)
 {
-	char *buf;
-	va_list l;
-	va_start(l, m);
-	if (!(buf = kmalloc(ERR_BUF_SIZE, GFP_KERNEL)))
-		printk("HPFS: No memory for error message '%s'\n",m);
-	else if (vsprintf(buf, m, l) >= ERR_BUF_SIZE)
-		printk("HPFS: Grrrr... Kernel memory corrupted ... going on, but it'll crash very soon :-(\n");
-	printk("HPFS: filesystem error: ");
-	if (buf) printk("%s", buf);
-	else printk("%s\n",m);
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(err_buf, sizeof(err_buf), fmt, args);
+	va_end(args);
+
+	printk("HPFS: filesystem error: %s", err_buf);
 	if (!hpfs_sb(s)->sb_was_error) {
 		if (hpfs_sb(s)->sb_err == 2) {
 			printk("; crashing the system because you wanted it\n");
@@ -76,7 +72,6 @@ void hpfs_error(struct super_block *s, char *m,...)
 		} else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n");
 		else printk("; corrupted filesystem mounted read/write - your computer will explode within 20 seconds ... but you wanted it so!\n");
 	} else printk("\n");
-	kfree(buf);
 	hpfs_sb(s)->sb_was_error = 1;
 }
 
-- 
cgit v1.2.2


From 18debbbcce1306f0bbb1c71cf587fd90413acab6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 6 Dec 2006 20:37:05 -0800
Subject: [PATCH] hpfs: fix printk format warnings

Fix hpfs printk warnings:

  fs/hpfs/dir.c:87: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long unsigned int'
  fs/hpfs/dir.c:147: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long int'
  fs/hpfs/dir.c:148: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long int'
  fs/hpfs/dnode.c:537: warning: format '%08x' expects type 'unsigned int', but argument 5 has type 'long unsigned int'
  fs/hpfs/dnode.c:854: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'loff_t'
  fs/hpfs/ea.c:247: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long unsigned int'
  fs/hpfs/inode.c:254: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'long unsigned int'
  fs/hpfs/map.c:129: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:135: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:140: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:147: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'
  fs/hpfs/map.c:154: warning: format '%08x' expects type 'unsigned int', but argument 3 has type 'ino_t'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hpfs/dir.c   | 10 +++++++---
 fs/hpfs/dnode.c | 13 +++++++++----
 fs/hpfs/ea.c    |  5 +++--
 fs/hpfs/inode.c |  5 ++++-
 fs/hpfs/map.c   | 20 ++++++++++++++------
 5 files changed, 37 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index ecc9180645..594f9c428f 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -84,7 +84,8 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		if (!fno->dirflag) {
 			e = 1;
-			hpfs_error(inode->i_sb, "not a directory, fnode %08x",inode->i_ino);
+			hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
+					(unsigned long)inode->i_ino);
 		}
 		if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) {
 			e = 1;
@@ -144,8 +145,11 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 		if (de->first || de->last) {
 			if (hpfs_sb(inode->i_sb)->sb_chk) {
-				if (de->first && !de->last && (de->namelen != 2 || de ->name[0] != 1 || de->name[1] != 1)) hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08x", old_pos);
-				if (de->last && (de->namelen != 1 || de ->name[0] != 255)) hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08x", old_pos);
+				if (de->first && !de->last && (de->namelen != 2
+				    || de ->name[0] != 1 || de->name[1] != 1))
+					hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08lx", old_pos);
+				if (de->last && (de->namelen != 1 || de ->name[0] != 255))
+					hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08lx", old_pos);
 			}
 			hpfs_brelse4(&qbh);
 			goto again;
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 229ff2fb18..fe83c2b7d2 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -533,10 +533,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
 			struct buffer_head *bh;
 			struct dnode *d1;
 			struct quad_buffer_head qbh1;
-			if (hpfs_sb(i->i_sb)->sb_chk) if (up != i->i_ino) {
-				hpfs_error(i->i_sb, "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08x", dno, up, i->i_ino);
+			if (hpfs_sb(i->i_sb)->sb_chk)
+			    if (up != i->i_ino) {
+				hpfs_error(i->i_sb,
+					"bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
+					dno, up, (unsigned long)i->i_ino);
 				return;
-			}
+			    }
 			if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
 				d1->up = up;
 				d1->root_dnode = 1;
@@ -851,7 +854,9 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
 	/* Going to the next dirent */
 	if ((d = de_next_de(de)) < dnode_end_de(dnode)) {
 		if (!(++*posp & 077)) {
-			hpfs_error(inode->i_sb, "map_pos_dirent: pos crossed dnode boundary; pos = %08x", *posp);
+			hpfs_error(inode->i_sb,
+				"map_pos_dirent: pos crossed dnode boundary; pos = %08llx",
+				(unsigned long long)*posp);
 			goto bail;
 		}
 		/* We're going down the tree */
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 66339dc030..547a838457 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -243,8 +243,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, char *key, char *data
 		fnode->ea_offs = 0xc4;
 	}
 	if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) {
-		hpfs_error(s, "fnode %08x: ea_offs == %03x, ea_size_s == %03x",
-			inode->i_ino, fnode->ea_offs, fnode->ea_size_s);
+		hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
+			(unsigned long)inode->i_ino,
+			fnode->ea_offs, fnode->ea_size_s);
 		return;
 	}
 	if ((fnode->ea_size_s || !fnode->ea_size_l) &&
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 7faef8544f..85d3e1d9ac 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -251,7 +251,10 @@ void hpfs_write_inode_nolock(struct inode *i)
 			de->file_size = 0;
 			hpfs_mark_4buffers_dirty(&qbh);
 			hpfs_brelse4(&qbh);
-		} else hpfs_error(i->i_sb, "directory %08x doesn't have '.' entry", i->i_ino);
+		} else
+			hpfs_error(i->i_sb,
+				"directory %08lx doesn't have '.' entry",
+				(unsigned long)i->i_ino);
 	}
 	mark_buffer_dirty(bh);
 	brelse(bh);
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 0fecdac22e..c4724589b2 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -126,32 +126,40 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
 			struct extended_attribute *ea;
 			struct extended_attribute *ea_end;
 			if (fnode->magic != FNODE_MAGIC) {
-				hpfs_error(s, "bad magic on fnode %08x", ino);
+				hpfs_error(s, "bad magic on fnode %08lx",
+					(unsigned long)ino);
 				goto bail;
 			}
 			if (!fnode->dirflag) {
 				if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes !=
 				    (fnode->btree.internal ? 12 : 8)) {
-					hpfs_error(s, "bad number of nodes in fnode %08x", ino);
+					hpfs_error(s,
+					   "bad number of nodes in fnode %08lx",
+					    (unsigned long)ino);
 					goto bail;
 				}
 				if (fnode->btree.first_free !=
 				    8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) {
-					hpfs_error(s, "bad first_free pointer in fnode %08x", ino);
+					hpfs_error(s,
+					    "bad first_free pointer in fnode %08lx",
+					    (unsigned long)ino);
 					goto bail;
 				}
 			}
 			if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 ||
 			   (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) {
-				hpfs_error(s, "bad EA info in fnode %08x: ea_offs == %04x ea_size_s == %04x",
-					ino, fnode->ea_offs, fnode->ea_size_s);
+				hpfs_error(s,
+					"bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
+					(unsigned long)ino,
+					fnode->ea_offs, fnode->ea_size_s);
 				goto bail;
 			}
 			ea = fnode_ea(fnode);
 			ea_end = fnode_end_ea(fnode);
 			while (ea != ea_end) {
 				if (ea > ea_end) {
-					hpfs_error(s, "bad EA in fnode %08x", ino);
+					hpfs_error(s, "bad EA in fnode %08lx",
+						(unsigned long)ino);
 					goto bail;
 				}
 				ea = next_ea(ea);
-- 
cgit v1.2.2


From dc168427e6250a5a24c59f34afed6538092dab42 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Wed, 6 Dec 2006 20:37:07 -0800
Subject: [PATCH] VFS: extra check inside dentry_unhash()

d_count check after dget() is always true.

Signed-off-by:	Vasily Averin <vvs@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 61f99c1967..db1bca26d8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1998,8 +1998,7 @@ asmlinkage long sys_mkdir(const char __user *pathname, int mode)
 void dentry_unhash(struct dentry *dentry)
 {
 	dget(dentry);
-	if (atomic_read(&dentry->d_count))
-		shrink_dcache_parent(dentry);
+	shrink_dcache_parent(dentry);
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	if (atomic_read(&dentry->d_count) == 2)
-- 
cgit v1.2.2


From 072330584404392dae44cd0793ac9b316cff045b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:12 -0800
Subject: [PATCH] vfs_getattr(): remove dead code

As Mikulas points out, (1 << anything) won't be evaluating to zero.  This code
is long-dead.

Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/stat.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/stat.c b/fs/stat.c
index bca07eb200..a0ebfc7f8a 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -51,13 +51,6 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
 	generic_fillattr(inode, stat);
-	if (!stat->blksize) {
-		struct super_block *s = inode->i_sb;
-		unsigned blocks;
-		blocks = (stat->size+s->s_blocksize-1) >> s->s_blocksize_bits;
-		stat->blocks = (s->s_blocksize / 512) * blocks;
-		stat->blksize = s->s_blocksize;
-	}
 	return 0;
 }
 
-- 
cgit v1.2.2


From 3a229b39eb8497ae5f8077f81f7c8c3e1aacd624 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:14 -0800
Subject: [PATCH] ext3: uninline large functions

Saves nearly 4kbytes on x86.

Cc: Arnaldo Carvalho de Melo <acme@mandriva.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/Makefile   |  2 +-
 fs/ext3/ext3_jbd.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 fs/ext3/ext3_jbd.c

(limited to 'fs')

diff --git a/fs/ext3/Makefile b/fs/ext3/Makefile
index 704cd44a40..e77766a8b3 100644
--- a/fs/ext3/Makefile
+++ b/fs/ext3/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_EXT3_FS) += ext3.o
 
 ext3-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-	   ioctl.o namei.o super.o symlink.o hash.o resize.o
+	   ioctl.o namei.o super.o symlink.o hash.o resize.o ext3_jbd.o
 
 ext3-$(CONFIG_EXT3_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
 ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c
new file mode 100644
index 0000000000..e1f91fd26a
--- /dev/null
+++ b/fs/ext3/ext3_jbd.c
@@ -0,0 +1,59 @@
+/*
+ * Interface between ext3 and JBD
+ */
+
+#include <linux/ext3_jbd.h>
+
+int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_get_undo_access(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_get_write_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_get_write_access(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_forget(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_forget(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_revoke(const char *where, handle_t *handle,
+				unsigned long blocknr, struct buffer_head *bh)
+{
+	int err = journal_revoke(handle, blocknr, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_get_create_access(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = journal_get_create_access(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext3_journal_dirty_metadata(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = journal_dirty_metadata(handle, bh);
+	if (err)
+		ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
-- 
cgit v1.2.2


From 8984d137df669a6e94dbce7b87095e4ce80b9e67 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:15 -0800
Subject: [PATCH] ext4: uninline large functions

Saves nearly 4kbytes on x86.

Cc: Arnaldo Carvalho de Melo <acme@mandriva.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/Makefile    |  3 ++-
 fs/ext4/ext4_jbd2.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 fs/ext4/ext4_jbd2.c

(limited to 'fs')

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index a6acb96ebe..ae6e7e502a 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -5,7 +5,8 @@
 obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
 
 ext4dev-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-	   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
+		   ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+		   ext4_jbd2.o
 
 ext4dev-$(CONFIG_EXT4DEV_FS_XATTR)	+= xattr.o xattr_user.o xattr_trusted.o
 ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL)	+= acl.o
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
new file mode 100644
index 0000000000..d6afe4e273
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.c
@@ -0,0 +1,59 @@
+/*
+ * Interface between ext4 and JBD
+ */
+
+#include <linux/ext4_jbd2.h>
+
+int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = jbd2_journal_get_undo_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_get_write_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = jbd2_journal_get_write_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_forget(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = jbd2_journal_forget(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_revoke(const char *where, handle_t *handle,
+				ext4_fsblk_t blocknr, struct buffer_head *bh)
+{
+	int err = jbd2_journal_revoke(handle, blocknr, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_get_create_access(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = jbd2_journal_get_create_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
+
+int __ext4_journal_dirty_metadata(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = jbd2_journal_dirty_metadata(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+	return err;
+}
-- 
cgit v1.2.2


From 8bb0269160df2a60764013994d0bc5165406cf4a Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@lougher.org.uk>
Date: Wed, 6 Dec 2006 20:37:20 -0800
Subject: [PATCH] corrupted cramfs filesystems cause kernel oops

Steve Grubb's fzfuzzer tool (http://people.redhat.com/sgrubb/files/
fsfuzzer-0.6.tar.gz) generates corrupt Cramfs filesystems which cause
Cramfs to kernel oops in cramfs_uncompress_block().  The cause of the oops
is an unchecked corrupted block length field read by cramfs_readpage().

This patch adds a sanity check to cramfs_readpage() which checks that the
block length field is sensible.  The (PAGE_CACHE_SIZE << 1) size check is
intentional, even though the uncompressed data is not going to be larger
than PAGE_CACHE_SIZE, gzip sometimes generates compressed data larger than
the original source data.  Mkcramfs checks that the compressed size is
always less than or equal to PAGE_CACHE_SIZE << 1.  Of course Cramfs could
use the original uncompressed data in this case, but it doesn't.

Signed-off-by: Phillip Lougher <phillip@lougher.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cramfs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a624c3ec81..0509cedd41 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -481,6 +481,8 @@ static int cramfs_readpage(struct file *file, struct page * page)
 		pgdata = kmap(page);
 		if (compr_len == 0)
 			; /* hole */
+		else if (compr_len > (PAGE_CACHE_SIZE << 1))
+			printk(KERN_ERR "cramfs: bad compressed blocksize %u\n", compr_len);
 		else {
 			mutex_lock(&read_mutex);
 			bytes_filled = cramfs_uncompress_block(pgdata,
-- 
cgit v1.2.2


From 6fb50ea79cb869667adaa71ed32cc15dd73986de Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:37:25 -0800
Subject: [PATCH] ext4_ext_split(): remove dead code

The Coverity checker noted that this was dead code, since in all places
above in this function, "err" is immediately checked.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2608dce18f..1442ccbaea 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -800,9 +800,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	}
 
 	/* insert new index */
-	if (err)
-		goto cleanup;
-
 	err = ext4_ext_insert_index(handle, inode, path + at,
 				    le32_to_cpu(border), newblock);
 
-- 
cgit v1.2.2


From 93f210dd9e614ddab7ecef0b4c9ba6ad3720d860 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 6 Dec 2006 20:37:33 -0800
Subject: [PATCH] protect ext2 ioctl modifying append_only immutable etc with
 i_mutex

Port commit a090d9132c1e53e3517111123680c15afb25c0a4 into ext2:

All modifications of ->i_flags in inodes that might be visible to somebody
else must be under ->i_mutex.  That patch fixes ext2 ioctl() setting S_APPEND.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/ioctl.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 1dfba77eab..e3cf8c8150 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -44,6 +44,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		if (!S_ISDIR(inode->i_mode))
 			flags &= ~EXT2_DIRSYNC_FL;
 
+		mutex_lock(&inode->i_mutex);
 		oldflags = ei->i_flags;
 
 		/*
@@ -53,13 +54,16 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		 * This test looks nicer. Thanks to Pauline Middelink
 		 */
 		if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
-			if (!capable(CAP_LINUX_IMMUTABLE))
+			if (!capable(CAP_LINUX_IMMUTABLE)) {
+				mutex_unlock(&inode->i_mutex);
 				return -EPERM;
+			}
 		}
 
 		flags = flags & EXT2_FL_USER_MODIFIABLE;
 		flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
 		ei->i_flags = flags;
+		mutex_unlock(&inode->i_mutex);
 
 		ext2_set_inode_flags(inode);
 		inode->i_ctime = CURRENT_TIME_SEC;
-- 
cgit v1.2.2


From e92a4d595b464c4aae64be39ca61a9ffe9c8b278 Mon Sep 17 00:00:00 2001
From: Andrey Savochkin <saw@sw.ru>
Date: Wed, 6 Dec 2006 20:37:34 -0800
Subject: [PATCH] retries in ext3_prepare_write() violate ordering requirements

In journal=ordered or journal=data mode retry in ext3_prepare_write()
breaks the requirements of journaling of data with respect to metadata.
The fix is to call commit_write to commit allocated zero blocks before
retry.

Signed-off-by: Kirill Korotaev <dev@openvz.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ken Chen <kenneth.w.chen@intel.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 03ba5bcab1..beaf25f511 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1148,37 +1148,102 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext3_journal_get_write_access(handle, bh);
 }
 
+/*
+ * The idea of this helper function is following:
+ * if prepare_write has allocated some blocks, but not all of them, the
+ * transaction must include the content of the newly allocated blocks.
+ * This content is expected to be set to zeroes by block_prepare_write().
+ * 2006/10/14  SAW
+ */
+static int ext3_prepare_failure(struct file *file, struct page *page,
+				unsigned from, unsigned to)
+{
+	struct address_space *mapping;
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	int ret;
+	handle_t *handle = ext3_journal_current_handle();
+
+	mapping = page->mapping;
+	if (ext3_should_writeback_data(mapping->host)) {
+		/* optimization: no constraints about data */
+skip:
+		return ext3_journal_stop(handle);
+	}
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from)
+			continue;
+		if (block_start >= to) {
+			block_start = to;
+			break;
+		}
+		if (!buffer_mapped(bh))
+		/* prepare_write failed on this bh */
+			break;
+		if (ext3_should_journal_data(mapping->host)) {
+			ret = do_journal_get_write_access(handle, bh);
+			if (ret) {
+				ext3_journal_stop(handle);
+				return ret;
+			}
+		}
+	/*
+	 * block_start here becomes the first block where the current iteration
+	 * of prepare_write failed.
+	 */
+	}
+	if (block_start <= from)
+		goto skip;
+
+	/* commit allocated and zeroed buffers */
+	return mapping->a_ops->commit_write(file, page, from, block_start);
+}
+
 static int ext3_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
+	int ret, ret2;
+	int needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
-	}
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
 	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext3_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
-		goto prepare_write_failed;
+		goto failure;
 
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
+		if (ret)
+			/* fatal error, just put the handle and return */
+			journal_stop(handle);
 	}
-prepare_write_failed:
-	if (ret)
-		ext3_journal_stop(handle);
+	return ret;
+
+failure:
+	ret2 = ext3_prepare_failure(file, page, from, to);
+	if (ret2 < 0)
+		return ret2;
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-out:
+	/* retry number exceeded, or other error like -EDQUOT */
 	return ret;
 }
 
-- 
cgit v1.2.2


From b46be05004abb419e303e66e143eed9f8a6e9f3f Mon Sep 17 00:00:00 2001
From: Andrey Savochkin <saw@sw.ru>
Date: Wed, 6 Dec 2006 20:37:36 -0800
Subject: [PATCH] retries in ext4_prepare_write() violate ordering requirements

In journal=ordered or journal=data mode retry in ext4_prepare_write()
breaks the requirements of journaling of data with respect to metadata.
The fix is to call commit_write to commit allocated zero blocks before
retry.

Signed-off-by: Kirill Korotaev <dev@openvz.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ken Chen <kenneth.w.chen@intel.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0a60ec5a16..1d85d4ec95 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1147,37 +1147,102 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext4_journal_get_write_access(handle, bh);
 }
 
+/*
+ * The idea of this helper function is following:
+ * if prepare_write has allocated some blocks, but not all of them, the
+ * transaction must include the content of the newly allocated blocks.
+ * This content is expected to be set to zeroes by block_prepare_write().
+ * 2006/10/14  SAW
+ */
+static int ext4_prepare_failure(struct file *file, struct page *page,
+				unsigned from, unsigned to)
+{
+	struct address_space *mapping;
+	struct buffer_head *bh, *head, *next;
+	unsigned block_start, block_end;
+	unsigned blocksize;
+	int ret;
+	handle_t *handle = ext4_journal_current_handle();
+
+	mapping = page->mapping;
+	if (ext4_should_writeback_data(mapping->host)) {
+		/* optimization: no constraints about data */
+skip:
+		return ext4_journal_stop(handle);
+	}
+
+	head = page_buffers(page);
+	blocksize = head->b_size;
+	for (	bh = head, block_start = 0;
+		bh != head || !block_start;
+	    	block_start = block_end, bh = next)
+	{
+		next = bh->b_this_page;
+		block_end = block_start + blocksize;
+		if (block_end <= from)
+			continue;
+		if (block_start >= to) {
+			block_start = to;
+			break;
+		}
+		if (!buffer_mapped(bh))
+		/* prepare_write failed on this bh */
+			break;
+		if (ext4_should_journal_data(mapping->host)) {
+			ret = do_journal_get_write_access(handle, bh);
+			if (ret) {
+				ext4_journal_stop(handle);
+				return ret;
+			}
+		}
+	/*
+	 * block_start here becomes the first block where the current iteration
+	 * of prepare_write failed.
+	 */
+	}
+	if (block_start <= from)
+		goto skip;
+
+	/* commit allocated and zeroed buffers */
+	return mapping->a_ops->commit_write(file, page, from, block_start);
+}
+
 static int ext4_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+	int ret, ret2;
+	int needed_blocks = ext4_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext4_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
-	}
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext4_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext4_get_block);
 	if (ret)
-		goto prepare_write_failed;
+		goto failure;
 
 	if (ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
+		if (ret)
+			/* fatal error, just put the handle and return */
+			journal_stop(handle);
 	}
-prepare_write_failed:
-	if (ret)
-		ext4_journal_stop(handle);
+	return ret;
+
+failure:
+	ret2 = ext4_prepare_failure(file, page, from, to);
+	if (ret2 < 0)
+		return ret2;
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-out:
+	/* retry number exceeded, or other error like -EDQUOT */
 	return ret;
 }
 
-- 
cgit v1.2.2


From c36264dfb2d6fa6383082de0a1bba8e12b477da1 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Wed, 6 Dec 2006 20:37:42 -0800
Subject: [PATCH] remove the syslog interface when printk is disabled

Attempts to read() from the non-existent dmesg buffer will return zero and
userspace tends to get stuck in a busyloop.

So just remove /dev/kmsg altogether if CONFIG_PRINTK=n.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/Makefile    | 3 ++-
 fs/proc/proc_misc.c | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 7431d7ba2d..f6c7762725 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,8 +8,9 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		kmsg.o proc_tty.o proc_misc.o
+		proc_tty.o proc_misc.o
 
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
 proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
+proc-$(CONFIG_PRINTK)	+= kmsg.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 93c43b676e..51815cece6 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -696,9 +696,11 @@ void __init proc_misc_init(void)
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	/* And now for trickier ones */
+#ifdef CONFIG_PRINTK
 	entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
 	if (entry)
 		entry->proc_fops = &proc_kmsg_operations;
+#endif
 	create_seq_entry("devices", 0, &proc_devinfo_operations);
 	create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
 #ifdef CONFIG_BLOCK
-- 
cgit v1.2.2


From 386d9a7edd9f3492c99124b0a659e9ed7abb30f9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus@valinux.co.jp>
Date: Wed, 6 Dec 2006 20:37:53 -0800
Subject: [PATCH] elf: Always define elf_addr_t in linux/elf.h

Define elf_addr_t in linux/elf.h.  The size of the type is determined using
ELF_CLASS.  This allows us to remove the defines that today are spread all
over .c and .h files.

Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Cc: Daniel Jacobowitz <drow@false.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c       | 4 ----
 fs/binfmt_elf_fdpic.c | 3 ---
 2 files changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 68e20d5bfe..14ea630a85 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -47,10 +47,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
 
-#ifndef elf_addr_t
-#define elf_addr_t unsigned long
-#endif
-
 /*
  * If we don't support core dumping, then supply a NULL so we
  * don't even try.
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index f86d5c9ce5..ed9a61c6be 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -40,9 +40,6 @@
 #include <asm/pgalloc.h>
 
 typedef char *elf_caddr_t;
-#ifndef elf_addr_t
-#define elf_addr_t unsigned long
-#endif
 
 #if 0
 #define kdebug(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ )
-- 
cgit v1.2.2


From 360276042d7a8369ce912acff99c1c4de394b312 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus@valinux.co.jp>
Date: Wed, 6 Dec 2006 20:38:00 -0800
Subject: [PATCH] elf: fix kcore note size calculation

 - Define "CORE" string as CORE_STR in single common place.
 - Include terminating zero in CORE_STR length calculation for elf_buflen.
 - Use roundup(,4) to include alignment in elf_buflen calculation.

[akpm@osdl.org: simplification suggested by Roland]
Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Cc: Daniel Jacobowitz <drow@false.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/kcore.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1294eda4ac..1be73082ed 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -22,6 +22,7 @@
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
+#define CORE_STR "CORE"
 
 static int open_kcore(struct inode * inode, struct file * filp)
 {
@@ -82,10 +83,11 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
 	}
 	*elf_buflen =	sizeof(struct elfhdr) + 
 			(*nphdr + 2)*sizeof(struct elf_phdr) + 
-			3 * (sizeof(struct elf_note) + 4) +
-			sizeof(struct elf_prstatus) +
-			sizeof(struct elf_prpsinfo) +
-			sizeof(struct task_struct);
+			3 * ((sizeof(struct elf_note)) +
+			     roundup(sizeof(CORE_STR), 4)) +
+			roundup(sizeof(struct elf_prstatus), 4) +
+			roundup(sizeof(struct elf_prpsinfo), 4) +
+			roundup(sizeof(struct task_struct), 4);
 	*elf_buflen = PAGE_ALIGN(*elf_buflen);
 	return size + *elf_buflen;
 }
@@ -210,7 +212,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	nhdr->p_offset	= offset;
 
 	/* set up the process status */
-	notes[0].name = "CORE";
+	notes[0].name = CORE_STR;
 	notes[0].type = NT_PRSTATUS;
 	notes[0].datasz = sizeof(struct elf_prstatus);
 	notes[0].data = &prstatus;
@@ -221,7 +223,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	bufp = storenote(&notes[0], bufp);
 
 	/* set up the process info */
-	notes[1].name	= "CORE";
+	notes[1].name	= CORE_STR;
 	notes[1].type	= NT_PRPSINFO;
 	notes[1].datasz	= sizeof(struct elf_prpsinfo);
 	notes[1].data	= &prpsinfo;
@@ -238,7 +240,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 	bufp = storenote(&notes[1], bufp);
 
 	/* set up the task structure */
-	notes[2].name	= "CORE";
+	notes[2].name	= CORE_STR;
 	notes[2].type	= NT_TASKSTRUCT;
 	notes[2].datasz	= sizeof(struct task_struct);
 	notes[2].data	= current;
-- 
cgit v1.2.2


From de21c57b90b3716f6f951e88e039d00ab6729ce9 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@openvz.org>
Date: Wed, 6 Dec 2006 20:38:02 -0800
Subject: [PATCH] reiserfs: add missing D-cache flushing

Looks like, reiserfs_prepare_file_region_for_write() doesn't contain
several flush_dcache_page() calls.

Found with help from Dmitriy Monakhov <dmonakhov@openvz.org>

[akpm@osdl.org: small speedup]
Signed-off-by: Alexey Dobriyan <adobriyan@openvz.org>
Cc: Dmitriy Monakhov <dmonakhov@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ac14318c81..6526498949 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1045,6 +1045,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 			char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
 			memset(kaddr, 0, from);
 			kunmap_atomic(kaddr, KM_USER0);
+			flush_dcache_page(prepared_pages[0]);
 		}
 		if (to != PAGE_CACHE_SIZE) {	/* Last page needs to be partially zeroed */
 			char *kaddr =
@@ -1052,6 +1053,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 					KM_USER0);
 			memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
 			kunmap_atomic(kaddr, KM_USER0);
+			flush_dcache_page(prepared_pages[num_pages - 1]);
 		}
 
 		/* Since all blocks are new - use already calculated value */
@@ -1185,6 +1187,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 					memset(kaddr + block_start, 0,
 					       from - block_start);
 					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(prepared_pages[0]);
 					set_buffer_uptodate(bh);
 				}
 			}
@@ -1222,6 +1225,7 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
 							KM_USER0);
 					memset(kaddr + to, 0, block_end - to);
 					kunmap_atomic(kaddr, KM_USER0);
+					flush_dcache_page(prepared_pages[num_pages - 1]);
 					set_buffer_uptodate(bh);
 				}
 			}
-- 
cgit v1.2.2


From 02316067852187b8bec781bec07410e91af79627 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 6 Dec 2006 20:38:17 -0800
Subject: [PATCH] hotplug CPU: clean up hotcpu_notifier() use

There was lots of #ifdef noise in the kernel due to hotcpu_notifier(fn,
prio) not correctly marking 'fn' as used in the !HOTPLUG_CPU case, and thus
generating compiler warnings of unused symbols, hence forcing people to add
#ifdefs.

the compiler can skip truly unused functions just fine:

    text    data     bss     dec     hex filename
 1624412  728710 3674856 6027978  5bfaca vmlinux.before
 1624412  728710 3674856 6027978  5bfaca vmlinux.after

[akpm@osdl.org: topology.c fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index a8ca0ac214..517860f2d7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2972,7 +2972,6 @@ init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
 	}
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static void buffer_exit_cpu(int cpu)
 {
 	int i;
@@ -2994,7 +2993,6 @@ static int buffer_cpu_notify(struct notifier_block *self,
 		buffer_exit_cpu((unsigned long)hcpu);
 	return NOTIFY_OK;
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 void __init buffer_init(void)
 {
-- 
cgit v1.2.2


From 2bd94bd79e5bfa217714f78e5d6d7b6517ca546f Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 6 Dec 2006 20:38:18 -0800
Subject: [PATCH] ext3: fix reservation extension

Hugh Dickins wrote:
> Not found anything relevant, but I keep noticing these lines
> in ext2_try_to_allocate_with_rsv(), ext3 and ext4 similar:
>
> 		} else if (grp_goal > 0 &&
> 				(my_rsv->rsv_end - grp_goal + 1) < *count)
> 			try_to_extend_reservation(my_rsv, sb,
> 					*count-my_rsv->rsv_end + grp_goal - 1);
>
> They're wrong, a no-op in most groups, aren't they?  rsv_end is an
> absolute block number, whereas grp_goal is group-relative, so the
> calculation ought to bring in group_first_block?  Or I'm confused.
>

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: "linux-ext4@vger.kernel.org" <linux-ext4@vger.kernel.org>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index b41a7d7e20..f96c40a90e 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1307,10 +1307,14 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0 &&
-			  (my_rsv->rsv_end-grp_goal+1) < *count)
-			try_to_extend_reservation(my_rsv, sb,
-					*count-my_rsv->rsv_end + grp_goal - 1);
+		} else if (grp_goal > 0) {
+			int curr = my_rsv->rsv_end -
+					(grp_goal + group_first_block) + 1;
+
+			if (curr < *count)
+				try_to_extend_reservation(my_rsv, sb,
+							*count - curr);
+		}
 
 		if ((my_rsv->rsv_start > group_last_block) ||
 				(my_rsv->rsv_end < group_first_block)) {
-- 
cgit v1.2.2


From 1df1e63b9e9340015c01b85817568fb9afde10bc Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 6 Dec 2006 20:38:19 -0800
Subject: [PATCH] ext4: fix reservation extension

Hugh Dickins wrote:
> Not found anything relevant, but I keep noticing these lines
> in ext2_try_to_allocate_with_rsv(), ext3 and ext4 similar:
>
> 		} else if (grp_goal > 0 &&
> 				(my_rsv->rsv_end - grp_goal + 1) < *count)
> 			try_to_extend_reservation(my_rsv, sb,
> 					*count-my_rsv->rsv_end + grp_goal - 1);
>
> They're wrong, a no-op in most groups, aren't they?  rsv_end is an
> absolute block number, whereas grp_goal is group-relative, so the
> calculation ought to bring in group_first_block?  Or I'm confused.
>

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Cc: "linux-ext4@vger.kernel.org" <linux-ext4@vger.kernel.org>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 5d45582f95..6bd0bd5bbd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1324,10 +1324,14 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0 &&
-			  (my_rsv->rsv_end-grp_goal+1) < *count)
-			try_to_extend_reservation(my_rsv, sb,
-					*count-my_rsv->rsv_end + grp_goal - 1);
+		} else if (grp_goal > 0) {
+			int curr = my_rsv->rsv_end -
+					(grp_goal + group_first_block) + 1;
+
+			if (curr < *count)
+				try_to_extend_reservation(my_rsv, sb,
+							*count - curr);
+		}
 
 		if ((my_rsv->rsv_start > group_last_block) ||
 				(my_rsv->rsv_end < group_first_block)) {
-- 
cgit v1.2.2


From 8487f2e4067839ad3d009c240d51b682264320ae Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:25 -0800
Subject: [PATCH] make ecryptfs_version_str_map[] static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ecryptfs/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 306f8fbd1a..3ede12b259 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -691,7 +691,7 @@ static ssize_t version_show(struct ecryptfs_obj *obj, char *buff)
 
 static struct ecryptfs_attribute sysfs_attr_version = __ATTR_RO(version);
 
-struct ecryptfs_version_str_map_elem {
+static struct ecryptfs_version_str_map_elem {
 	u32 flag;
 	char *str;
 } ecryptfs_version_str_map[] = {
-- 
cgit v1.2.2


From d394e122bc1adba0f3eb1ebec1cedb8a8c524741 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:26 -0800
Subject: [PATCH] make fs/jbd/transaction.c:__journal_temp_unlink_buffer()
 static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/transaction.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 4f82bcd63e..d38e0d575e 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -27,6 +27,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 
+static void __journal_temp_unlink_buffer(struct journal_head *jh);
+
 /*
  * get_transaction: obtain a new transaction_t object.
  *
@@ -1499,7 +1501,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  *
  * Called under j_list_lock.  The journal may not be locked.
  */
-void __journal_temp_unlink_buffer(struct journal_head *jh)
+static void __journal_temp_unlink_buffer(struct journal_head *jh)
 {
 	struct journal_head **list = NULL;
 	transaction_t *transaction;
-- 
cgit v1.2.2


From 7ddae86095794cce4364740edd8463c77654a265 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:27 -0800
Subject: [PATCH] make
 fs/jbd2/transaction.c:__kbd2_journal_temp_unlink_buffer() static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/transaction.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c051a94c8a..3a8700153c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -27,6 +27,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 
+static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
+
 /*
  * jbd2_get_transaction: obtain a new transaction_t object.
  *
-- 
cgit v1.2.2


From c585646dd1d98caf0a5f2e85c794c1441df6fac1 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:29 -0800
Subject: [PATCH] fs/lockd/host.c: make 2 functions static

Make the following needlessly global functions static:

 - nlm_lookup_host()
 - nsm_find()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/host.c | 53 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index fb24a97303..3d4610c2a2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -36,34 +36,14 @@ static DEFINE_MUTEX(nlm_host_mutex);
 static void			nlm_gc_hosts(void);
 static struct nsm_handle *	__nsm_find(const struct sockaddr_in *,
 					const char *, int, int);
-
-/*
- * Find an NLM server handle in the cache. If there is none, create it.
- */
-struct nlm_host *
-nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
-			const char *hostname, int hostname_len)
-{
-	return nlm_lookup_host(0, sin, proto, version,
-			       hostname, hostname_len);
-}
-
-/*
- * Find an NLM client handle in the cache. If there is none, create it.
- */
-struct nlm_host *
-nlmsvc_lookup_host(struct svc_rqst *rqstp,
-			const char *hostname, int hostname_len)
-{
-	return nlm_lookup_host(1, &rqstp->rq_addr,
-			       rqstp->rq_prot, rqstp->rq_vers,
-			       hostname, hostname_len);
-}
+static struct nsm_handle *	nsm_find(const struct sockaddr_in *sin,
+					 const char *hostname,
+					 int hostname_len);
 
 /*
  * Common host lookup routine for server & client
  */
-struct nlm_host *
+static struct nlm_host *
 nlm_lookup_host(int server, const struct sockaddr_in *sin,
 					int proto, int version,
 					const char *hostname,
@@ -194,6 +174,29 @@ nlm_destroy_host(struct nlm_host *host)
 	kfree(host);
 }
 
+/*
+ * Find an NLM server handle in the cache. If there is none, create it.
+ */
+struct nlm_host *
+nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
+			const char *hostname, int hostname_len)
+{
+	return nlm_lookup_host(0, sin, proto, version,
+			       hostname, hostname_len);
+}
+
+/*
+ * Find an NLM client handle in the cache. If there is none, create it.
+ */
+struct nlm_host *
+nlmsvc_lookup_host(struct svc_rqst *rqstp,
+			const char *hostname, int hostname_len)
+{
+	return nlm_lookup_host(1, &rqstp->rq_addr,
+			       rqstp->rq_prot, rqstp->rq_vers,
+			       hostname, hostname_len);
+}
+
 /*
  * Create the NLM RPC client for an NLM peer
  */
@@ -495,7 +498,7 @@ out:
 	return nsm;
 }
 
-struct nsm_handle *
+static struct nsm_handle *
 nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len)
 {
 	return __nsm_find(sin, hostname, hostname_len, 1);
-- 
cgit v1.2.2


From 9711ef9945ce33b2b4ecb51a4db3f65f7d784876 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:38:31 -0800
Subject: [PATCH] make fs/proc/base.c:proc_pid_instantiate() static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 05ace70d05..b859fc749c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1885,8 +1885,9 @@ out:
 	return;
 }
 
-struct dentry *proc_pid_instantiate(struct inode *dir,
-	struct dentry * dentry, struct task_struct *task, void *ptr)
+static struct dentry *proc_pid_instantiate(struct inode *dir,
+					   struct dentry * dentry,
+					   struct task_struct *task, void *ptr)
 {
 	struct dentry *error = ERR_PTR(-ENOENT);
 	struct inode *inode;
-- 
cgit v1.2.2


From d18de5a2721f84ffd6a5d637915746ed47becc1c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Dec 2006 20:38:45 -0800
Subject: [PATCH] don't insert pipe dentries into dentry_hashtable.

We currently insert pipe dentries into the global dentry hashtable.  This
is suboptimal because there is currently no way these entries can be used
for a lookup().  (/proc/xxx/fd/xxx uses a different mechanism).  Inserting
them in dentry hashtable slows dcache lookups.

To let __dpath() still work correctly (ie not adding a " (deleted)") after
dentry name, we do :

 - Right after d_alloc(), pretend they are hashed by clearing the
   DCACHE_UNHASHED bit.

 - Call d_instantiate() instead of d_add() : dentry is not inserted in
   hash table.

__dpath() & friends work as intended during dentry lifetime.

 - At dismantle time, once dput() must clear the dentry, setting again
   DCACHE_UNHASHED bit inside the custom d_delete() function provided by
   pipe code, so that dput() can just kill_it.

This patch, combined with (avoid RCU for never hashed dentries) reduced
time of { pipe(p); close(p[0]); close(p[1]);} on my UP machine (1.6GHz
Pentium-M) from 3.23 us to 2.86 us (But this patch does not depend on other
patches, only bench results)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/pipe.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/pipe.c b/fs/pipe.c
index b1626f269a..ae36b89b1a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -830,7 +830,14 @@ void free_pipe_info(struct inode *inode)
 static struct vfsmount *pipe_mnt __read_mostly;
 static int pipefs_delete_dentry(struct dentry *dentry)
 {
-	return 1;
+	/*
+	 * At creation time, we pretended this dentry was hashed
+	 * (by clearing DCACHE_UNHASHED bit in d_flags)
+	 * At delete time, we restore the truth : not hashed.
+	 * (so that dput() can proceed correctly)
+	 */
+	dentry->d_flags |= DCACHE_UNHASHED;
+	return 0;
 }
 
 static struct dentry_operations pipefs_dentry_operations = {
@@ -891,17 +898,22 @@ struct file *create_write_pipe(void)
 	if (!inode)
 		goto err_file;
 
-	sprintf(name, "[%lu]", inode->i_ino);
+	this.len = sprintf(name, "[%lu]", inode->i_ino);
 	this.name = name;
-	this.len = strlen(name);
-	this.hash = inode->i_ino; /* will go */
+	this.hash = 0;
 	err = -ENOMEM;
 	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
 	if (!dentry)
 		goto err_inode;
 
 	dentry->d_op = &pipefs_dentry_operations;
-	d_add(dentry, inode);
+	/*
+	 * We dont want to publish this dentry into global dentry hash table.
+	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
+	 * This permits a working /proc/$pid/fd/XXX on pipes
+	 */
+	dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(dentry, inode);
 	f->f_vfsmnt = mntget(pipe_mnt);
 	f->f_dentry = dentry;
 	f->f_mapping = inode->i_mapping;
-- 
cgit v1.2.2


From b3423415fbc2e5461605826317da1c8dbbf21f97 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 6 Dec 2006 20:38:48 -0800
Subject: [PATCH] dcache: avoid RCU for never-hashed dentries

Some dentries don't need to be globally visible in dentry hashtable.
(pipes & sockets)

Such dentries dont need to wait for a RCU grace period at delete time.
Being able to free them permits a better CPU cache use (hot cache)

This patch combined with (dont insert pipe dentries into dentry_hashtable)
reduced time of { pipe(p); close(p[0]); close(p[1]);} on my UP machine (1.6
GHz Pentium-M) from 3.23 us to 2.86 us (But this patch does not depend on
other patches, only bench results)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dcache.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index a7c67cee5d..d68631f18d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -68,15 +68,19 @@ struct dentry_stat_t dentry_stat = {
 	.age_limit = 45,
 };
 
-static void d_callback(struct rcu_head *head)
+static void __d_free(struct dentry *dentry)
 {
-	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
-
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void d_callback(struct rcu_head *head)
+{
+	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
+	__d_free(dentry);
+}
+
 /*
  * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
  * inside dcache_lock.
@@ -85,7 +89,11 @@ static void d_free(struct dentry *dentry)
 {
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
- 	call_rcu(&dentry->d_u.d_rcu, d_callback);
+	/* if dentry was never inserted into hash, immediate free is OK */
+	if (dentry->d_hash.pprev == NULL)
+		__d_free(dentry);
+	else
+		call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
 /*
-- 
cgit v1.2.2


From 01afb2134ed079fa4551b4d26f62423df6790c09 Mon Sep 17 00:00:00 2001
From: Yan Burman <burman.yan@gmail.com>
Date: Wed, 6 Dec 2006 20:39:01 -0800
Subject: [PATCH] reiser: replace kmalloc+memset with kzalloc

Replace kmalloc+memset with kzalloc

Signed-off-by: Yan Burman <burman.yan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c  | 3 +--
 fs/reiserfs/inode.c | 7 ++-----
 fs/reiserfs/super.c | 3 +--
 3 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 6526498949..970ecd9dbe 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -317,12 +317,11 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
 			/* area filled with zeroes, to supply as list of zero blocknumbers
 			   We allocate it outside of loop just in case loop would spin for
 			   several iterations. */
-			char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC);	// We cannot insert more than MAX_ITEM_LEN bytes anyway.
+			char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC);	// We cannot insert more than MAX_ITEM_LEN bytes anyway.
 			if (!zeros) {
 				res = -ENOMEM;
 				goto error_exit_free_blocks;
 			}
-			memset(zeros, 0, to_paste * UNFM_P_SIZE);
 			do {
 				to_paste =
 				    min_t(__u64, hole_size,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a625688de4..254239e6f9 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -929,15 +929,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
 			if (blocks_needed == 1) {
 				un = &unf_single;
 			} else {
-				un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC);	// We need to avoid scheduling.
+				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC);	// We need to avoid scheduling.
 				if (!un) {
 					un = &unf_single;
 					blocks_needed = 1;
 					max_to_insert = 0;
-				} else
-					memset(un, 0,
-					       UNFM_P_SIZE * min(blocks_needed,
-								 max_to_insert));
+				}
 			}
 			if (blocks_needed <= max_to_insert) {
 				/* we are going to add target block to the file. Use allocated
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 745bc714ba..7fb5fb036f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1549,13 +1549,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 	struct reiserfs_sb_info *sbi;
 	int errval = -EINVAL;
 
-	sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
+	sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
 	if (!sbi) {
 		errval = -ENOMEM;
 		goto error;
 	}
 	s->s_fs_info = sbi;
-	memset(sbi, 0, sizeof(struct reiserfs_sb_info));
 	/* Set default values for options: non-aggressive tails, RO on errors */
 	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
 	REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
-- 
cgit v1.2.2


From c55747682e938c57a9a859d3b26f2c4c83cea011 Mon Sep 17 00:00:00 2001
From: "Vladimir V. Saveliev" <vs@namesys.com>
Date: Wed, 6 Dec 2006 20:39:12 -0800
Subject: [PATCH] reiserfs: do not add save links for O_DIRECT writes

We add a save link for O_DIRECT writes to protect the i_size against the
crashes before we actually finish the I/O.  If we hit an -ENOSPC in
aops->prepare_write(), we would do a truncate() to release the blocks which
might have got initialized.  Now the truncate would add another save link
for the same inode causing a reiserfs panic for having multiple save links
for the same inode.

Signed-off-by: Vladimir V. Saveliev <vs@namesys.com>
Signed-off-by: Amit Arora <amitarora@in.ibm.com>
Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 54 ++++--------------------------------------------------
 1 file changed, 4 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 970ecd9dbe..373d862c3f 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -406,6 +406,8 @@ static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handl
 				   we restart it. This will also free the path. */
 				if (journal_transaction_should_end
 				    (th, th->t_blocks_allocated)) {
+					inode->i_size = cpu_key_k_offset(&key) +
+						(to_paste << inode->i_blkbits);
 					res =
 					    restart_transaction(th, inode,
 								&path);
@@ -1310,56 +1312,8 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 			count = MAX_NON_LFS - (unsigned long)*ppos;
 	}
 
-	if (file->f_flags & O_DIRECT) {	// Direct IO needs treatment
-		ssize_t result, after_file_end = 0;
-		if ((*ppos + count >= inode->i_size)
-		    || (file->f_flags & O_APPEND)) {
-			/* If we are appending a file, we need to put this savelink in here.
-			   If we will crash while doing direct io, finish_unfinished will
-			   cut the garbage from the file end. */
-			reiserfs_write_lock(inode->i_sb);
-			err =
-			    journal_begin(&th, inode->i_sb,
-					  JOURNAL_PER_BALANCE_CNT);
-			if (err) {
-				reiserfs_write_unlock(inode->i_sb);
-				return err;
-			}
-			reiserfs_update_inode_transaction(inode);
-			add_save_link(&th, inode, 1 /* Truncate */ );
-			after_file_end = 1;
-			err =
-			    journal_end(&th, inode->i_sb,
-					JOURNAL_PER_BALANCE_CNT);
-			reiserfs_write_unlock(inode->i_sb);
-			if (err)
-				return err;
-		}
-		result = do_sync_write(file, buf, count, ppos);
-
-		if (after_file_end) {	/* Now update i_size and remove the savelink */
-			struct reiserfs_transaction_handle th;
-			reiserfs_write_lock(inode->i_sb);
-			err = journal_begin(&th, inode->i_sb, 1);
-			if (err) {
-				reiserfs_write_unlock(inode->i_sb);
-				return err;
-			}
-			reiserfs_update_inode_transaction(inode);
-			mark_inode_dirty(inode);
-			err = journal_end(&th, inode->i_sb, 1);
-			if (err) {
-				reiserfs_write_unlock(inode->i_sb);
-				return err;
-			}
-			err = remove_save_link(inode, 1 /* truncate */ );
-			reiserfs_write_unlock(inode->i_sb);
-			if (err)
-				return err;
-		}
-
-		return result;
-	}
+	if (file->f_flags & O_DIRECT)
+		return do_sync_write(file, buf, count, ppos);
 
 	if (unlikely((ssize_t) count < 0))
 		return -EINVAL;
-- 
cgit v1.2.2


From 126039256cf164f69d8cfa20f1952d00fa61f52f Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Date: Wed, 6 Dec 2006 20:39:17 -0800
Subject: [PATCH] jbd2: wait for already submitted t_sync_datalist buffer to
 complete

In the current jbd code, if a buffer on BJ_SyncData list is dirty and not
locked, the buffer is refiled to BJ_Locked list, submitted to the IO and
waited for IO completion.

But the fsstress test showed the case that when a buffer was already
submitted to the IO just before the buffer_dirty(bh) check, the buffer was
not waited for IO completion.

Following patch solves this problem.  If it is assumed that a buffer is
submitted to the IO before the buffer_dirty(bh) check and still being
written to disk, this buffer is refiled to BJ_Locked list.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Jan Kara <jack@ucw.cz>
Cc: "Stephen C. Tweedie" <sct@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd2/commit.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 70b2ae1ef2..6bd8005e3d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -248,8 +248,12 @@ write_out_data:
 				bufs = 0;
 				goto write_out_data;
 			}
-		}
-		else {
+		} else if (!locked && buffer_locked(bh)) {
+			__jbd2_journal_file_buffer(jh, commit_transaction,
+						BJ_Locked);
+			jbd_unlock_bh_state(bh);
+			put_bh(bh);
+		} else {
 			BUFFER_TRACE(bh, "writeout complete: unfile");
 			__jbd2_journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
-- 
cgit v1.2.2


From cd16c8f72aaaf2af06969d1441051b90010f7041 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:18 -0800
Subject: [PATCH] ext4 balloc: reset windowsz when full

ext4_new_blocks should reset the reservation window size to 0 when squeezing
the last blocks out of an almost full filesystem, so the retry doesn't skip
any groups with less than half that free, reporting ENOSPC too soon.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6bd0bd5bbd..af3acf3c37 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1566,6 +1566,7 @@ retry_alloc:
 	 */
 	if (my_rsv) {
 		my_rsv = NULL;
+		windowsz = 0;
 		group_no = goal_group;
 		goto retry_alloc;
 	}
-- 
cgit v1.2.2


From e7dc95db2695dc92b223cdc49227ac57e63406d2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:19 -0800
Subject: [PATCH] ext4 balloc: fix off-by-one against grp_goal

grp_goal 0 is a genuine goal (unlike -1), so ext4_try_to_allocate_with_rsv
should treat it as such.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index af3acf3c37..2bcca5261e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1288,7 +1288,7 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	}
 	/*
 	 * grp_goal is a group relative block number (if there is a goal)
-	 * 0 < grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
+	 * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
 	 * first block is a filesystem wide block number
 	 * first block is the block number of the first block in this group
 	 */
@@ -1324,7 +1324,7 @@ ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0) {
+		} else if (grp_goal >= 0) {
 			int curr = my_rsv->rsv_end -
 					(grp_goal + group_first_block) + 1;
 
-- 
cgit v1.2.2


From b2f2c76d17b68869914a1ec3ab04c7674668f60d Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:20 -0800
Subject: [PATCH] ext4 balloc: fix off-by-one against rsv_end

rsv_end is the last block within the reservation, so alloc_new_reservation
should accept start_block == rsv_end as success.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 2bcca5261e..c5589b3f58 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1165,7 +1165,7 @@ retry:
 	 * check if the first free block is within the
 	 * free space we just reserved
 	 */
-	if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
+	if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
 		return 0;		/* success */
 	/*
 	 * if the first free bit we found is out of the reservable space
-- 
cgit v1.2.2


From b78a657f0a64134b3813bbdf4e1853d1420eb8d4 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:21 -0800
Subject: [PATCH] ext4 balloc: say rb_entry not list_entry

The reservations tree is an rb_tree not a list, so it's less confusing to use
rb_entry() than list_entry() - though they're both just container_of().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index c5589b3f58..8e7249e3e3 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -165,7 +165,7 @@ restart:
 
 	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
 	while (n) {
-		rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
 		if (verbose)
 			printk("reservation window 0x%p "
 			       "start:  %llu, end:  %llu\n",
@@ -966,7 +966,7 @@ static int find_next_reservable_window(
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
-		rsv = list_entry(next,struct ext4_reserve_window_node,rsv_node);
+		rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
 
 		/*
 		 * Reached the last reservation, we can just append to the
@@ -1210,7 +1210,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
 	if (!next)
 		my_rsv->rsv_end += size;
 	else {
-		next_rsv = list_entry(next, struct ext4_reserve_window_node, rsv_node);
+		next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
 
 		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
 			my_rsv->rsv_end += size;
-- 
cgit v1.2.2


From 341cee438593ff9c4520fb73c561460074f29a9a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:24 -0800
Subject: [PATCH] ext4 balloc: use io_error label

ext4_new_blocks has a nice io_error label for setting -EIO, so goto that in
the one place that doesn't already use it.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8e7249e3e3..1c8ef0e35f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1529,10 +1529,8 @@ retry_alloc:
 		if (group_no >= ngroups)
 			group_no = 0;
 		gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
-		if (!gdp) {
-			*errp = -EIO;
-			goto out;
-		}
+		if (!gdp)
+			goto io_error;
 		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
 		/*
 		 * skip this group if the number of
-- 
cgit v1.2.2


From ec0837f230e57afde65db72539e748d2a75abed0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:39:26 -0800
Subject: [PATCH] ext4 balloc: fix _with_rsv freeze

Port fix to the off-by-one in find_next_usable_block's memscan from ext2 to
ext4; but it didn't cause a serious problem for ext4 because the additional
ext4_test_allocatable check rescued it from the error.

[akpm@osdl.org: build fix]
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1c8ef0e35f..c4dd1103cc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -747,7 +747,7 @@ find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
 		here = 0;
 
 	p = ((char *)bh->b_data) + (here >> 3);
-	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
+	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
 	next = (r - ((char *)bh->b_data)) << 3;
 
 	if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
-- 
cgit v1.2.2


From c949d4eb40ce021b1abc3b63af23aa535662cb17 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Wed, 6 Dec 2006 20:39:38 -0800
Subject: [PATCH] autofs: fix error code path in autofs_fill_sb()

When kernel is compiled with old version of autofs (CONFIG_AUTOFS_FS), and
new (observed at least with 5.x.x) automount deamon is started, kernel
correctly reports incompatible version of kernel and userland daemon, but
then screws things up instead of correct handling of the error:

 autofs: kernel does not match daemon version
 =====================================
 [ BUG: bad unlock balance detected! ]
 -------------------------------------
 automount/4199 is trying to release lock (&type->s_umount_key) at:
 [<c0163b9e>] get_sb_nodev+0x76/0xa4
 but there are no more locks to release!

 other info that might help us debug this:
 no locks held by automount/4199.

 stack backtrace:
  [<c0103b15>] dump_trace+0x68/0x1b2
  [<c0103c77>] show_trace_log_lvl+0x18/0x2c
  [<c01041db>] show_trace+0xf/0x11
  [<c010424d>] dump_stack+0x12/0x14
  [<c012e02c>] print_unlock_inbalance_bug+0xe7/0xf3
  [<c012fd4f>] lock_release+0x8d/0x164
  [<c012b452>] up_write+0x14/0x27
  [<c0163b9e>] get_sb_nodev+0x76/0xa4
  [<c0163689>] vfs_kern_mount+0x83/0xf6
  [<c016373e>] do_kern_mount+0x2d/0x3e
  [<c017513f>] do_mount+0x607/0x67a
  [<c0175224>] sys_mount+0x72/0xa4
  [<c0102b96>] sysenter_past_esp+0x5f/0x99
 DWARF2 unwinder stuck at sysenter_past_esp+0x5f/0x99
 Leftover inexact backtrace:
  =======================

and then deadlock comes.

The problem: autofs_fill_super() returns EINVAL to get_sb_nodev(), but
before that, it calls kill_anon_super() to destroy the superblock which
won't be needed.  This is however way too soon to call kill_anon_super(),
because get_sb_nodev() has to perform its own cleanup of the superblock
first (deactivate_super(), etc.).  The correct time to call
kill_anon_super() is in the autofs_kill_sb() callback, which is called by
deactivate_super() at proper time, when the superblock is ready to be
killed.

I can see the same faulty codepath also in autofs4.  This patch solves
issues in both filesystems in a same way - it postpones the
kill_anon_super() until the proper time is signalized by deactivate_super()
calling the kill_sb() callback.

[raven@themaw.net: update comment]
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Ian Kent <raven@themaw.net>
Cc: <stable@kernel.org>
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs/inode.c  | 7 ++++---
 fs/autofs4/inode.c | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 38ede5c9d6..f968d13428 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -28,10 +28,11 @@ void autofs_kill_sb(struct super_block *sb)
 	/*
 	 * In the event of a failure in get_sb_nodev the superblock
 	 * info is not present so nothing else has been setup, so
-	 * just exit when we are called from deactivate_super.
+	 * just call kill_anon_super when we are called from
+	 * deactivate_super.
 	 */
 	if (!sbi)
-		return;
+		goto out_kill_sb;
 
 	if ( !sbi->catatonic )
 		autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
@@ -44,6 +45,7 @@ void autofs_kill_sb(struct super_block *sb)
 
 	kfree(sb->s_fs_info);
 
+out_kill_sb:
 	DPRINTK(("autofs: shutting down\n"));
 	kill_anon_super(sb);
 }
@@ -209,7 +211,6 @@ fail_iput:
 fail_free:
 	kfree(sbi);
 	s->s_fs_info = NULL;
-	kill_anon_super(s);
 fail_unlock:
 	return -EINVAL;
 }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ce7c0f1dd5..9c48250fd7 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -152,10 +152,11 @@ void autofs4_kill_sb(struct super_block *sb)
 	/*
 	 * In the event of a failure in get_sb_nodev the superblock
 	 * info is not present so nothing else has been setup, so
-	 * just exit when we are called from deactivate_super.
+	 * just call kill_anon_super when we are called from
+	 * deactivate_super.
 	 */
 	if (!sbi)
-		return;
+		goto out_kill_sb;
 
 	sb->s_fs_info = NULL;
 
@@ -167,6 +168,7 @@ void autofs4_kill_sb(struct super_block *sb)
 
 	kfree(sbi);
 
+out_kill_sb:
 	DPRINTK("shutting down");
 	kill_anon_super(sb);
 }
@@ -426,7 +428,6 @@ fail_ino:
 fail_free:
 	kfree(sbi);
 	s->s_fs_info = NULL;
-	kill_anon_super(s);
 fail_unlock:
 	return -EINVAL;
 }
-- 
cgit v1.2.2


From 3982cd99c30285ebc71d405cd8530a3dfb7265de Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:40:01 -0800
Subject: [PATCH] fs/sysv/: doc cleanup

Remove two different changelog files from fs/sysv/ and merges the INTRO
file into Documentation/filesystems/sysv-fs.txt

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysv/CHANGES   |  60 ------------------
 fs/sysv/ChangeLog | 106 -------------------------------
 fs/sysv/INTRO     | 182 ------------------------------------------------------
 3 files changed, 348 deletions(-)
 delete mode 100644 fs/sysv/CHANGES
 delete mode 100644 fs/sysv/ChangeLog
 delete mode 100644 fs/sysv/INTRO

(limited to 'fs')

diff --git a/fs/sysv/CHANGES b/fs/sysv/CHANGES
deleted file mode 100644
index 66ea6e92be..0000000000
--- a/fs/sysv/CHANGES
+++ /dev/null
@@ -1,60 +0,0 @@
-Mon, 15 Dec 1997	  Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-	*    namei.c: struct sysv_dir_inode_operations updated to use dentries.
-
-Fri, 23 Jan 1998   Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-	*    inode.c: corrected 1 track offset setting (in sb->sv_block_base).
-		      Originally it was overridden (by setting to zero)
-		      in detected_[xenix,sysv4,sysv2,coherent]. Thanks
-		      to Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
-		      for identifying the problem.
-
-Tue, 27 Jan 1998   Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-        *    inode.c: added 2048-byte block support to SystemV FS.
-		      Merged detected_bs[512,1024,2048]() into one function:
-		      void detected_bs (u_char type, struct super_block *sb).
-		      Thanks to Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl>
-		      for the patch.
-
-Wed, 4 Feb 1998   Krzysztof G. Baranowski <kgb@manjak.knm.org.pl>
-	*    namei.c: removed static subdir(); is_subdir() from dcache.c
-		      is used instead. Cosmetic changes.
-
-Thu, 3 Dec 1998   Al Viro (viro@parcelfarce.linux.theplanet.co.uk)
-	*    namei.c (sysv_rmdir):
-		      Bugectomy: old check for victim being busy
-		      (inode->i_count) wasn't replaced (with checking
-		      dentry->d_count) and escaped Linus in the last round
-		      of changes. Shot and buried.
-
-Wed, 9 Dec 1998   AV
-	*    namei.c (do_sysv_rename):
-		       Fixed incorrect check for other owners + race.
-		       Removed checks that went to VFS.
-	*    namei.c (sysv_unlink):
-		       Removed checks that went to VFS.
-
-Thu, 10 Dec 1998   AV
-	*    namei.c (do_mknod):
-			Removed dead code - mknod is never asked to
-			create a symlink or directory. Incidentially,
-			it wouldn't do it right if it would be called.
-
-Sat, 26 Dec 1998   KGB
-	*    inode.c (detect_sysv4):
-			Added detection of expanded s_type field (0x10,
-			0x20 and 0x30).  Forced read-only access in this case.
-
-Sun, 21 Mar 1999   AV
-	*    namei.c (sysv_link):
-			Fixed i_count usage that resulted in dcache corruption.
-	*    inode.c:
-			Filled ->delete_inode() method with sysv_delete_inode().
-			sysv_put_inode() is gone, as it tried to do ->delete_
-			_inode()'s job.
-	*    ialloc.c: (sysv_free_inode):
-			Fixed race.
-
-Sun, 30 Apr 1999   AV
-	*    namei.c (sysv_mknod):
-			Removed dead code (S_IFREG case is now passed to
-			->create() by VFS).
diff --git a/fs/sysv/ChangeLog b/fs/sysv/ChangeLog
deleted file mode 100644
index f403f8b91b..0000000000
--- a/fs/sysv/ChangeLog
+++ /dev/null
@@ -1,106 +0,0 @@
-Thu Feb 14 2002  Andrew Morton  <akpm@zip.com.au>
-
-	* dir_commit_chunk(): call writeout_one_page() as well as
-	  waitfor_one_page() for IS_SYNC directories, so that we
-	  actually do sync the directory. (forward-port from 2.4).
-
-Thu Feb  7 2002  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* super.c: switched to ->get_sb()
-	* ChangeLog: fixed dates ;-)
-
-2002-01-24  David S. Miller  <davem@redhat.com>
-
-	* inode.c: Include linux/init.h
-
-Mon Jan 21 2002  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-	* ialloc.c (sysv_new_inode): zero SYSV_I(inode)->i_data out.
-	* i_vnode renamed to vfs_inode.  Sorry, but let's keep that
-	  consistent.
-
-Sat Jan 19 2002  Christoph Hellwig  <hch@infradead.org>
-
-	* include/linux/sysv_fs.h (SYSV_I): Get fs-private inode data using
-		list_entry() instead of inode->u.
-	* include/linux/sysv_fs_i.h: Add 'struct inode  i_vnode' field to
-		sysv_inode_info structure.
-	* inode.c: Include <linux/slab.h>, implement alloc_inode/destroy_inode
-		sop methods, add infrastructure for per-fs inode slab cache.
-	* super.c (init_sysv_fs): Initialize inode cache, recover properly
-		in the case of failed register_filesystem for V7.
-	(exit_sysv_fs): Destroy inode cache.
-
-Sat Jan 19 2002  Christoph Hellwig  <hch@infradead.org>
-
-	* include/linux/sysv_fs.h: Include <linux/sysv_fs_i.h>, declare SYSV_I().
-	* dir.c (sysv_find_entry): Use SYSV_I() instead of ->u.sysv_i to
-		access fs-private inode data.
-	* ialloc.c (sysv_new_inode): Likewise.
-	* inode.c (sysv_read_inode): Likewise.
-	(sysv_update_inode): Likewise.
-	* itree.c (get_branch): Likewise.
-	(sysv_truncate): Likewise.
-	* symlink.c (sysv_readlink): Likewise.
-	(sysv_follow_link): Likewise.
-
-Fri Jan  4 2002  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* ialloc.c (sysv_free_inode): Use sb->s_id instead of bdevname().
-	* inode.c (sysv_read_inode): Likewise.
-	  (sysv_update_inode): Likewise.
-	  (sysv_sync_inode): Likewise.
-	* super.c (detect_sysv): Likewise.
-	  (complete_read_super): Likewise.
-	  (sysv_read_super): Likewise.
-	  (v7_read_super): Likewise.
-
-Sun Dec 30 2001  Manfred Spraul  <manfred@colorfullife.com>
-
-	* dir.c (dir_commit_chunk): Do not set dir->i_version.
-	(sysv_readdir): Likewise.
-
-Thu Dec 27 2001  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* itree.c (get_block): Use map_bh() to fill out bh_result.
-
-Tue Dec 25 2001  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* super.c (sysv_read_super): Use sb_set_blocksize() to set blocksize.
-	  (v7_read_super): Likewise.
-
-Tue Nov 27 2001  Alexander Viro  <viro@parcelfarce.linux.theplanet.co.uk>
-
-	* itree.c (get_block): Change type for iblock argument to sector_t.
-	* super.c (sysv_read_super): Set s_blocksize early.
-	  (v7_read_super): Likewise.
-	* balloc.c (sysv_new_block): Use sb_bread(). instead of bread().
-	  (sysv_count_free_blocks): Likewise.
-	* ialloc.c (sysv_raw_inode): Likewise.
-	* itree.c (get_branch): Likewise.
-	  (free_branches): Likewise.
-	* super.c (sysv_read_super): Likewise.
-	  (v7_read_super): Likewise.
-
-Sat Dec 15 2001  Christoph Hellwig  <hch@infradead.org>
-
-	* inode.c (sysv_read_inode): Mark inode as bad in case of failure.
-	* super.c (complete_read_super): Check for bad root inode.
-
-Wed Nov 21 2001  Andrew Morton  <andrewm@uow.edu.au>
-
-	* file.c (sysv_sync_file): Call fsync_inode_data_buffers.
-
-Fri Oct 26 2001  Christoph Hellwig  <hch@infradead.org>
-
-	* dir.c, ialloc.c, namei.c, include/linux/sysv_fs_i.h:
-	Implement per-Inode lookup offset cache.
-	Modelled after Ted's ext2 patch.
-
-Fri Oct 26 2001  Christoph Hellwig  <hch@infradead.org>
-
-	* inode.c, super.c, include/linux/sysv_fs.h,
-	  include/linux/sysv_fs_sb.h:
-	Remove symlink faking.	Noone really wants to use these as
-	linux filesystems and native OSes don't support it anyway.
-
-
diff --git a/fs/sysv/INTRO b/fs/sysv/INTRO
deleted file mode 100644
index de4e4d17ca..0000000000
--- a/fs/sysv/INTRO
+++ /dev/null
@@ -1,182 +0,0 @@
-This is the implementation of the SystemV/Coherent filesystem for Linux.
-It grew out of separate filesystem implementations
-
-    Xenix FS      Doug Evans <dje@cygnus.com>  June 1992
-    SystemV FS    Paul B. Monday <pmonday@eecs.wsu.edu> March-June 1993
-    Coherent FS   B. Haible <haible@ma2s2.mathematik.uni-karlsruhe.de> June 1993
-
-and was merged together in July 1993.
-
-These filesystems are rather similar. Here is a comparison with Minix FS:
-
-* Linux fdisk reports on partitions
-  - Minix FS     0x81 Linux/Minix
-  - Xenix FS     ??
-  - SystemV FS   ??
-  - Coherent FS  0x08 AIX bootable
-
-* Size of a block or zone (data allocation unit on disk)
-  - Minix FS     1024
-  - Xenix FS     1024 (also 512 ??)
-  - SystemV FS   1024 (also 512 and 2048)
-  - Coherent FS   512
-
-* General layout: all have one boot block, one super block and
-  separate areas for inodes and for directories/data.
-  On SystemV Release 2 FS (e.g. Microport) the first track is reserved and
-  all the block numbers (including the super block) are offset by one track.
-
-* Byte ordering of "short" (16 bit entities) on disk:
-  - Minix FS     little endian  0 1
-  - Xenix FS     little endian  0 1
-  - SystemV FS   little endian  0 1
-  - Coherent FS  little endian  0 1
-  Of course, this affects only the file system, not the data of files on it!
-
-* Byte ordering of "long" (32 bit entities) on disk:
-  - Minix FS     little endian  0 1 2 3
-  - Xenix FS     little endian  0 1 2 3
-  - SystemV FS   little endian  0 1 2 3
-  - Coherent FS  PDP-11         2 3 0 1
-  Of course, this affects only the file system, not the data of files on it!
-
-* Inode on disk: "short", 0 means non-existent, the root dir ino is:
-  - Minix FS                            1
-  - Xenix FS, SystemV FS, Coherent FS   2
-
-* Maximum number of hard links to a file:
-  - Minix FS     250
-  - Xenix FS     ??
-  - SystemV FS   ??
-  - Coherent FS  >=10000
-
-* Free inode management:
-  - Minix FS                             a bitmap
-  - Xenix FS, SystemV FS, Coherent FS
-      There is a cache of a certain number of free inodes in the super-block.
-      When it is exhausted, new free inodes are found using a linear search.
-
-* Free block management:
-  - Minix FS                             a bitmap
-  - Xenix FS, SystemV FS, Coherent FS
-      Free blocks are organized in a "free list". Maybe a misleading term,
-      since it is not true that every free block contains a pointer to
-      the next free block. Rather, the free blocks are organized in chunks
-      of limited size, and every now and then a free block contains pointers
-      to the free blocks pertaining to the next chunk; the first of these
-      contains pointers and so on. The list terminates with a "block number"
-      0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS.
-
-* Super-block location:
-  - Minix FS     block 1 = bytes 1024..2047
-  - Xenix FS     block 1 = bytes 1024..2047
-  - SystemV FS   bytes 512..1023
-  - Coherent FS  block 1 = bytes 512..1023
-
-* Super-block layout:
-  - Minix FS
-                    unsigned short s_ninodes;
-                    unsigned short s_nzones;
-                    unsigned short s_imap_blocks;
-                    unsigned short s_zmap_blocks;
-                    unsigned short s_firstdatazone;
-                    unsigned short s_log_zone_size;
-                    unsigned long s_max_size;
-                    unsigned short s_magic;
-  - Xenix FS, SystemV FS, Coherent FS
-                    unsigned short s_firstdatazone;
-                    unsigned long  s_nzones;
-                    unsigned short s_fzone_count;
-                    unsigned long  s_fzones[NICFREE];
-                    unsigned short s_finode_count;
-                    unsigned short s_finodes[NICINOD];
-                    char           s_flock;
-                    char           s_ilock;
-                    char           s_modified;
-                    char           s_rdonly;
-                    unsigned long  s_time;
-                    short          s_dinfo[4]; -- SystemV FS only
-                    unsigned long  s_free_zones;
-                    unsigned short s_free_inodes;
-                    short          s_dinfo[4]; -- Xenix FS only
-                    unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only
-                    char           s_fname[6];
-                    char           s_fpack[6];
-    then they differ considerably:
-        Xenix FS
-                    char           s_clean;
-                    char           s_fill[371];
-                    long           s_magic;
-                    long           s_type;
-        SystemV FS
-                    long           s_fill[12 or 14];
-                    long           s_state;
-                    long           s_magic;
-                    long           s_type;
-        Coherent FS
-                    unsigned long  s_unique;
-    Note that Coherent FS has no magic.
-
-* Inode layout:
-  - Minix FS
-                    unsigned short i_mode;
-                    unsigned short i_uid;
-                    unsigned long  i_size;
-                    unsigned long  i_time;
-                    unsigned char  i_gid;
-                    unsigned char  i_nlinks;
-                    unsigned short i_zone[7+1+1];
-  - Xenix FS, SystemV FS, Coherent FS
-                    unsigned short i_mode;
-                    unsigned short i_nlink;
-                    unsigned short i_uid;
-                    unsigned short i_gid;
-                    unsigned long  i_size;
-                    unsigned char  i_zone[3*(10+1+1+1)];
-                    unsigned long  i_atime;
-                    unsigned long  i_mtime;
-                    unsigned long  i_ctime;
-
-* Regular file data blocks are organized as
-  - Minix FS
-               7 direct blocks
-               1 indirect block (pointers to blocks)
-               1 double-indirect block (pointer to pointers to blocks)
-  - Xenix FS, SystemV FS, Coherent FS
-              10 direct blocks
-               1 indirect block (pointers to blocks)
-               1 double-indirect block (pointer to pointers to blocks)
-               1 triple-indirect block (pointer to pointers to pointers to blocks)
-
-* Inode size, inodes per block
-  - Minix FS        32   32
-  - Xenix FS        64   16
-  - SystemV FS      64   16
-  - Coherent FS     64    8
-
-* Directory entry on disk
-  - Minix FS
-                    unsigned short inode;
-                    char name[14/30];
-  - Xenix FS, SystemV FS, Coherent FS
-                    unsigned short inode;
-                    char name[14];
-
-* Dir entry size, dir entries per block
-  - Minix FS     16/32    64/32
-  - Xenix FS     16       64
-  - SystemV FS   16       64
-  - Coherent FS  16       32
-
-* How to implement symbolic links such that the host fsck doesn't scream:
-  - Minix FS     normal
-  - Xenix FS     kludge: as regular files with  chmod 1000
-  - SystemV FS   ??
-  - Coherent FS  kludge: as regular files with  chmod 1000
-
-
-Notation: We often speak of a "block" but mean a zone (the allocation unit)
-and not the disk driver's notion of "block".
-
-
-Bruno Haible  <haible@ma2s2.mathematik.uni-karlsruhe.de>
-- 
cgit v1.2.2


From 0da1480ec33d4bac8c32051c1d33202be6dc439f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:40:03 -0800
Subject: [PATCH] proper prototype for remove_inode_dquot_ref()

Add a proper prototype for remove_inode_dquot_ref() in
include/linux/quotaops.h

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 699aa4f7aa..9ecccab732 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1242,9 +1242,6 @@ EXPORT_SYMBOL(inode_needs_sync);
  */
 #ifdef CONFIG_QUOTA
 
-/* Function back in dquot.c */
-int remove_inode_dquot_ref(struct inode *, int, struct list_head *);
-
 void remove_dquot_ref(struct super_block *sb, int type,
 			struct list_head *tofree_head)
 {
-- 
cgit v1.2.2


From 71a3d1b4f7633835048f96a4ba79c8c87f03df4b Mon Sep 17 00:00:00 2001
From: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Date: Wed, 6 Dec 2006 20:40:09 -0800
Subject: [PATCH] fs: ufs add missing bracket

Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl>
Cc: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 28fce6c239..7dd12bb1d6 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -299,7 +299,7 @@ static inline void *get_usb_offset(struct ufs_sb_private_info *uspi,
 
 #define ubh_get_addr16(ubh,begin) \
 	(((__fs16*)((ubh)->bh[(begin) >> (uspi->s_fshift-1)]->b_data)) + \
-	((begin) & (uspi->fsize>>1) - 1)))
+	((begin) & ((uspi->fsize>>1) - 1)))
 
 #define ubh_get_addr32(ubh,begin) \
 	(((__fs32*)((ubh)->bh[(begin) >> (uspi->s_fshift-2)]->b_data)) + \
-- 
cgit v1.2.2


From a8f48a95619cbce8f85423480e7d0a1bf971a62b Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 6 Dec 2006 20:40:13 -0800
Subject: [PATCH] ext3/4: don't do orphan processing on readonly devices

If you do something like:

  # touch foo
  # tail -f foo &
  # rm foo
  # <take snapshot>
  # <mount snapshot>

you'll panic, because ext3/4 tries to do orphan list processing on the
readonly snapshot device, and:

  kernel: journal commit I/O error
  kernel: Assertion failure in journal_flush_Rsmp_e2f189ce() at journal.c:1356: "!journal->j_checkpoint_transactions"
  kernel: Kernel panic: Fatal exception

for a truly readonly underlying device, it's reasonable and necessary
to just skip orphan list processing.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 6 ++++++
 fs/ext4/super.c | 6 ++++++
 2 files changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8ab1981333..580b8a6ca9 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1264,6 +1264,12 @@ static void ext3_orphan_cleanup (struct super_block * sb,
 		return;
 	}
 
+	if (bdev_read_only(sb->s_bdev)) {
+		printk(KERN_ERR "EXT3-fs: write access "
+			"unavailable, skipping orphan cleanup.\n");
+		return;
+	}
+
 	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
 		if (es->s_last_orphan)
 			jbd_debug(1, "Errors on filesystem, "
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2ede7e2c70..486a641ca7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1321,6 +1321,12 @@ static void ext4_orphan_cleanup (struct super_block * sb,
 		return;
 	}
 
+	if (bdev_read_only(sb->s_bdev)) {
+		printk(KERN_ERR "EXT4-fs: write access "
+			"unavailable, skipping orphan cleanup.\n");
+		return;
+	}
+
 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 		if (es->s_last_orphan)
 			jbd_debug(1, "Errors on filesystem, "
-- 
cgit v1.2.2


From 8de61e69c2feb10e5391cca67a3faf1d2bf77ce0 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@cs.washington.edu>
Date: Wed, 6 Dec 2006 20:40:16 -0800
Subject: [PATCH] fs: remove unused variable

Removed unused 'have_pt_gnu_stack' variable.

Reported by David Binderman <dcb314@hotmail.com>

Signed-off-by: David Rientjes <rientjes@cs.washington.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 14ea630a85..be5869d349 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -544,7 +544,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	unsigned long reloc_func_desc = 0;
 	char passed_fileno[6];
 	struct files_struct *files;
-	int have_pt_gnu_stack, executable_stack = EXSTACK_DEFAULT;
+	int executable_stack = EXSTACK_DEFAULT;
 	unsigned long def_flags = 0;
 	struct {
 		struct elfhdr elf_ex;
@@ -707,7 +707,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 				executable_stack = EXSTACK_DISABLE_X;
 			break;
 		}
-	have_pt_gnu_stack = (i < loc->elf_ex.e_phnum);
 
 	/* Some simple consistency checks for the interpreter */
 	if (elf_interpreter) {
-- 
cgit v1.2.2


From 3ee6f61ca0720c71086a9eaf3f5bd0f7c51fe139 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 6 Dec 2006 20:40:23 -0800
Subject: [PATCH] remove NFSD_OPTIMIZE_SPACE

This patch removes the unused NFSD_OPTIMIZE_SPACE.

Additionally, it does differently what NFSD_OPTIMIZE_SPACE was supposed to do:

Nowadays, gcc knows best when to inline code, and CONFIG_CC_OPTIMIZE_FOR_SIZE
even tells gcc globally whether to optimize for size or for speed.  Therefore,
this patch also removes all inline's from these files.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs3xdr.c | 24 ++++++++++--------------
 fs/nfsd/nfsxdr.c  | 13 ++++---------
 2 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index b4baca3053..277df40f09 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -24,10 +24,6 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
-#ifdef NFSD_OPTIMIZE_SPACE
-# define inline
-#endif
-
 
 /*
  * Mapping of S_IF* types to NFS file types
@@ -42,14 +38,14 @@ static u32	nfs3_ftypes[] = {
 /*
  * XDR functions for basic NFS types
  */
-static inline __be32 *
+static __be32 *
 encode_time3(__be32 *p, struct timespec *time)
 {
 	*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_time3(__be32 *p, struct timespec *time)
 {
 	time->tv_sec = ntohl(*p++);
@@ -57,7 +53,7 @@ decode_time3(__be32 *p, struct timespec *time)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	unsigned int size;
@@ -77,7 +73,7 @@ __be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 	return decode_fh(p, fhp);
 }
 
-static inline __be32 *
+static __be32 *
 encode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	unsigned int size = fhp->fh_handle.fh_size;
@@ -91,7 +87,7 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
  * Decode a file name and make sure that the path contains
  * no slashes or null bytes.
  */
-static inline __be32 *
+static __be32 *
 decode_filename(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
@@ -107,7 +103,7 @@ decode_filename(__be32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_sattr3(__be32 *p, struct iattr *iap)
 {
 	u32	tmp;
@@ -153,7 +149,7 @@ decode_sattr3(__be32 *p, struct iattr *iap)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	      struct kstat *stat)
 {
@@ -186,7 +182,7 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
 	struct inode	*inode = fhp->fh_dentry->d_inode;
@@ -776,7 +772,7 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p,
 		return xdr_ressize_check(rqstp, p);
 }
 
-static inline __be32 *
+static __be32 *
 encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 	     int namlen, ino_t ino)
 {
@@ -790,7 +786,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p,
 		struct svc_fh *fhp)
 {
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 56ebb1443e..f5243f9439 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -18,11 +18,6 @@
 
 #define NFSDDBG_FACILITY		NFSDDBG_XDR
 
-
-#ifdef NFSD_OPTIMIZE_SPACE
-# define inline
-#endif
-
 /*
  * Mapping of S_IF* types to NFS file types
  */
@@ -55,7 +50,7 @@ __be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
 	return decode_fh(p, fhp);
 }
 
-static inline __be32 *
+static __be32 *
 encode_fh(__be32 *p, struct svc_fh *fhp)
 {
 	memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
@@ -66,7 +61,7 @@ encode_fh(__be32 *p, struct svc_fh *fhp)
  * Decode a file name and make sure that the path contains
  * no slashes or null bytes.
  */
-static inline __be32 *
+static __be32 *
 decode_filename(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
@@ -82,7 +77,7 @@ decode_filename(__be32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_pathname(__be32 *p, char **namp, int *lenp)
 {
 	char		*name;
@@ -98,7 +93,7 @@ decode_pathname(__be32 *p, char **namp, int *lenp)
 	return p;
 }
 
-static inline __be32 *
+static __be32 *
 decode_sattr(__be32 *p, struct iattr *iap)
 {
 	u32	tmp, tmp1;
-- 
cgit v1.2.2


From b593e48d2bf09c40c0f5165f2f2a10cc3983ea51 Mon Sep 17 00:00:00 2001
From: Yan Burman <burman.yan@gmail.com>
Date: Wed, 6 Dec 2006 20:40:32 -0800
Subject: [PATCH] affs: replace kmalloc+memset with kzalloc

Replace kmalloc+memset with kzalloc

Signed-off-by: Yan Burman <burman.yan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/affs/bitmap.c | 3 +--
 fs/afs/server.c  | 3 +--
 fs/afs/super.c   | 4 +---
 3 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index b0b953683c..b330009fe4 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -289,12 +289,11 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
 	sbi->s_bmap_count = (sbi->s_partition_size - sbi->s_reserved +
 				 sbi->s_bmap_bits - 1) / sbi->s_bmap_bits;
 	size = sbi->s_bmap_count * sizeof(*bm);
-	bm = sbi->s_bitmap = kmalloc(size, GFP_KERNEL);
+	bm = sbi->s_bitmap = kzalloc(size, GFP_KERNEL);
 	if (!sbi->s_bitmap) {
 		printk(KERN_ERR "AFFS: Bitmap allocation failed\n");
 		return -ENOMEM;
 	}
-	memset(sbi->s_bitmap, 0, size);
 
 	bmap_blk = (__be32 *)sbi->s_root_bh->b_data;
 	blk = sb->s_blocksize / 4 - 49;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 22afaae1a4..44aff81dc6 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -55,13 +55,12 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
 	_enter("%p,%08x,", cell, ntohl(addr->s_addr));
 
 	/* allocate and initialise a server record */
-	server = kmalloc(sizeof(struct afs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
 	if (!server) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(server, 0, sizeof(struct afs_server));
 	atomic_set(&server->usage, 1);
 
 	INIT_LIST_HEAD(&server->link);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 9a351c4c75..18d9b77ba4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -242,14 +242,12 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
 	kenter("");
 
 	/* allocate a superblock info record */
-	as = kmalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
 	if (!as) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	memset(as, 0, sizeof(struct afs_super_info));
-
 	afs_get_volume(params->volume);
 	as->volume = params->volume;
 
-- 
cgit v1.2.2


From 4a08a9f68168e547c2baf100020e9b96cae5fbd1 Mon Sep 17 00:00:00 2001
From: Yan Burman <burman.yan@gmail.com>
Date: Wed, 6 Dec 2006 20:40:34 -0800
Subject: [PATCH] jffs: replace kmalloc+memset with kzalloc

Replace kmalloc+memset with kzalloc

Signed-off-by: Yan Burman <burman.yan@gmail.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jffs/intrep.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 478a74e2e9..d0e783f199 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -592,7 +592,7 @@ jffs_add_virtual_root(struct jffs_control *c)
 	D2(printk("jffs_add_virtual_root(): "
 		  "Creating a virtual root directory.\n"));
 
-	if (!(root = kmalloc(sizeof(struct jffs_file), GFP_KERNEL))) {
+	if (!(root = kzalloc(sizeof(struct jffs_file), GFP_KERNEL))) {
 		return -ENOMEM;
 	}
 	no_jffs_file++;
@@ -604,7 +604,6 @@ jffs_add_virtual_root(struct jffs_control *c)
 	DJM(no_jffs_node++);
 	memset(node, 0, sizeof(struct jffs_node));
 	node->ino = JFFS_MIN_INO;
-	memset(root, 0, sizeof(struct jffs_file));
 	root->ino = JFFS_MIN_INO;
 	root->mode = S_IFDIR | S_IRWXU | S_IRGRP
 		     | S_IXGRP | S_IROTH | S_IXOTH;
-- 
cgit v1.2.2


From 15ad7cdcfd76450d4beebc789ec646664238184d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 6 Dec 2006 20:40:36 -0800
Subject: [PATCH] struct seq_operations and struct file_operations
 constification

 - move some file_operations structs into the .rodata section

 - move static strings from policy_types[] array into the .rodata section

 - fix generic seq_operations usages, so that those structs may be defined
   as "const" as well

[akpm@osdl.org: couple of fixes]
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/seq_file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 555b9ac04c..10690aa401 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -26,7 +26,7 @@
  *	ERR_PTR(error).  In the end of sequence they return %NULL. ->show()
  *	returns 0 in case of success and negative number in case of error.
  */
-int seq_open(struct file *file, struct seq_operations *op)
+int seq_open(struct file *file, const struct seq_operations *op)
 {
 	struct seq_file *p = file->private_data;
 
@@ -408,7 +408,7 @@ EXPORT_SYMBOL(single_open);
 
 int single_release(struct inode *inode, struct file *file)
 {
-	struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
+	const struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
 	int res = seq_release(inode, file);
 	kfree(op);
 	return res;
-- 
cgit v1.2.2


From f46ba2235feab5e686b1234c328a0577cde86e21 Mon Sep 17 00:00:00 2001
From: Jun Chen <jimcgnu@yahoo.com>
Date: Wed, 6 Dec 2006 20:40:37 -0800
Subject: [PATCH] fs: make nls_cp936.c handle some U00XY characters and U20AC
 correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Twenty characters in cp936 are not correctly handled.  They're all in the
U00 plane.  nls_cp936 converts all U00XY to XY but this is not correct for
some characters.(e.g.  U00B7 -> A1A4, U00A8 -> A1A7).

This problem is fixed by generating u2c_00 based on all c2u_xx and changing
uni2char() to give U00 plane a special handling.  The "€"(U20AC,80 in
cp936) is also be handled properly.

Acked-by: Gang Chen <cgdlut@gmail.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nls/nls_cp936.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 103 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nls/nls_cp936.c b/fs/nls/nls_cp936.c
index 046fde8170..65e640c61c 100644
--- a/fs/nls/nls_cp936.c
+++ b/fs/nls/nls_cp936.c
@@ -4421,6 +4421,73 @@ static wchar_t *page_charset2uni[256] = {
 	c2u_F8, c2u_F9, c2u_FA, c2u_FB, c2u_FC, c2u_FD, c2u_FE, NULL,   
 };
 
+static unsigned char u2c_00[512] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0C-0x0F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x13 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x14-0x17 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1C-0x1F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x23 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x24-0x27 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x2C-0x2F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x33 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x34-0x37 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3C-0x3F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x43 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x44-0x47 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x4C-0x4F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x53 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x54-0x57 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x5C-0x5F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x63 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x64-0x67 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x6C-0x6F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x73 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x74-0x77 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x7C-0x7F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x83 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x84-0x87 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x8C-0x8F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x93 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x94-0x97 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9B */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9C-0x9F */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA3 */
+	0xA1, 0xE8, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xEC, /* 0xA4-0xA7 */
+	0xA1, 0xA7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8-0xAB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xAC-0xAF */
+	0xA1, 0xE3, 0xA1, 0xC0, 0x00, 0x00, 0x00, 0x00, /* 0xB0-0xB3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xA4, /* 0xB4-0xB7 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8-0xBB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xBC-0xBF */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0-0xC3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC4-0xC7 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8-0xCB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xCC-0xCF */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0-0xD3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC1, /* 0xD4-0xD7 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDB */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xDC-0xDF */
+	0xA8, 0xA4, 0xA8, 0xA2, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE4-0xE7 */
+	0xA8, 0xA8, 0xA8, 0xA6, 0xA8, 0xBA, 0x00, 0x00, /* 0xE8-0xEB */
+	0xA8, 0xAC, 0xA8, 0xAA, 0x00, 0x00, 0x00, 0x00, /* 0xEC-0xEF */
+	0x00, 0x00, 0x00, 0x00, 0xA8, 0xB0, 0xA8, 0xAE, /* 0xF0-0xF3 */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA1, 0xC2, /* 0xF4-0xF7 */
+	0x00, 0x00, 0xA8, 0xB4, 0xA8, 0xB2, 0x00, 0x00, /* 0xF8-0xFB */
+	0xA8, 0xB9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xFC-0xFF */
+};
+
 static unsigned char u2c_01[512] = {
 	0xA8, 0xA1, 0xA8, 0xA1, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x03 */
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x04-0x07 */
@@ -10825,7 +10892,7 @@ static unsigned char u2c_FF[512] = {
 };
 
 static unsigned char *page_uni2charset[256] = {
-	NULL,   u2c_01, u2c_02, u2c_03, u2c_04, NULL,   NULL,   NULL,   
+	u2c_00, u2c_01, u2c_02, u2c_03, u2c_04, NULL,   NULL,   NULL,
 	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   
 	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   
 	NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   NULL,   
@@ -10936,11 +11003,34 @@ static int uni2char(const wchar_t uni,
 	unsigned char *uni2charset;
 	unsigned char cl = uni&0xFF;
 	unsigned char ch = (uni>>8)&0xFF;
-	int n;
+	unsigned char out0,out1;
 
 	if (boundlen <= 0)
 		return -ENAMETOOLONG;
 
+	if (uni == 0x20ac) {/* Euro symbol.The only exception with a non-ascii unicode */
+		out[0] = 0x80;
+		return 1;
+	}
+
+	if (ch == 0) { /* handle the U00 plane*/
+		/* if (cl == 0) return -EINVAL;*/ /*U0000 is legal in cp936*/
+		out0 = u2c_00[cl*2];
+		out1 = u2c_00[cl*2+1];
+		if (out0 == 0x00 && out1 == 0x00) {
+			if (cl<0x80) {
+				out[0] = cl;
+				return 1;
+			}
+			return -EINVAL;
+		} else {
+			if (boundlen <= 1)
+				return -ENAMETOOLONG;
+			out[0] = out0;
+			out[1] = out1;
+			return 2;
+		}
+	}
 
 	uni2charset = page_uni2charset[ch];
 	if (uni2charset) {
@@ -10950,15 +11040,10 @@ static int uni2char(const wchar_t uni,
 		out[1] = uni2charset[cl*2+1];
 		if (out[0] == 0x00 && out[1] == 0x00)
 			return -EINVAL;
-		n = 2;
-	} else if (ch==0 && cl) {
-		out[0] = cl;
-		n = 1;
+		return 2;
 	}
 	else
 		return -EINVAL;
-
-	return n;
 }
 
 static int char2uni(const unsigned char *rawstring, int boundlen,
@@ -10972,7 +11057,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
 		return -ENAMETOOLONG;
 
 	if (boundlen == 1) {
-		*uni = rawstring[0];
+		if (rawstring[0]==0x80) { /* Euro symbol.The only exception with a non-ascii unicode */
+			*uni = 0x20ac;
+		} else {
+			*uni = rawstring[0];
+		}
 		return 1;
 	}
 
@@ -10986,7 +11075,11 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
 			return -EINVAL;
 		n = 2;
 	} else{
-		*uni = ch;
+		if (ch==0x80) {/* Euro symbol.The only exception with a non-ascii unicode */
+			*uni = 0x20ac;
+		} else {
+			*uni = ch;
+		}
 		n = 1;
 	}
 	return n;
-- 
cgit v1.2.2


From 6d4df677f8a60ea6bc0ef1a596c1a3a79b1d4882 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 6 Dec 2006 20:40:39 -0800
Subject: [PATCH] do_coredump() and not stopping rewrite attacks?

On Sat, Dec 02, 2006 at 11:47:44PM +0300, Alexey Dobriyan wrote:
> David Binderman compiled 2.6.19 with icc and grepped for "was set but never
> used". Many warnings are on
> 	http://coderock.org/kj/unused-2.6.19-fs

Heh, the very first line:
fs/exec.c(1465): remark #593: variable "flag" was set but never used

fs/exec.c:
  1477		/*
  1478		 *	We cannot trust fsuid as being the "true" uid of the
  1479		 *	process nor do we know its entire history. We only know it
  1480		 *	was tainted so we dump it as root in mode 2.
  1481		 */
  1482		if (mm->dumpable == 2) {	/* Setuid core dump mode */
  1483			flag = O_EXCL;		/* Stop rewrite attacks */
  1484			current->fsuid = 0;	/* Dump root private */
  1485		}

And then filp_open follows with "flag" totally ignored.

(akpm: this restores the code to Alan's original version.  Andi's "Support
piping into commands in /proc/sys/kernel/core_pattern" (cset d025c9db) broke
it).

Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <stable@kerenl.org>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 2092bd2074..add0e03c3e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1515,7 +1515,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		ispipe = 1;
  	} else
  		file = filp_open(corename,
-				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600);
+				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+				 0600);
 	if (IS_ERR(file))
 		goto fail_unlock;
 	inode = file->f_dentry->d_inode;
-- 
cgit v1.2.2


From b62e8ec2ac580b47c11eb76e8852ac1ec7d617cd Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Wed, 6 Dec 2006 20:40:43 -0800
Subject: [PATCH] aio: kill pointless ki_nbytes assignment in
 aio_setup_single_vector

io_submit_one assigns ki_left = ki_nbytes = iocb->aio_nbytes, then calls
down to aio_setup_iocb, then to aio_setup_single_vector.  In there,
ki_nbytes is reassigned to the same value it got two call stack above it.
There is no need to do so.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 13aa9298ab..f02ad2677f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1413,7 +1413,6 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
 	kiocb->ki_iovec->iov_len = kiocb->ki_left;
 	kiocb->ki_nr_segs = 1;
 	kiocb->ki_cur_seg = 0;
-	kiocb->ki_nbytes = kiocb->ki_left;
 	return 0;
 }
 
-- 
cgit v1.2.2


From 97d2a80584b30b5cd32da411deca1986ef61877a Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Wed, 6 Dec 2006 20:40:45 -0800
Subject: [PATCH] aio: remove ki_retried debugging member

Remove the ki_retried member from struct kiocb.  I think the idea was
bounced around a while back, but Arnaldo pointed out another reason that we
should dig it up when he pointed out that the last cacheline of struct
kiocb only contains 4 bytes.  By removing the debugging member, we save
more than the 8 byte on 64 bit machines.

Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
Acked-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index f02ad2677f..d3a6ec2c96 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -666,17 +666,6 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	ssize_t (*retry)(struct kiocb *);
 	ssize_t ret;
 
-	if (iocb->ki_retried++ > 1024*1024) {
-		printk("Maximal retry count.  Bytes done %Zd\n",
-			iocb->ki_nbytes - iocb->ki_left);
-		return -EAGAIN;
-	}
-
-	if (!(iocb->ki_retried & 0xff)) {
-		pr_debug("%ld retry: %zd of %zd\n", iocb->ki_retried,
-			iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
-	}
-
 	if (!(retry = iocb->ki_retry)) {
 		printk("aio_run_iocb: iocb->ki_retry = NULL\n");
 		return 0;
@@ -1005,9 +994,6 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 	kunmap_atomic(ring, KM_IRQ1);
 
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
-
-	pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried,
-		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
@@ -1590,7 +1576,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_opcode = iocb->aio_lio_opcode;
 	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
 	INIT_LIST_HEAD(&req->ki_wait.task_list);
-	req->ki_retried = 0;
 
 	ret = aio_setup_iocb(req);
 
-- 
cgit v1.2.2


From feb189274638ea357750a9e809f5a6e2223a082e Mon Sep 17 00:00:00 2001
From: Johann Lombardi <johann.lombardi@bull.net>
Date: Wed, 6 Dec 2006 20:40:46 -0800
Subject: [PATCH] ext4: fix credit calculation in
 ext4_ext_calc_credits_for_insert

Fix a nit in ext4_ext_calc_credits_for_insert().  Besides, credits for the
new root are already added in the index split accounting.

Signed-off-by: Johann Lombardi <johann.lombardi@bull.net>
Signed-off-by: Alex Tomas <alex@clusterfs.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1442ccbaea..994a6e450e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1531,16 +1531,17 @@ int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
 
 	/*
 	 * tree can be full, so it would need to grow in depth:
-	 * allocation + old root + new root
+	 * we need one credit to modify old root, credits for
+	 * new root will be added in split accounting
 	 */
-	needed += 2 + 1 + 1;
+	needed += 1;
 
 	/*
 	 * Index split can happen, we would need:
 	 *    allocate intermediate indexes (bitmap + group)
 	 *  + change two blocks at each level, but root (already included)
 	 */
-	needed = (depth * 2) + (depth * 2);
+	needed += (depth * 2) + (depth * 2);
 
 	/* any allocation modifies superblock */
 	needed += 1;
-- 
cgit v1.2.2


From ef5036782e619ab394daaec5c00876fa6b17d7a1 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:23 -0800
Subject: [PATCH] ext3 balloc: reset windowsz when full

ext3_new_blocks should reset the reservation window size to 0 when squeezing
the last blocks out of an almost full filesystem, so the retry doesn't skip
any groups with less than half that free, reporting ENOSPC too soon.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index f96c40a90e..db8ca34d10 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1552,6 +1552,7 @@ retry_alloc:
 	 */
 	if (my_rsv) {
 		my_rsv = NULL;
+		windowsz = 0;
 		group_no = goal_group;
 		goto retry_alloc;
 	}
-- 
cgit v1.2.2


From 1650242324a0c19d629a8be0d5a8ecdc174d31f8 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:25 -0800
Subject: [PATCH] ext3 balloc: fix off-by-one against grp_goal

grp_goal 0 is a genuine goal (unlike -1), so ext3_try_to_allocate_with_rsv
should treat it as such.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index db8ca34d10..0706a45d3c 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1271,7 +1271,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 	}
 	/*
 	 * grp_goal is a group relative block number (if there is a goal)
-	 * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
+	 * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
 	 * first block is a filesystem wide block number
 	 * first block is the block number of the first block in this group
 	 */
@@ -1307,7 +1307,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
 			if (!goal_in_my_reservation(&my_rsv->rsv_window,
 							grp_goal, group, sb))
 				grp_goal = -1;
-		} else if (grp_goal > 0) {
+		} else if (grp_goal >= 0) {
 			int curr = my_rsv->rsv_end -
 					(grp_goal + group_first_block) + 1;
 
-- 
cgit v1.2.2


From ff50dc562bf6de00fec264b11531e2745209ba12 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:25 -0800
Subject: [PATCH] ext3 balloc: fix off-by-one against rsv_end

rsv_end is the last block within the reservation, so alloc_new_reservation
should accept start_block == rsv_end as success.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 0706a45d3c..57cf47a942 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1148,7 +1148,7 @@ retry:
 	 * check if the first free block is within the
 	 * free space we just reserved
 	 */
-	if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end)
+	if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
 		return 0;		/* success */
 	/*
 	 * if the first free bit we found is out of the reservable space
-- 
cgit v1.2.2


From c56d2561f7189762c4fce854630d4f7f6d64ccee Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:27 -0800
Subject: [PATCH] ext3 balloc: say rb_entry not list_entry

The reservations tree is an rb_tree not a list, so it's less confusing to use
rb_entry() than list_entry() - though they're both just container_of().

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 57cf47a942..03d39b6fd3 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -144,7 +144,7 @@ restart:
 
 	printk("Block Allocation Reservation Windows Map (%s):\n", fn);
 	while (n) {
-		rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
 		if (verbose)
 			printk("reservation window 0x%p "
 			       "start:  %lu, end:  %lu\n",
@@ -949,7 +949,7 @@ static int find_next_reservable_window(
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
-		rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node);
+		rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node);
 
 		/*
 		 * Reached the last reservation, we can just append to the
@@ -1193,7 +1193,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
 	if (!next)
 		my_rsv->rsv_end += size;
 	else {
-		next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+		next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node);
 
 		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
 			my_rsv->rsv_end += size;
-- 
cgit v1.2.2


From 2823b5535efad71e950ef50c2ce5f9e4dbaedc17 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:28 -0800
Subject: [PATCH] ext3 balloc: use io_error label

ext3_new_blocks has a nice io_error label for setting -EIO, so goto that in
the one place that doesn't already use it.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 03d39b6fd3..5a2c198762 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1515,10 +1515,8 @@ retry_alloc:
 		if (group_no >= ngroups)
 			group_no = 0;
 		gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
-		if (!gdp) {
-			*errp = -EIO;
-			goto out;
-		}
+		if (!gdp)
+			goto io_error;
 		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
 		/*
 		 * skip this group if the number of
-- 
cgit v1.2.2


From 7d1c520bb57e4b5e94ec937c13553dccf473341b Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 6 Dec 2006 20:41:30 -0800
Subject: [PATCH] ext3 balloc: fix _with_rsv freeze

Port fix to the off-by-one in find_next_usable_block's memscan from ext2 to
ext3; but it didn't cause a serious problem for ext3 because the additional
ext3_test_allocatable check rescued it from the error.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/balloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 5a2c198762..22161740ba 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -730,7 +730,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 		here = 0;
 
 	p = ((char *)bh->b_data) + (here >> 3);
-	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
+	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
 	next = (r - ((char *)bh->b_data)) << 3;
 
 	if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
-- 
cgit v1.2.2


From 7e0289766a0750a56260565bd3b74eb544483d45 Mon Sep 17 00:00:00 2001
From: Avantika Mathur <mathur@us.ibm.com>
Date: Wed, 6 Dec 2006 20:41:33 -0800
Subject: [PATCH] ext4: if expression format

changes instances of
	if ((lhs = expression)) {

to the preferred coding style

	lhs=expression;
	if (lhs) {

Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 63 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 994a6e450e..06ce8e8773 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -186,7 +186,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 		depth = path->p_depth;
 
 		/* try to predict block placement */
-		if ((ex = path[depth].p_ext))
+		ex = path[depth].p_ext;
+		if (ex)
 			return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
 
 		/* it looks like index is empty;
@@ -543,7 +544,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 	struct ext4_extent_idx *ix;
 	int len, err;
 
-	if ((err = ext4_ext_get_access(handle, inode, curp)))
+	err = ext4_ext_get_access(handle, inode, curp);
+	if (err)
 		return err;
 
 	BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
@@ -665,7 +667,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	}
 	lock_buffer(bh);
 
-	if ((err = ext4_journal_get_create_access(handle, bh)))
+	err = ext4_journal_get_create_access(handle, bh);
+	if (err)
 		goto cleanup;
 
 	neh = ext_block_hdr(bh);
@@ -702,18 +705,21 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 
-	if ((err = ext4_journal_dirty_metadata(handle, bh)))
+	err = ext4_journal_dirty_metadata(handle, bh);
+	if (err)
 		goto cleanup;
 	brelse(bh);
 	bh = NULL;
 
 	/* correct old leaf */
 	if (m) {
-		if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+		err = ext4_ext_get_access(handle, inode, path + depth);
+		if (err)
 			goto cleanup;
 		path[depth].p_hdr->eh_entries =
 		     cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
-		if ((err = ext4_ext_dirty(handle, inode, path + depth)))
+		err = ext4_ext_dirty(handle, inode, path + depth);
+		if (err)
 			goto cleanup;
 
 	}
@@ -736,7 +742,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		}
 		lock_buffer(bh);
 
-		if ((err = ext4_journal_get_create_access(handle, bh)))
+		err = ext4_journal_get_create_access(handle, bh);
+		if (err)
 			goto cleanup;
 
 		neh = ext_block_hdr(bh);
@@ -780,7 +787,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		set_buffer_uptodate(bh);
 		unlock_buffer(bh);
 
-		if ((err = ext4_journal_dirty_metadata(handle, bh)))
+		err = ext4_journal_dirty_metadata(handle, bh);
+		if (err)
 			goto cleanup;
 		brelse(bh);
 		bh = NULL;
@@ -854,7 +862,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	}
 	lock_buffer(bh);
 
-	if ((err = ext4_journal_get_create_access(handle, bh))) {
+	err = ext4_journal_get_create_access(handle, bh);
+	if (err) {
 		unlock_buffer(bh);
 		goto out;
 	}
@@ -874,11 +883,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	set_buffer_uptodate(bh);
 	unlock_buffer(bh);
 
-	if ((err = ext4_journal_dirty_metadata(handle, bh)))
+	err = ext4_journal_dirty_metadata(handle, bh);
+	if (err)
 		goto out;
 
 	/* create index in new top-level index: num,max,pointer */
-	if ((err = ext4_ext_get_access(handle, inode, curp)))
+	err = ext4_ext_get_access(handle, inode, curp);
+	if (err)
 		goto out;
 
 	curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
@@ -1070,20 +1081,24 @@ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	 */
 	k = depth - 1;
 	border = path[depth].p_ext->ee_block;
-	if ((err = ext4_ext_get_access(handle, inode, path + k)))
+	err = ext4_ext_get_access(handle, inode, path + k);
+	if (err)
 		return err;
 	path[k].p_idx->ei_block = border;
-	if ((err = ext4_ext_dirty(handle, inode, path + k)))
+	err = ext4_ext_dirty(handle, inode, path + k);
+	if (err)
 		return err;
 
 	while (k--) {
 		/* change all left-side indexes */
 		if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
 			break;
-		if ((err = ext4_ext_get_access(handle, inode, path + k)))
+		err = ext4_ext_get_access(handle, inode, path + k);
+		if (err)
 			break;
 		path[k].p_idx->ei_block = border;
-		if ((err = ext4_ext_dirty(handle, inode, path + k)))
+		err = ext4_ext_dirty(handle, inode, path + k);
+		if (err)
 			break;
 	}
 
@@ -1142,7 +1157,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 				le16_to_cpu(newext->ee_len),
 				le32_to_cpu(ex->ee_block),
 				le16_to_cpu(ex->ee_len), ext_pblock(ex));
-		if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+		err = ext4_ext_get_access(handle, inode, path + depth);
+		if (err)
 			return err;
 		ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len)
 					 + le16_to_cpu(newext->ee_len));
@@ -1192,7 +1208,8 @@ repeat:
 has_space:
 	nearex = path[depth].p_ext;
 
-	if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+	err = ext4_ext_get_access(handle, inode, path + depth);
+	if (err)
 		goto cleanup;
 
 	if (!nearex) {
@@ -1486,10 +1503,12 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	path--;
 	leaf = idx_pblock(path->p_idx);
 	BUG_ON(path->p_hdr->eh_entries == 0);
-	if ((err = ext4_ext_get_access(handle, inode, path)))
+	err = ext4_ext_get_access(handle, inode, path);
+	if (err)
 		return err;
 	path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
-	if ((err = ext4_ext_dirty(handle, inode, path)))
+	err = ext4_ext_dirty(handle, inode, path);
+	if (err)
 		return err;
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
 	bh = sb_find_get_block(inode->i_sb, leaf);
@@ -1930,7 +1949,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	mutex_lock(&EXT4_I(inode)->truncate_mutex);
 
 	/* check in cache */
-	if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) {
+	goal = ext4_ext_in_cache(inode, iblock, &newex);
+	if (goal) {
 		if (goal == EXT4_EXT_CACHE_GAP) {
 			if (!create) {
 				/* block isn't allocated yet and
@@ -1969,7 +1989,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 	 */
 	BUG_ON(path[depth].p_ext == NULL && depth != 0);
 
-	if ((ex = path[depth].p_ext)) {
+	ex = path[depth].p_ext;
+	if (ex) {
 	        unsigned long ee_block = le32_to_cpu(ex->ee_block);
 		ext4_fsblk_t ee_start = ext_pblock(ex);
 		unsigned short ee_len  = le16_to_cpu(ex->ee_len);
-- 
cgit v1.2.2


From 5d4958f923f431e148d9ba8ff894209a134b943e Mon Sep 17 00:00:00 2001
From: Avantika Mathur <mathur@us.ibm.com>
Date: Wed, 6 Dec 2006 20:41:35 -0800
Subject: [PATCH] ext4: kmalloc to kzalloc

Performs kmalloc to kzalloc conversion

Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 06ce8e8773..e41be1866d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -477,13 +477,12 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
 
 	/* account possible depth increase */
 	if (!path) {
-		path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2),
+		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
 				GFP_NOFS);
 		if (!path)
 			return ERR_PTR(-ENOMEM);
 		alloc = 1;
 	}
-	memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
 	path[0].p_hdr = eh;
 
 	/* walk through the tree */
@@ -643,10 +642,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 	 * We need this to handle errors and free blocks
 	 * upon them.
 	 */
-	ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
+	ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
 	if (!ablocks)
 		return -ENOMEM;
-	memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth);
 
 	/* allocate all needed blocks */
 	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
@@ -1773,12 +1771,11 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
 	 */
-	path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
+	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
 	if (path == NULL) {
 		ext4_journal_stop(handle);
 		return -ENOMEM;
 	}
-	memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
 	path[0].p_hdr = ext_inode_hdr(inode);
 	if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) {
 		err = -EIO;
-- 
cgit v1.2.2


From 09b882520bbe01f2e5044642109c1c1d19fe3559 Mon Sep 17 00:00:00 2001
From: Avantika Mathur <mathur@us.ibm.com>
Date: Wed, 6 Dec 2006 20:41:36 -0800
Subject: [PATCH] ext4: Eliminate inline functions

Removes all inline keywords, since the compiler will make static functions
inline when it is appropriate.

Signed-off-by: Avantika Mathur <mathur@us.ibm.com>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/extents.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e41be1866d..dc2724fa76 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -48,7 +48,7 @@
  * ext_pblock:
  * combine low and high parts of physical block number into ext4_fsblk_t
  */
-static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
 {
 	ext4_fsblk_t block;
 
@@ -61,7 +61,7 @@ static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
  * idx_pblock:
  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
  */
-static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
+static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
 {
 	ext4_fsblk_t block;
 
@@ -75,7 +75,7 @@ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
  * stores a large physical block number into an extent struct,
  * breaking it into parts
  */
-static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
+static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
 {
 	ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
 	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
@@ -86,7 +86,7 @@ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb
  * stores a large physical block number into an index struct,
  * breaking it into parts
  */
-static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
+static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
 {
 	ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
 	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
@@ -216,7 +216,7 @@ ext4_ext_new_block(handle_t *handle, struct inode *inode,
 	return newblock;
 }
 
-static inline int ext4_ext_space_block(struct inode *inode)
+static int ext4_ext_space_block(struct inode *inode)
 {
 	int size;
 
@@ -229,7 +229,7 @@ static inline int ext4_ext_space_block(struct inode *inode)
 	return size;
 }
 
-static inline int ext4_ext_space_block_idx(struct inode *inode)
+static int ext4_ext_space_block_idx(struct inode *inode)
 {
 	int size;
 
@@ -242,7 +242,7 @@ static inline int ext4_ext_space_block_idx(struct inode *inode)
 	return size;
 }
 
-static inline int ext4_ext_space_root(struct inode *inode)
+static int ext4_ext_space_root(struct inode *inode)
 {
 	int size;
 
@@ -256,7 +256,7 @@ static inline int ext4_ext_space_root(struct inode *inode)
 	return size;
 }
 
-static inline int ext4_ext_space_root_idx(struct inode *inode)
+static int ext4_ext_space_root_idx(struct inode *inode)
 {
 	int size;
 
@@ -1103,7 +1103,7 @@ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
 	return err;
 }
 
-static int inline
+static int
 ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 				struct ext4_extent *ex2)
 {
@@ -1395,7 +1395,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block,
 	return err;
 }
 
-static inline void
+static void
 ext4_ext_put_in_cache(struct inode *inode, __u32 block,
 			__u32 len, __u32 start, int type)
 {
@@ -1413,7 +1413,7 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block,
  * calculate boundaries of the gap that the requested block fits into
  * and cache this gap
  */
-static inline void
+static void
 ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 				unsigned long block)
 {
@@ -1454,7 +1454,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 	ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
 }
 
-static inline int
+static int
 ext4_ext_in_cache(struct inode *inode, unsigned long block,
 			struct ext4_extent *ex)
 {
@@ -1523,7 +1523,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
  * the caller should calculate credits under truncate_mutex and
  * pass the actual path.
  */
-int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
+int ext4_ext_calc_credits_for_insert(struct inode *inode,
 						struct ext4_ext_path *path)
 {
 	int depth, needed;
@@ -1733,7 +1733,7 @@ out:
  * ext4_ext_more_to_rm:
  * returns 1 if current index has to be freed (even partial)
  */
-static int inline
+static int
 ext4_ext_more_to_rm(struct ext4_ext_path *path)
 {
 	BUG_ON(path->p_idx == NULL);
-- 
cgit v1.2.2