From 71bce256bf2e2aaddf54a2f4ac216329c74be3ad Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:23:27 -0700
Subject: [XFS] Move the XFS inode to the front of its hash list on a cache hit

SGI Modid: xfs-linux:xfs-kern:21915a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/xfs_iget.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3a0ba1dfd0e8..d3da00045f26 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -135,6 +135,40 @@ xfs_chash_free(xfs_mount_t *mp)
 	mp->m_chash = NULL;
 }
 
+/*
+ * Try to move an inode to the front of its hash list if possible
+ * (and if its not there already).  Called right after obtaining
+ * the list version number and then dropping the read_lock on the
+ * hash list in question (which is done right after looking up the
+ * inode in question...).
+ */
+STATIC void
+xfs_ihash_promote(
+	xfs_ihash_t	*ih,
+	xfs_inode_t	*ip,
+	ulong		version)
+{
+	xfs_inode_t	*iq;
+
+	if ((ip->i_prevp != &ih->ih_next) && write_trylock(&ih->ih_lock)) {
+		if (likely(version == ih->ih_version)) {
+			/* remove from list */
+			if ((iq = ip->i_next)) {
+				iq->i_prevp = ip->i_prevp;
+			}
+			*ip->i_prevp = iq;
+
+			/* insert at list head */
+			iq = ih->ih_next;
+			iq->i_prevp = &ip->i_next;
+			ip->i_next = iq;
+			ip->i_prevp = &ih->ih_next;
+			ih->ih_next = ip;
+		}
+		write_unlock(&ih->ih_lock);
+	}
+}
+
 /*
  * Look up an inode by number in the given file system.
  * The inode is looked up in the hash table for the file system
@@ -229,7 +263,9 @@ again:
 				XFS_STATS_INC(xs_ig_found);
 
 				ip->i_flags &= ~XFS_IRECLAIMABLE;
+				version = ih->ih_version;
 				read_unlock(&ih->ih_lock);
+				xfs_ihash_promote(ih, ip, version);
 
 				XFS_MOUNT_ILOCK(mp);
 				list_del_init(&ip->i_reclaim);
@@ -259,8 +295,15 @@ again:
 						inode_vp, vp);
 			}
 
+			/*
+			 * Inode cache hit: if ip is not at the front of
+			 * its hash chain, move it there now.
+			 * Do this with the lock held for update, but
+			 * do statistics after releasing the lock.
+			 */
+			version = ih->ih_version;
 			read_unlock(&ih->ih_lock);
-
+			xfs_ihash_promote(ih, ip, version);
 			XFS_STATS_INC(xs_ig_found);
 
 finish_inode:
@@ -547,6 +590,7 @@ xfs_inode_incore(xfs_mount_t	*mp,
 {
 	xfs_ihash_t	*ih;
 	xfs_inode_t	*ip;
+	ulong		version;
 
 	ih = XFS_IHASH(mp, ino);
 	read_lock(&ih->ih_lock);
@@ -554,11 +598,15 @@ xfs_inode_incore(xfs_mount_t	*mp,
 		if (ip->i_ino == ino) {
 			/*
 			 * If we find it and tp matches, return it.
+			 * Also move it to the front of the hash list
+			 * if we find it and it is not already there.
 			 * Otherwise break from the loop and return
 			 * NULL.
 			 */
 			if (ip->i_transp == tp) {
+				version = ih->ih_version;
 				read_unlock(&ih->ih_lock);
+				xfs_ihash_promote(ih, ip, version);
 				return (ip);
 			}
 			break;
@@ -685,6 +733,7 @@ xfs_iextract(
 		iq->i_prevp = ip->i_prevp;
 	}
 	*ip->i_prevp = iq;
+	ih->ih_version++;
 	write_unlock(&ih->ih_lock);
 
 	/*
-- 
cgit v1.2.2


From de20614b351e4d4d23af0a105baa8fd2d7cd4f26 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:24:13 -0700
Subject: [XFS] Block mount attempts for filesystems with version 1
 directories.

SGI Modid: xfs-linux:xfs-kern:21937a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/xfs_mount.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b57423caef9b..2ec967d93e5a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -300,6 +300,15 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 
+	/*
+	 * Version 1 directory format has never worked on Linux.
+	 */
+	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
+		cmn_err(CE_WARN,
+	"XFS: Attempted to mount file system using version 1 directory format");
+		return XFS_ERROR(ENOSYS);
+	}
+
 	/*
 	 * Until this is fixed only page-sized or smaller data blocks work.
 	 */
-- 
cgit v1.2.2


From 31b084aef3f088962f56c100b31bb8479c5dd769 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:25:00 -0700
Subject: [XFS] Fix up uses of nlink_t incorrectly restricting us to 2^16 links
 for some platforms

SGI Modid: xfs-linux:xfs-kern:22032a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/linux-2.6/xfs_vnode.h | 2 +-
 fs/xfs/xfs_inode.c           | 2 +-
 fs/xfs/xfs_inode.h           | 6 +++---
 fs/xfs/xfs_types.h           | 1 +
 fs/xfs/xfs_utils.c           | 2 +-
 fs/xfs/xfs_utils.h           | 2 +-
 6 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index da76c1f1e11c..7cff0f3e9bc3 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -409,7 +409,7 @@ typedef struct vattr {
 	int		va_mask;	/* bit-mask of attributes present */
 	enum vtype	va_type;	/* vnode type (for create) */
 	mode_t		va_mode;	/* file access mode and type */
-	nlink_t		va_nlink;	/* number of references to file */
+	xfs_nlink_t	va_nlink;	/* number of references to file */
 	uid_t		va_uid;		/* owner user id */
 	gid_t		va_gid;		/* owner group id */
 	xfs_ino_t	va_nodeid;	/* file id */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 43c632ab86ad..bc8c8c7f9039 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1130,7 +1130,7 @@ xfs_ialloc(
 	xfs_trans_t	*tp,
 	xfs_inode_t	*pip,
 	mode_t		mode,
-	nlink_t		nlink,
+	xfs_nlink_t	nlink,
 	xfs_dev_t	rdev,
 	cred_t		*cr,
 	xfs_prid_t	prid,
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a53b1ccf6070..37e1c316f3b6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -495,9 +495,9 @@ int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
-int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t,
-			   xfs_dev_t, struct cred *, xfs_prid_t, int,
-			   struct xfs_buf **, boolean_t *, xfs_inode_t **);
+int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
+			   xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t,
+			   int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
 void		xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *,
 					int);
 uint		xfs_ip2xflags(struct xfs_inode *);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 04609d27ea51..e4bf711e48ff 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -63,6 +63,7 @@ typedef __u64			xfs_ino_t;	/* <inode> type */
 typedef __s64			xfs_daddr_t;	/* <disk address> type */
 typedef char *			xfs_caddr_t;	/* <core address> type */
 typedef __u32			xfs_dev_t;
+typedef __u32			xfs_nlink_t;
 
 /* __psint_t is the same size as a pointer */
 #if (BITS_PER_LONG == 32)
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 816b945fa0ea..d1f8146a06ea 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -147,7 +147,7 @@ xfs_dir_ialloc(
 	xfs_inode_t	*dp,		/* directory within whose allocate
 					   the inode. */
 	mode_t		mode,
-	nlink_t		nlink,
+	xfs_nlink_t	nlink,
 	xfs_dev_t	rdev,
 	cred_t		*credp,
 	prid_t		prid,		/* project id */
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index e1ed6a588000..01d98b4b7af7 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -42,7 +42,7 @@ extern int xfs_get_dir_entry (vname_t *, xfs_inode_t **);
 extern int xfs_dir_lookup_int (bhv_desc_t *, uint, vname_t *, xfs_ino_t *,
 				xfs_inode_t **);
 extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *);
-extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, nlink_t,
+extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
 				xfs_dev_t, cred_t *, prid_t, int,
 				xfs_inode_t **, int *);
 extern int xfs_droplink (xfs_trans_t *, xfs_inode_t *);
-- 
cgit v1.2.2


From 9effd8e62570c6e47b91734770e6122002c33ed5 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Thu, 5 May 2005 13:26:18 -0700
Subject: [XFS] Enable XFS_VNODE_TRACE

SGI Modid: xfs-linux:xfs-kern:190725a

Signed-off-by: Eric Sandeen <sandeen@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
.
---
 fs/xfs/Makefile              | 2 +-
 fs/xfs/linux-2.6/xfs_vnode.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 554e4a18c152..d3ff78354638 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -49,7 +49,7 @@ ifeq ($(CONFIG_XFS_TRACE),y)
 	EXTRA_CFLAGS += -DXFS_LOG_TRACE
 	EXTRA_CFLAGS += -DXFS_RW_TRACE
 	EXTRA_CFLAGS += -DPAGEBUF_TRACE
-	# EXTRA_CFLAGS += -DXFS_VNODE_TRACE
+	EXTRA_CFLAGS += -DXFS_VNODE_TRACE
 endif
 
 obj-$(CONFIG_XFS_FS)		+= xfs.o
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 7cff0f3e9bc3..3b40c929544e 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -86,10 +86,11 @@ typedef struct vnode {
 	vnumber_t	v_number;		/* in-core vnode number */
 	vn_bhv_head_t	v_bh;			/* behavior head */
 	spinlock_t	v_lock;			/* VN_LOCK/VN_UNLOCK */
-	struct inode	v_inode;		/* Linux inode */
 #ifdef XFS_VNODE_TRACE
 	struct ktrace	*v_trace;		/* trace header structure    */
 #endif
+	struct inode	v_inode;		/* Linux inode */
+	/* inode MUST be last */
 } vnode_t;
 
 #define v_fbhv			v_bh.bh_first	       /* first behavior */
-- 
cgit v1.2.2


From 5fcbab355e1528545671a5221666ef640b8250c8 Mon Sep 17 00:00:00 2001
From: Dean Roehrich <roehrich@sgi.com>
Date: Thu, 5 May 2005 13:27:19 -0700
Subject: [XFS] Add ATTR_NOLOCK for xfs_setattr to indicate that XFS_IOLOCK is
 held

SGI Modid: xfs-linux:xfs-kern:190711a

Signed-off-by: Dean Roehrich <roehrich@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
.
---
 fs/xfs/linux-2.6/xfs_vnode.h |  1 +
 fs/xfs/xfs_vnodeops.c        | 11 +++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 3b40c929544e..4ee229301241 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -626,6 +626,7 @@ static inline int VN_BAD(struct vnode *vp)
 #define	ATTR_DMI	0x08	/* invocation from a DMI function */
 #define	ATTR_LAZY	0x80	/* set/get attributes lazily */
 #define	ATTR_NONBLOCK	0x100	/* return EAGAIN if operation would block */
+#define ATTR_NOLOCK	0x200	/* Don't grab any conflicting locks */
 
 /*
  * Flags to VOP_FSYNC and VOP_RECLAIM.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 70092963ca9e..25a526629b12 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -305,7 +305,7 @@ xfs_setattr(
 	int			mandlock_before, mandlock_after;
 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
 	int			file_owner;
-	int			need_iolock = (flags & ATTR_DMI) == 0;
+	int			need_iolock = 1;
 
 	vp = BHV_TO_VNODE(bdp);
 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
@@ -384,6 +384,9 @@ xfs_setattr(
 	 */
 	tp = NULL;
 	lock_flags = XFS_ILOCK_EXCL;
+	ASSERT(flags & ATTR_NOLOCK ? flags & ATTR_DMI : 1);
+	if (flags & ATTR_NOLOCK)
+		need_iolock = 0;
 	if (!(mask & XFS_AT_SIZE)) {
 		if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
 		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
@@ -4320,7 +4323,7 @@ xfs_free_file_space(
 	int			rt;
 	xfs_fileoff_t		startoffset_fsb;
 	xfs_trans_t		*tp;
-	int			need_iolock = (attr_flags & ATTR_DMI) == 0;
+	int			need_iolock = 1;
 
 	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
 	mp = ip->i_mount;
@@ -4348,8 +4351,12 @@ xfs_free_file_space(
 			return(error);
 	}
 
+	ASSERT(attr_flags & ATTR_NOLOCK ? attr_flags & ATTR_DMI : 1);
+	if (attr_flags & ATTR_NOLOCK)
+		need_iolock = 0;
 	if (need_iolock)
 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
 	rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog),
 			(__uint8_t)NBPP);
 	ilen = len + (offset & (rounding - 1));
-- 
cgit v1.2.2


From 1f443ad70d3afa6bc74019ade2d664eadd7d505a Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:28:29 -0700
Subject: [XFS] Allow initial XFS delayed allocation size to be increased
 beyond 64KB.

SGI Modid: xfs-linux:xfs-kern:22261a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/xfs_mount.h  |  6 ++---
 fs/xfs/xfs_vfsops.c | 67 +++++++++++++++++++++++++++++++++++------------------
 2 files changed, 48 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 5fc6201dd8e2..1b968471ec8b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -428,10 +428,10 @@ typedef struct xfs_mount {
 #define XFS_WRITEIO_LOG_LARGE	16
 
 /*
- * Max and min values for UIO and mount-option defined I/O sizes;
- * min value can't be less than a page.  Currently unused.
+ * Max and min values for mount-option defined I/O
+ * preallocation sizes.
  */
-#define XFS_MAX_IO_LOG		16	/* 64K */
+#define XFS_MAX_IO_LOG		30	/* 1G */
 #define XFS_MIN_IO_LOG		PAGE_SHIFT
 
 /*
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 00aae9c6a904..b53736650100 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1649,6 +1649,7 @@ xfs_vget(
 #define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
 #define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
 #define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
+#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
 #define MNTOPT_IHASHSIZE    "ihashsize"    /* size of inode hash table */
 #define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
 #define MNTOPT_NOLOGFLUSH   "nologflush"   /* don't hard flush on log writes */
@@ -1657,6 +1658,28 @@ xfs_vget(
 #define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
 #define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
 
+STATIC unsigned long
+suffix_strtoul(const char *cp, char **endp, unsigned int base)
+{
+	int	last, shift_left_factor = 0;
+	char	*value = (char *)cp;
+
+	last = strlen(value) - 1;
+	if (value[last] == 'K' || value[last] == 'k') {
+		shift_left_factor = 10;
+		value[last] = '\0';
+	}
+	if (value[last] == 'M' || value[last] == 'm') {
+		shift_left_factor = 20;
+		value[last] = '\0';
+	}
+	if (value[last] == 'G' || value[last] == 'g') {
+		shift_left_factor = 30;
+		value[last] = '\0';
+	}
+
+	return simple_strtoul(cp, endp, base) << shift_left_factor;
+}
 
 int
 xfs_parseargs(
@@ -1688,60 +1711,60 @@ xfs_parseargs(
 		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_LOGBUFS);
+					this_char);
 				return EINVAL;
 			}
 			args->logbufs = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-			int	last, in_kilobytes = 0;
-
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_LOGBSIZE);
+					this_char);
 				return EINVAL;
 			}
-			last = strlen(value) - 1;
-			if (value[last] == 'K' || value[last] == 'k') {
-				in_kilobytes = 1;
-				value[last] = '\0';
-			}
-			args->logbufsize = simple_strtoul(value, &eov, 10);
-			if (in_kilobytes)
-				args->logbufsize <<= 10;
+			args->logbufsize = suffix_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_LOGDEV);
+					this_char);
 				return EINVAL;
 			}
 			strncpy(args->logname, value, MAXNAMELEN);
 		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_MTPT);
+					this_char);
 				return EINVAL;
 			}
 			strncpy(args->mtpt, value, MAXNAMELEN);
 		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_RTDEV);
+					this_char);
 				return EINVAL;
 			}
 			strncpy(args->rtname, value, MAXNAMELEN);
 		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_BIOSIZE); 
+					this_char);
 				return EINVAL;
 			}
 			iosize = simple_strtoul(value, &eov, 10);
 			args->flags |= XFSMNT_IOSIZE;
 			args->iosizelog = (uint8_t) iosize;
+		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+			if (!value || !*value) {
+				printk("XFS: %s option requires an argument\n",
+					this_char);
+				return EINVAL;
+			}
+			iosize = suffix_strtoul(value, &eov, 10);
+			args->flags |= XFSMNT_IOSIZE;
+			args->iosizelog = ffs(iosize) - 1;
 		} else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					this_char); 
+					this_char);
 				return EINVAL;
 			}
 			args->flags |= XFSMNT_IHASHSIZE;
@@ -1756,7 +1779,7 @@ xfs_parseargs(
 			args->flags |= XFSMNT_INO64;
 #if !XFS_BIG_INUMS
 			printk("XFS: %s option not allowed on this system\n",
-				MNTOPT_INO64);
+				this_char);
 			return EINVAL;
 #endif
 		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
@@ -1766,14 +1789,14 @@ xfs_parseargs(
 		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_SUNIT);
+					this_char);
 				return EINVAL;
 			}
 			dsunit = simple_strtoul(value, &eov, 10);
 		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
 			if (!value || !*value) {
 				printk("XFS: %s option requires an argument\n",
-					MNTOPT_SWIDTH);
+					this_char);
 				return EINVAL;
 			}
 			dswidth = simple_strtoul(value, &eov, 10);
@@ -1781,7 +1804,7 @@ xfs_parseargs(
 			args->flags &= ~XFSMNT_32BITINODES;
 #if !XFS_BIG_INUMS
 			printk("XFS: %s option not allowed on this system\n",
-				MNTOPT_64BITINODE);
+				this_char);
 			return EINVAL;
 #endif
 		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
@@ -1877,7 +1900,7 @@ xfs_showargs(
 		seq_printf(m, "," MNTOPT_IHASHSIZE "=%d", mp->m_ihsize);
 
 	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-		seq_printf(m, "," MNTOPT_BIOSIZE "=%d", mp->m_writeio_log);
+		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%d", 1<<mp->m_writeio_log);
 
 	if (mp->m_logbufs > 0)
 		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-- 
cgit v1.2.2


From 764433b7f1cf5d7b5e27ceb4f5546042e1f4acc7 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Thu, 5 May 2005 13:29:17 -0700
Subject: [XFS] Fix up warnings

SGI Modid: xfs-linux:xfs-kern:191411a

Signed-off-by: Eric Sandeen <sandeen@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/linux-2.6/xfs_vnode.c | 4 ++--
 fs/xfs/linux-2.6/xfs_vnode.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 849c61c74f3c..4b1b1bcdb6b3 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -424,13 +424,13 @@ vn_remove(
  * Vnode tracing code.
  */
 void
-vn_trace_entry(vnode_t *vp, char *func, inst_t *ra)
+vn_trace_entry(vnode_t *vp, const char *func, inst_t *ra)
 {
 	KTRACE_ENTER(vp, VNODE_KTRACE_ENTRY, func, 0, ra);
 }
 
 void
-vn_trace_exit(vnode_t *vp, char *func, inst_t *ra)
+vn_trace_exit(vnode_t *vp, const char *func, inst_t *ra)
 {
 	KTRACE_ENTER(vp, VNODE_KTRACE_EXIT, func, 0, ra);
 }
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 4ee229301241..00466c3194ac 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -648,8 +648,8 @@ static inline int VN_BAD(struct vnode *vp)
 #define	VNODE_KTRACE_REF	4
 #define	VNODE_KTRACE_RELE	5
 
-extern void vn_trace_entry(struct vnode *, char *, inst_t *);
-extern void vn_trace_exit(struct vnode *, char *, inst_t *);
+extern void vn_trace_entry(struct vnode *, const char *, inst_t *);
+extern void vn_trace_exit(struct vnode *, const char *, inst_t *);
 extern void vn_trace_hold(struct vnode *, char *, int, inst_t *);
 extern void vn_trace_ref(struct vnode *, char *, int, inst_t *);
 extern void vn_trace_rele(struct vnode *, char *, int, inst_t *);
-- 
cgit v1.2.2


From abd0cf7aeaaf8a15d9777e65606b6076868b6186 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:30:13 -0700
Subject: [XFS] Resolve an issue with xfsbufd not getting along with swsusp.

SGI Modid: xfs-linux:xfs-kern:22342a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 23e0eb67fc25..997963e53622 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1746,13 +1746,15 @@ STATIC DECLARE_COMPLETION(pagebuf_daemon_done);
 STATIC struct task_struct *pagebuf_daemon_task;
 STATIC int pagebuf_daemon_active;
 STATIC int force_flush;
-
+STATIC int force_sleep;
 
 STATIC int
 pagebuf_daemon_wakeup(
 	int			priority,
 	unsigned int		mask)
 {
+	if (force_sleep)
+		return 0;
 	force_flush = 1;
 	barrier();
 	wake_up_process(pagebuf_daemon_task);
@@ -1778,7 +1780,12 @@ pagebuf_daemon(
 
 	INIT_LIST_HEAD(&tmp);
 	do {
-		try_to_freeze(PF_FREEZE);
+		if (unlikely(current->flags & PF_FREEZE)) {
+			force_sleep = 1;
+			refrigerator(PF_FREEZE);
+		} else {
+			force_sleep = 0;
+		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
-- 
cgit v1.2.2


From 3f243766660da3df4a75ea1892c310d6080646c8 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:30:34 -0700
Subject: [XFS] Disable the combination of XFS direct IO and AIO until the IO
 completion handling for unwritten extents can be moved out of interrupt
 context.

SGI Modid: xfs-linux:xfs-kern:22343a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/linux-2.6/xfs_file.c | 43 +++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/linux-2.6/xfs_lrw.c  |  3 +++
 2 files changed, 44 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 9f057a4a5b06..d0d412afd261 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -515,10 +515,49 @@ open_exec_out:
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
+/*
+ * Temporary workaround to the AIO direct IO write problem.
+ * This code can go and we can revert to do_sync_write once
+ * the writepage(s) rework is merged.
+ */
+STATIC ssize_t
+linvfs_write(
+	struct file	*filp,
+	const char	__user *buf,
+	size_t		len,
+	loff_t		*ppos)
+{
+	struct kiocb	kiocb;
+	ssize_t		ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+STATIC ssize_t
+linvfs_write_invis(
+	struct file	*filp,
+	const char	__user *buf,
+	size_t		len,
+	loff_t		*ppos)
+{
+	struct kiocb	kiocb;
+	ssize_t		ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+
+
 struct file_operations linvfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
-	.write		= do_sync_write,
+	.write		= linvfs_write,
 	.readv		= linvfs_readv,
 	.writev		= linvfs_writev,
 	.aio_read	= linvfs_aio_read,
@@ -540,7 +579,7 @@ struct file_operations linvfs_file_operations = {
 struct file_operations linvfs_invis_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
-	.write		= do_sync_write,
+	.write		= linvfs_write_invis,
 	.readv		= linvfs_readv_invis,
 	.writev		= linvfs_writev_invis,
 	.aio_read	= linvfs_aio_read_invis,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ff145fd0d1a4..aa9daaea6c34 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -683,6 +683,9 @@ xfs_write(
 			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
 
+		if (ioflags & IO_ISAIO)
+			return XFS_ERROR(-ENOSYS);
+
 		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
 			return XFS_ERROR(-EINVAL);
 
-- 
cgit v1.2.2


From 3ba0815a4b0709ef5e7b481067573d10806bbe2c Mon Sep 17 00:00:00 2001
From: Daniel Moore <dxm@sgi.com>
Date: Thu, 5 May 2005 13:31:34 -0700
Subject: [XFS] stop background sync from waiting for in-use inodes

SGI Modid: xfs-linux:xfs-kern:191586a

Signed-off-by: Daniel Moore <dxm@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 76a84758073a..1e71a9633d8f 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -725,9 +725,12 @@ xfs_page_state_convert(
 	__uint64_t              end_offset;
 	pgoff_t                 end_index, last_index, tlast;
 	int			len, err, i, cnt = 0, uptodate = 1;
-	int			flags = startio ? 0 : BMAPI_TRYLOCK;
+	int			flags;
 	int			page_dirty, delalloc = 0;
 
+	/* wait for other IO threads? */
+	flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;
+
 	/* Is this page beyond the end of the file? */
 	offset = i_size_read(inode);
 	end_index = offset >> PAGE_CACHE_SHIFT;
-- 
cgit v1.2.2


From 18e0a926ad7c5ab4ef05334230671c6975dac26b Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sgi.com>
Date: Thu, 5 May 2005 13:32:18 -0700
Subject: [XFS] remove noisy printk at vnode trace allocation

SGI Modid: xfs-linux:xfs-kern:191625a

Signed-off-by: Eric Sandeen <sandeen@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/linux-2.6/xfs_vnode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 4b1b1bcdb6b3..a832d165f24f 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -156,7 +156,6 @@ vn_initialize(
 
 #ifdef	XFS_VNODE_TRACE
 	vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
-	printk("Allocated VNODE_TRACE at 0x%p\n", vp->v_trace);
 #endif	/* XFS_VNODE_TRACE */
 
 	vn_trace_exit(vp, "vn_initialize", (inst_t *)__return_address);
-- 
cgit v1.2.2


From 775bf6c99a4ebde13bdb8dfa528ed241483b49ef Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:33:01 -0700
Subject: [XFS] Do not do delalloc conversion on pages beyond EOF ever, not
 just sometimes

SGI Modid: xfs-linux:xfs-kern:22376a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/linux-2.6/xfs_aops.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 1e71a9633d8f..1edd3b694332 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -726,7 +726,7 @@ xfs_page_state_convert(
 	pgoff_t                 end_index, last_index, tlast;
 	int			len, err, i, cnt = 0, uptodate = 1;
 	int			flags;
-	int			page_dirty, delalloc = 0;
+	int			page_dirty;
 
 	/* wait for other IO threads? */
 	flags = (startio && wbc->sync_mode != WB_SYNC_NONE) ? 0 : BMAPI_TRYLOCK;
@@ -807,7 +807,6 @@ xfs_page_state_convert(
 		 */
 		} else if (buffer_delay(bh)) {
 			if (!iomp) {
-				delalloc = 1;
 				err = xfs_map_blocks(inode, offset, len, &iomap,
 						BMAPI_ALLOCATE | flags);
 				if (err) {
@@ -882,10 +881,9 @@ xfs_page_state_convert(
 		xfs_submit_page(page, wbc, bh_arr, cnt, 0, 1);
 
 	if (iomp) {
-		tlast = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+		offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
 					PAGE_CACHE_SHIFT;
-		if (delalloc && (tlast > last_index))
-			tlast = last_index;
+		tlast = min_t(pgoff_t, offset, last_index);
 		xfs_cluster_write(inode, page->index + 1, iomp, wbc,
 					startio, unmapped, tlast);
 	}
-- 
cgit v1.2.2


From 24e17b5fb99d4d1b47fe0847a3a801e36d431ff6 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:33:20 -0700
Subject: [XFS] Use the right offset when ensuring a delayed allocate
 conversion has covered the offset originally requested.  Can cause data
 corruption when multiple processes are performing writeout on different areas
 of the same file.  Quite difficult to hit though.

SGI Modid: xfs-linux:xfs-kern:22377a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
.
---
 fs/xfs/linux-2.6/xfs_aops.c | 68 ++++++++++++++++++++++++++-------------------
 fs/xfs/xfs_iomap.c          | 20 +++++++------
 fs/xfs/xfs_iomap.h          |  7 ++---
 fs/xfs/xfs_mount.h          |  7 +++--
 4 files changed, 56 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 1edd3b694332..9278e9aba9ba 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -558,7 +558,8 @@ xfs_submit_page(
 	int			i;
 
 	BUG_ON(PageWriteback(page));
-	set_page_writeback(page);
+	if (bh_count)
+		set_page_writeback(page);
 	if (clear_dirty)
 		clear_page_dirty(page);
 	unlock_page(page);
@@ -578,9 +579,6 @@ xfs_submit_page(
 
 		if (probed_page && clear_dirty)
 			wbc->nr_to_write--;	/* Wrote an "extra" page */
-	} else {
-		end_page_writeback(page);
-		wbc->pages_skipped++;	/* We didn't write this page */
 	}
 }
 
@@ -602,21 +600,26 @@ xfs_convert_page(
 {
 	struct buffer_head	*bh_arr[MAX_BUF_PER_PAGE], *bh, *head;
 	xfs_iomap_t		*mp = iomapp, *tmp;
-	unsigned long		end, offset;
-	pgoff_t			end_index;
-	int			i = 0, index = 0;
+	unsigned long		offset, end_offset;
+	int			index = 0;
 	int			bbits = inode->i_blkbits;
+	int			len, page_dirty;
 
-	end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-	if (page->index < end_index) {
-		end = PAGE_CACHE_SIZE;
-	} else {
-		end = i_size_read(inode) & (PAGE_CACHE_SIZE-1);
-	}
+	end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
+
+	/*
+	 * page_dirty is initially a count of buffers on the page before
+	 * EOF and is decrememted as we move each into a cleanable state.
+	 */
+	len = 1 << inode->i_blkbits;
+	end_offset = max(end_offset, PAGE_CACHE_SIZE);
+	end_offset = roundup(end_offset, len);
+	page_dirty = end_offset / len;
+
+	offset = 0;
 	bh = head = page_buffers(page);
 	do {
-		offset = i << bbits;
-		if (offset >= end)
+		if (offset >= end_offset)
 			break;
 		if (!(PageUptodate(page) || buffer_uptodate(bh)))
 			continue;
@@ -625,6 +628,7 @@ xfs_convert_page(
 			if (startio) {
 				lock_buffer(bh);
 				bh_arr[index++] = bh;
+				page_dirty--;
 			}
 			continue;
 		}
@@ -657,10 +661,11 @@ xfs_convert_page(
 			unlock_buffer(bh);
 			mark_buffer_dirty(bh);
 		}
-	} while (i++, (bh = bh->b_this_page) != head);
+		page_dirty--;
+	} while (offset += len, (bh = bh->b_this_page) != head);
 
-	if (startio) {
-		xfs_submit_page(page, wbc, bh_arr, index, 1, index == i);
+	if (startio && index) {
+		xfs_submit_page(page, wbc, bh_arr, index, 1, !page_dirty);
 	} else {
 		unlock_page(page);
 	}
@@ -743,19 +748,22 @@ xfs_page_state_convert(
 		}
 	}
 
-	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 	end_offset = min_t(unsigned long long,
-			offset + PAGE_CACHE_SIZE, i_size_read(inode));
-
-	bh = head = page_buffers(page);
-	iomp = NULL;
+			(loff_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
+	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 
 	/*
-	 * page_dirty is initially a count of buffers on the page and
-	 * is decrememted as we move each into a cleanable state.
+	 * page_dirty is initially a count of buffers on the page before
+	 * EOF and is decrememted as we move each into a cleanable state.
 	 */
-	len = bh->b_size;
-	page_dirty = PAGE_CACHE_SIZE / len;
+	len = 1 << inode->i_blkbits;
+	p_offset = max(p_offset, PAGE_CACHE_SIZE);
+	p_offset = roundup(p_offset, len);
+	page_dirty = p_offset / len;
+
+	iomp = NULL;
+	p_offset = 0;
+	bh = head = page_buffers(page);
 
 	do {
 		if (offset >= end_offset)
@@ -877,8 +885,10 @@ xfs_page_state_convert(
 	if (uptodate && bh == head)
 		SetPageUptodate(page);
 
-	if (startio)
-		xfs_submit_page(page, wbc, bh_arr, cnt, 0, 1);
+	if (startio) {
+		WARN_ON(page_dirty);
+		xfs_submit_page(page, wbc, bh_arr, cnt, 0, !page_dirty);
+	}
 
 	if (iomp) {
 		offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3826e8f0e28a..b291a2b53579 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -308,7 +308,8 @@ phase2:
 			break;
 		}
 
-		error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, &imap, &nimaps);
+		error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count,
+						 &imap, &nimaps);
 		break;
 	case BMAPI_UNWRITTEN:
 		lockmode = 0;
@@ -746,6 +747,8 @@ write_map:
 int
 xfs_iomap_write_allocate(
 	xfs_inode_t	*ip,
+	loff_t		offset,
+	size_t		count,
 	xfs_bmbt_irec_t *map,
 	int		*retmap)
 {
@@ -770,9 +773,9 @@ xfs_iomap_write_allocate(
 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
 		return XFS_ERROR(error);
 
-	offset_fsb = map->br_startoff;
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	count_fsb = map->br_blockcount;
-	map_start_fsb = offset_fsb;
+	map_start_fsb = map->br_startoff;
 
 	XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
 
@@ -868,9 +871,9 @@ xfs_iomap_write_allocate(
 					imap[i].br_startoff,
 				        imap[i].br_blockcount,imap[i].br_state);
                         }
-			if ((map->br_startoff >= imap[i].br_startoff) &&
-			    (map->br_startoff < (imap[i].br_startoff +
-						 imap[i].br_blockcount))) {
+			if ((offset_fsb >= imap[i].br_startoff) &&
+			    (offset_fsb < (imap[i].br_startoff +
+					   imap[i].br_blockcount))) {
 				*map = imap[i];
 				*retmap = 1;
 				XFS_STATS_INC(xs_xstrat_quick);
@@ -883,9 +886,8 @@ xfs_iomap_write_allocate(
 		 * file, just surrounding data, try again.
 		 */
 		nimaps--;
-		offset_fsb = imap[nimaps].br_startoff +
-			     imap[nimaps].br_blockcount;
-		map_start_fsb = offset_fsb;
+		map_start_fsb = imap[nimaps].br_startoff +
+				imap[nimaps].br_blockcount;
 	}
 
 trans_cancel:
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 31c91087cb33..287895a3adc1 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -29,9 +29,6 @@
  *
  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
  */
-
-
-
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
@@ -56,7 +53,7 @@ typedef enum {
 	BMAPI_UNWRITTEN  = (1 << 3),	/* unwritten extents to real extents */
 	/* modifiers */
 	BMAPI_IGNSTATE = (1 << 4),	/* ignore unwritten state on read */
-	BMAPI_DIRECT = (1 << 5),		/* direct instead of buffered write */
+	BMAPI_DIRECT = (1 << 5),	/* direct instead of buffered write */
 	BMAPI_MMAP = (1 << 6),		/* allocate for mmap write */
 	BMAPI_SYNC = (1 << 7),		/* sync write to flush delalloc space */
 	BMAPI_TRYLOCK = (1 << 8),	/* non-blocking request */
@@ -100,7 +97,7 @@ extern int xfs_iomap_write_direct(struct xfs_inode *, loff_t, size_t,
 				  int, struct xfs_bmbt_irec *, int *, int);
 extern int xfs_iomap_write_delay(struct xfs_inode *, loff_t, size_t, int,
 				 struct xfs_bmbt_irec *, int *);
-extern int xfs_iomap_write_allocate(struct xfs_inode *,
+extern int xfs_iomap_write_allocate(struct xfs_inode *, loff_t, size_t,
 				struct xfs_bmbt_irec *, int *);
 extern int xfs_iomap_write_unwritten(struct xfs_inode *, loff_t, size_t);
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1b968471ec8b..8ffb65dc110d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -216,7 +216,8 @@ typedef int		(*xfs_iomap_write_delay_t)(
 				void *, loff_t, size_t, int,
 				struct xfs_bmbt_irec *, int *);
 typedef int		(*xfs_iomap_write_allocate_t)(
-				void *, struct xfs_bmbt_irec *, int *);
+				void *, loff_t, size_t,
+				struct xfs_bmbt_irec *, int *);
 typedef int		(*xfs_iomap_write_unwritten_t)(
 				void *, loff_t, size_t);
 typedef uint		(*xfs_lck_map_shared_t)(void *);
@@ -258,9 +259,9 @@ typedef struct xfs_ioops {
 #define XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, flags, mval, nmap) \
 	(*(mp)->m_io_ops.xfs_iomap_write_delay) \
 		((io)->io_obj, offset, count, flags, mval, nmap)
-#define XFS_IOMAP_WRITE_ALLOCATE(mp, io, mval, nmap) \
+#define XFS_IOMAP_WRITE_ALLOCATE(mp, io, offset, count, mval, nmap) \
 	(*(mp)->m_io_ops.xfs_iomap_write_allocate) \
-		((io)->io_obj, mval, nmap)
+		((io)->io_obj, offset, count, mval, nmap)
 #define XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count) \
 	(*(mp)->m_io_ops.xfs_iomap_write_unwritten) \
 		((io)->io_obj, offset, count)
-- 
cgit v1.2.2


From f403b7f452e4347f6af14c1f3c47bce758eb6337 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Thu, 5 May 2005 13:33:40 -0700
Subject: [XFS] Cleanup use of loff_t vs xfs_off_t in the core code.

SGI Modid: xfs-linux:xfs-kern:22378a

Signed-off-by: Nathan Scott <nathans@sgi.com>
Signed-off-by: Christoph Hellwig <hch@sgi.com>
---
 fs/xfs/xfs_dfrag.c |  2 +-
 fs/xfs/xfs_iomap.c |  8 ++++----
 fs/xfs/xfs_iomap.h | 20 ++++++++++----------
 fs/xfs/xfs_mount.h |  8 ++++----
 4 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 08d551a17347..63abdc2ac7f4 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -182,7 +182,7 @@ xfs_swapext(
 
 	if (VN_CACHED(tvp) != 0)
 		xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore),
-						(loff_t)0, 0, 0);
+						(xfs_off_t)0, 0, 0);
 
 	/* Verify O_DIRECT for ftmp */
 	if (VN_CACHED(tvp) != 0) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index b291a2b53579..991f8a61f7c4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -366,7 +366,7 @@ xfs_flush_space(
 int
 xfs_iomap_write_direct(
 	xfs_inode_t	*ip,
-	loff_t		offset,
+	xfs_off_t	offset,
 	size_t		count,
 	int		flags,
 	xfs_bmbt_irec_t *ret_imap,
@@ -542,7 +542,7 @@ error_out:
 int
 xfs_iomap_write_delay(
 	xfs_inode_t	*ip,
-	loff_t		offset,
+	xfs_off_t	offset,
 	size_t		count,
 	int		ioflag,
 	xfs_bmbt_irec_t *ret_imap,
@@ -747,7 +747,7 @@ write_map:
 int
 xfs_iomap_write_allocate(
 	xfs_inode_t	*ip,
-	loff_t		offset,
+	xfs_off_t	offset,
 	size_t		count,
 	xfs_bmbt_irec_t *map,
 	int		*retmap)
@@ -901,7 +901,7 @@ error0:
 int
 xfs_iomap_write_unwritten(
 	xfs_inode_t	*ip,
-	loff_t		offset,
+	xfs_off_t	offset,
 	size_t		count)
 {
 	xfs_mount_t	*mp = ip->i_mount;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 287895a3adc1..4daaa5212102 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003,2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -64,13 +64,13 @@ typedef enum {
 /*
  * xfs_iomap_t:  File system I/O map
  *
- * The iomap_bn field is expressed in 512-byte blocks, and is where the 
+ * The iomap_bn field is expressed in 512-byte blocks, and is where the
  * mapping starts on disk.
  *
  * The iomap_offset, iomap_bsize and iomap_delta fields are in bytes.
  * iomap_offset is the offset of the mapping in the file itself.
- * iomap_bsize is the size of the mapping,  iomap_delta is the 
- * desired data's offset into the mapping, given the offset supplied 
+ * iomap_bsize is the size of the mapping,  iomap_delta is the
+ * desired data's offset into the mapping, given the offset supplied
  * to the file I/O map routine.
  *
  * When a request is made to read beyond the logical end of the object,
@@ -81,8 +81,8 @@ typedef enum {
 typedef struct xfs_iomap {
 	xfs_daddr_t		iomap_bn;	/* first 512b blk of mapping */
 	xfs_buftarg_t		*iomap_target;
-	loff_t			iomap_offset;	/* offset of mapping, bytes */
-	loff_t			iomap_bsize;	/* size of mapping, bytes */
+	xfs_off_t		iomap_offset;	/* offset of mapping, bytes */
+	xfs_off_t		iomap_bsize;	/* size of mapping, bytes */
 	size_t			iomap_delta;	/* offset into mapping, bytes */
 	iomap_flags_t		iomap_flags;
 } xfs_iomap_t;
@@ -93,12 +93,12 @@ struct xfs_bmbt_irec;
 
 extern int xfs_iomap(struct xfs_iocore *, xfs_off_t, ssize_t, int,
 		     struct xfs_iomap *, int *);
-extern int xfs_iomap_write_direct(struct xfs_inode *, loff_t, size_t,
+extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 				  int, struct xfs_bmbt_irec *, int *, int);
-extern int xfs_iomap_write_delay(struct xfs_inode *, loff_t, size_t, int,
+extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
 				 struct xfs_bmbt_irec *, int *);
-extern int xfs_iomap_write_allocate(struct xfs_inode *, loff_t, size_t,
+extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
 				struct xfs_bmbt_irec *, int *);
-extern int xfs_iomap_write_unwritten(struct xfs_inode *, loff_t, size_t);
+extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
 
 #endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8ffb65dc110d..30dd08fb9f57 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -210,16 +210,16 @@ typedef int		(*xfs_bmapi_t)(struct xfs_trans *, void *,
 				struct xfs_bmap_free *);
 typedef int		(*xfs_bmap_eof_t)(void *, xfs_fileoff_t, int, int *);
 typedef int		(*xfs_iomap_write_direct_t)(
-				void *, loff_t, size_t, int,
+				void *, xfs_off_t, size_t, int,
 				struct xfs_bmbt_irec *, int *, int);
 typedef int		(*xfs_iomap_write_delay_t)(
-				void *, loff_t, size_t, int,
+				void *, xfs_off_t, size_t, int,
 				struct xfs_bmbt_irec *, int *);
 typedef int		(*xfs_iomap_write_allocate_t)(
-				void *, loff_t, size_t,
+				void *, xfs_off_t, size_t,
 				struct xfs_bmbt_irec *, int *);
 typedef int		(*xfs_iomap_write_unwritten_t)(
-				void *, loff_t, size_t);
+				void *, xfs_off_t, size_t);
 typedef uint		(*xfs_lck_map_shared_t)(void *);
 typedef void		(*xfs_lock_t)(void *, uint);
 typedef void		(*xfs_lock_demote_t)(void *, uint);
-- 
cgit v1.2.2


From e422fd2c965ad1b0e4eadaabd0adb77e8a93e74e Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@suse.de>
Date: Thu, 5 May 2005 16:15:04 -0700
Subject: [PATCH] avoid -ENOMEM due reclaimable slab caches

This makes sure that reclaimable buffer headers and reclaimable inodes
are accounted properly during the overcommit checks.

Signed-off-by: Andrea Arcangeli <andrea@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 +-
 fs/inode.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 5f525b3c6d9f..6ed59497fd4d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3115,7 +3115,7 @@ void __init buffer_init(void)
 
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
-			SLAB_PANIC, init_buffer_head, NULL);
+			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL);
 
 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL
diff --git a/fs/inode.c b/fs/inode.c
index af8fd78d2099..90a110feb713 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1336,7 +1336,7 @@ void __init inode_init(unsigned long mempages)
 
 	/* inode slab cache */
 	inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
-				0, SLAB_PANIC, init_once, NULL);
+				0, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_once, NULL);
 	set_shrinker(DEFAULT_SEEKS, shrink_icache_memory);
 
 	/* Hash may have been set up in inode_init_early */
-- 
cgit v1.2.2


From 51a141104a37369be2822f423ed4444aa34d26a2 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Thu, 5 May 2005 16:15:34 -0700
Subject: [PATCH] uml: hostfs failed mount handling

This cleans up the error handling and fixes a crash if a hostfs mount fails.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hostfs/hostfs_kern.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index e6c63d9cac7b..14a0d339d036 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -991,13 +991,17 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 		goto out_put;
 
 	err = read_inode(root_inode);
-	if(err)
-		goto out_put;
+	if(err){
+                /* No iput in this case because the dput does that for us */
+                dput(sb->s_root);
+                sb->s_root = NULL;
+		goto out_free;
+        }
 
 	return(0);
 
  out_put:
-	iput(root_inode);
+        iput(root_inode);
  out_free:
 	kfree(name);
  out:
-- 
cgit v1.2.2


From f3ddbdc6267c32223035ea9bb8456a2d86f65ba1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Thu, 5 May 2005 16:15:45 -0700
Subject: [PATCH] fix race in __block_prepare_write

Fix a race where __block_prepare_write can leak out an in-flight read
against a bh if get_block returns an error.  This can lead to the page
becoming unlocked while the buffer is locked and the read still in flight.
__mpage_writepage BUGs on this condition.

BUG sighted on a 2-way Itanium2 system with 16K PAGE_SIZE running

	fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2

where $DIR is a new ext2 filesystem with 4K blocks that is quite
small (causing get_block to fail often with -ENOSPC).

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6ed59497fd4d..af7c51ded2e1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1953,7 +1953,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_mapped(bh)) {
 			err = get_block(inode, block, bh, 1);
 			if (err)
-				goto out;
+				break;
 			if (buffer_new(bh)) {
 				clear_buffer_new(bh);
 				unmap_underlying_metadata(bh->b_bdev,
@@ -1995,10 +1995,12 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 	while(wait_bh > wait) {
 		wait_on_buffer(*--wait_bh);
 		if (!buffer_uptodate(*wait_bh))
-			return -EIO;
+			err = -EIO;
 	}
-	return 0;
-out:
+	if (!err)
+		return err;
+
+	/* Error case: */
 	/*
 	 * Zero out any newly allocated blocks to avoid exposing stale
 	 * data.  If BH_New is set, we know that the block was newly
-- 
cgit v1.2.2


From ad576e63e0c8b274a8558b8e05a6d0526e804dc0 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Thu, 5 May 2005 16:15:46 -0700
Subject: [PATCH] __block_write_full_page race fix

When running
	fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2
on an ext2 filesystem with 1024 byte block size, on SMP i386 with 4096 byte
page size over loopback to an image file on a tmpfs filesystem, I would
very quickly hit
	BUG_ON(!buffer_async_write(bh));
in fs/buffer.c:end_buffer_async_write

It seems that more than one request would be submitted for a given bh
at a time.

What would happen is the following:
2 threads doing __mpage_writepages on the same page.
Thread 1 - lock the page first, and enter __block_write_full_page.
Thread 1 - (eg.) mark_buffer_async_write on the first 2 buffers.
Thread 1 - set page writeback, unlock page.
Thread 2 - lock page, wait on page writeback
Thread 1 - submit_bh on the first 2 buffers.
=> both requests complete, none of the page buffers are async_write,
   end_page_writeback is called.
Thread 2 - wakes up. enters __block_write_full_page.
Thread 2 - mark_buffer_async_write on (eg.) the last buffer
Thread 1 - finds the last buffer has async_write set, submit_bh on that.
Thread 2 - submit_bh on the last buffer.
=> oops.

So change __block_write_full_page to explicitly keep track of the last bh
we need to issue, so we don't touch anything after issuing the last
request.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index af7c51ded2e1..bc75f2e7b274 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	int err;
 	sector_t block;
 	sector_t last_block;
-	struct buffer_head *bh, *head;
+	struct buffer_head *bh, *head, *last_bh = NULL;
 	int nr_underway = 0;
 
 	BUG_ON(!PageLocked(page));
@@ -1809,7 +1809,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	} while (bh != head);
 
 	do {
-		get_bh(bh);
 		if (!buffer_mapped(bh))
 			continue;
 		/*
@@ -1827,6 +1826,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write(bh);
+			get_bh(bh);
+			last_bh = bh;
 		} else {
 			unlock_buffer(bh);
 		}
@@ -1845,10 +1846,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		if (buffer_async_write(bh)) {
 			submit_bh(WRITE, bh);
 			nr_underway++;
+			put_bh(bh);
+			if (bh == last_bh)
+				break;
 		}
-		put_bh(bh);
 		bh = next;
 	} while (bh != head);
+	bh = head;
 
 	err = 0;
 done:
@@ -1887,10 +1891,11 @@ recover:
 	bh = head;
 	/* Recovery: lock and submit the mapped buffers */
 	do {
-		get_bh(bh);
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			mark_buffer_async_write(bh);
+			get_bh(bh);
+			last_bh = bh;
 		} else {
 			/*
 			 * The buffer may have been set dirty during
@@ -1909,10 +1914,13 @@ recover:
 			clear_buffer_dirty(bh);
 			submit_bh(WRITE, bh);
 			nr_underway++;
+			put_bh(bh);
+			if (bh == last_bh)
+				break;
 		}
-		put_bh(bh);
 		bh = next;
 	} while (bh != head);
+	bh = head;
 	goto done;
 }
 
-- 
cgit v1.2.2


From 05937baae9fc27b64bcd4378da7d2b14edf7931c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 5 May 2005 16:15:47 -0700
Subject: [PATCH] __block_write_full_page speedup

Remove all those get_bh()'s and put_bh()'s by extending lock_page() to cover
the troublesome regions.

(get_bh() and put_bh() happen every time whereas contention on a page's lock
in there happens basically never).

Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index bc75f2e7b274..6f2c3303a443 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1826,7 +1826,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write(bh);
-			get_bh(bh);
 			last_bh = bh;
 		} else {
 			unlock_buffer(bh);
@@ -1839,20 +1838,19 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 */
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);
-	unlock_page(page);
 
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			put_bh(bh);
 			if (bh == last_bh)
 				break;
 		}
 		bh = next;
 	} while (bh != head);
 	bh = head;
+	unlock_page(page);
 
 	err = 0;
 done:
@@ -1894,7 +1892,6 @@ recover:
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			mark_buffer_async_write(bh);
-			get_bh(bh);
 			last_bh = bh;
 		} else {
 			/*
@@ -1914,7 +1911,6 @@ recover:
 			clear_buffer_dirty(bh);
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			put_bh(bh);
 			if (bh == last_bh)
 				break;
 		}
-- 
cgit v1.2.2


From f0fbd5fc09b20f7ba7bc8c80be33e39925bb38e1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 5 May 2005 16:15:48 -0700
Subject: [PATCH] __block_write_full_page() simplification

The `last_bh' logic probably isn't worth much.  In those situations where only
the front part of the page is being written out we will save some looping but
in the vastly more common case of an all-page writeout if just adds more code.

Nick Piggin <nickpiggin@yahoo.com.au>

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6f2c3303a443..91ace8034bf7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	int err;
 	sector_t block;
 	sector_t last_block;
-	struct buffer_head *bh, *head, *last_bh = NULL;
+	struct buffer_head *bh, *head;
 	int nr_underway = 0;
 
 	BUG_ON(!PageLocked(page));
@@ -1826,7 +1826,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write(bh);
-			last_bh = bh;
 		} else {
 			unlock_buffer(bh);
 		}
@@ -1844,12 +1843,9 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		if (buffer_async_write(bh)) {
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			if (bh == last_bh)
-				break;
 		}
 		bh = next;
 	} while (bh != head);
-	bh = head;
 	unlock_page(page);
 
 	err = 0;
@@ -1892,7 +1888,6 @@ recover:
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			mark_buffer_async_write(bh);
-			last_bh = bh;
 		} else {
 			/*
 			 * The buffer may have been set dirty during
@@ -1911,12 +1906,9 @@ recover:
 			clear_buffer_dirty(bh);
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			if (bh == last_bh)
-				break;
 		}
 		bh = next;
 	} while (bh != head);
-	bh = head;
 	goto done;
 }
 
-- 
cgit v1.2.2


From dfc1e148542f6f4951fd59e32c14a7368356db9d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 5 May 2005 16:15:51 -0700
Subject: [PATCH] remove BK documentation

There's no longer a reason to document the obsolete BK usage.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Jeff Garzik <jgarzik@pobox.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/README | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/README b/fs/cifs/README
index 7b4ac096cd11..e74df0c73256 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -32,9 +32,9 @@ the cifs download to your kernel build directory e.g.
 6) make modules (or "make" if CIFS VFS not to be built as a module)
 
 For Linux 2.6:
-1) Download the kernel (e.g. from http://www.kernel.org or from bitkeeper
-at bk://linux.bkbits.net/linux-2.5) and change directory into the top
-of the kernel directory tree (e.g. /usr/src/linux-2.5.73)
+1) Download the kernel (e.g. from http://www.kernel.org)
+and change directory into the top of the kernel directory tree
+(e.g. /usr/src/linux-2.5.73)
 2) make menuconfig (or make xconfig)
 3) select cifs from within the network filesystem choices
 4) save and exit
-- 
cgit v1.2.2


From 2ef41634def0fcb0def3e3c90220c651ca478cb3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 5 May 2005 16:15:59 -0700
Subject: [PATCH] remove do_sync parameter from __invalidate_device

The only caller that ever sets it can call fsync_bdev itself easily.  Also
update some comments.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c |  2 +-
 fs/inode.c     | 21 +++------------------
 2 files changed, 4 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index d19d07c49ad3..c0cbd1bc1a02 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -530,7 +530,7 @@ int check_disk_change(struct block_device *bdev)
 	if (!bdops->media_changed(bdev->bd_disk))
 		return 0;
 
-	if (__invalidate_device(bdev, 0))
+	if (__invalidate_device(bdev))
 		printk("VFS: busy inodes on changed media.\n");
 
 	if (bdops->revalidate_disk)
diff --git a/fs/inode.c b/fs/inode.c
index 90a110feb713..801fe7f36280 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -26,7 +26,6 @@
  * This is needed for the following functions:
  *  - inode_has_buffers
  *  - invalidate_inode_buffers
- *  - fsync_bdev
  *  - invalidate_bdev
  *
  * FIXME: remove all knowledge of the buffer layer from this file
@@ -332,14 +331,6 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
 	return busy;
 }
 
-/*
- * This is a two-stage process. First we collect all
- * offending inodes onto the throw-away list, and in
- * the second stage we actually dispose of them. This
- * is because we don't want to sleep while messing
- * with the global lists..
- */
- 
 /**
  *	invalidate_inodes	- discard the inodes on a device
  *	@sb: superblock
@@ -366,16 +357,11 @@ int invalidate_inodes(struct super_block * sb)
 
 EXPORT_SYMBOL(invalidate_inodes);
  
-int __invalidate_device(struct block_device *bdev, int do_sync)
+int __invalidate_device(struct block_device *bdev)
 {
-	struct super_block *sb;
-	int res;
+	struct super_block *sb = get_super(bdev);
+	int res = 0;
 
-	if (do_sync)
-		fsync_bdev(bdev);
-
-	res = 0;
-	sb = get_super(bdev);
 	if (sb) {
 		/*
 		 * no need to lock the super, get_super holds the
@@ -390,7 +376,6 @@ int __invalidate_device(struct block_device *bdev, int do_sync)
 	invalidate_bdev(bdev, 0);
 	return res;
 }
-
 EXPORT_SYMBOL(__invalidate_device);
 
 static int can_unuse(struct inode *inode)
-- 
cgit v1.2.2


From d17d7fa44dbe1f12031773e27eda9e939024a037 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 5 May 2005 16:16:02 -0700
Subject: [PATCH] revert ext3-writepages-support-for-writeback-mode

This had a fatal lock ranking bug: we do journal_start outside
mpage_writepages()'s lock_page().

Revert the whole thing, think again.

Credit-to: Jan Kara <jack@suse.cz>

For identifying the bug.

Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 46 ----------------------------------------------
 fs/mpage.c      | 12 +-----------
 2 files changed, 1 insertion(+), 57 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ea5888688f94..0d5fa73b18dc 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -844,12 +844,6 @@ get_block:
 	return ret;
 }
 
-static int ext3_writepages_get_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh, int create)
-{
-	return ext3_direct_io_get_blocks(inode, iblock, 1, bh, create);
-}
-
 /*
  * `handle' can be NULL if create is zero
  */
@@ -1323,45 +1317,6 @@ out_fail:
 	return ret;
 }
 
-static int
-ext3_writeback_writepage_helper(struct page *page,
-				struct writeback_control *wbc)
-{
-	return block_write_full_page(page, ext3_get_block, wbc);
-}
-
-static int
-ext3_writeback_writepages(struct address_space *mapping,
-				struct writeback_control *wbc)
-{
-	struct inode *inode = mapping->host;
-	handle_t *handle = NULL;
-	int err, ret = 0;
-
-	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-		return ret;
-
-	handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		return ret;
-	}
-
-        ret = __mpage_writepages(mapping, wbc, ext3_writepages_get_block,
-					ext3_writeback_writepage_helper);
-
-	/*
-	 * Need to reaquire the handle since ext3_writepages_get_block()
-	 * can restart the handle
-	 */
-	handle = journal_current_handle();
-
-	err = ext3_journal_stop(handle);
-	if (!ret)
-		ret = err;
-	return ret;
-}
-
 static int ext3_writeback_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
@@ -1599,7 +1554,6 @@ static struct address_space_operations ext3_writeback_aops = {
 	.readpage	= ext3_readpage,
 	.readpages	= ext3_readpages,
 	.writepage	= ext3_writeback_writepage,
-	.writepages	= ext3_writeback_writepages,
 	.sync_page	= block_sync_page,
 	.prepare_write	= ext3_prepare_write,
 	.commit_write	= ext3_writeback_commit_write,
diff --git a/fs/mpage.c b/fs/mpage.c
index 32c7c8fcfce7..68db5256a727 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -626,15 +626,6 @@ out:
 int
 mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block)
-{
-	return __mpage_writepages(mapping, wbc, get_block,
-		mapping->a_ops->writepage);
-}
-
-int
-__mpage_writepages(struct address_space *mapping,
-		struct writeback_control *wbc, get_block_t get_block,
-		writepage_t writepage_fn)
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct bio *bio = NULL;
@@ -725,7 +716,7 @@ retry:
 			} else {
 				bio = __mpage_writepage(bio, page, get_block,
 						&last_block_in_bio, &ret, wbc,
-						writepage_fn);
+						page->mapping->a_ops->writepage);
 			}
 			if (unlikely(ret == WRITEPAGE_ACTIVATE))
 				unlock_page(page);
@@ -755,7 +746,6 @@ retry:
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
-EXPORT_SYMBOL(__mpage_writepages);
 
 int mpage_writepage(struct page *page, get_block_t get_block,
 	struct writeback_control *wbc)
-- 
cgit v1.2.2


From 75c96f85845a6707b0f9916cb263cb3584f7d48f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 5 May 2005 16:16:09 -0700
Subject: [PATCH] make some things static

This patch makes some needlessly global identifiers static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Arjan van de Ven <arjanv@infradead.org>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/bad_inode.c         |  2 +-
 fs/buffer.c            |  2 +-
 fs/char_dev.c          |  2 +-
 fs/dcache.c            |  2 +-
 fs/eventpoll.c         |  2 +-
 fs/exec.c              |  3 ++-
 fs/jffs2/compr_rubin.c | 18 ++++++++++++------
 fs/jffs2/compr_zlib.c  | 12 ++++++++----
 fs/locks.c             |  6 +++---
 fs/mbcache.c           |  2 +-
 fs/mpage.c             |  2 +-
 fs/namei.c             |  8 ++++----
 fs/nfs/idmap.c         |  4 ++--
 fs/nfs/inode.c         |  4 ++--
 fs/nls/nls_base.c      |  2 +-
 fs/select.c            |  6 ++++--
 16 files changed, 45 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 672a31924f3c..e172180a1d8c 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -47,7 +47,7 @@ static struct file_operations bad_file_ops =
 	.get_unmapped_area = EIO_ERROR,
 };
 
-struct inode_operations bad_inode_ops =
+static struct inode_operations bad_inode_ops =
 {
 	.create		= EIO_ERROR,
 	.lookup		= EIO_ERROR,
diff --git a/fs/buffer.c b/fs/buffer.c
index 91ace8034bf7..6f88dcc6d002 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1210,7 +1210,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 	return 1;
 }
 
-struct buffer_head *
+static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
 	/* Size must be multiple of hard sectorsize */
diff --git a/fs/char_dev.c b/fs/char_dev.c
index a745b1d9e545..c1e3537909fc 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -328,7 +328,7 @@ void cd_forget(struct inode *inode)
 	spin_unlock(&cdev_lock);
 }
 
-void cdev_purge(struct cdev *cdev)
+static void cdev_purge(struct cdev *cdev)
 {
 	spin_lock(&cdev_lock);
 	while (!list_empty(&cdev->list)) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 496a4e08369c..3aa8a7e980d8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -39,7 +39,7 @@ int sysctl_vfs_cache_pressure = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
 
 EXPORT_SYMBOL(dcache_lock);
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 05b966cd6f76..9900e333655a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -320,7 +320,7 @@ static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type,
 /*
  * This semaphore is used to serialize ep_free() and eventpoll_release_file().
  */
-struct semaphore epsem;
+static struct semaphore epsem;
 
 /* Safe wake up implementation */
 static struct poll_safewake psw;
diff --git a/fs/exec.c b/fs/exec.c
index a8394499926c..52acff3f44f0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -197,7 +197,8 @@ static int count(char __user * __user * argv, int max)
  * memory to free pages in kernel mem. These are in a format ready
  * to be put directly into the top of new user memory.
  */
-int copy_strings(int argc,char __user * __user * argv, struct linux_binprm *bprm)
+static int copy_strings(int argc, char __user * __user * argv,
+			struct linux_binprm *bprm)
 {
 	struct page *kmapped_page = NULL;
 	char *kaddr = NULL;
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index 450d6624181f..09422388fb96 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -228,8 +228,10 @@ int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
 	return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in, cpage_out, sourcelen, dstlen);
 }
 #endif
-int jffs2_dynrubin_compress(unsigned char *data_in, unsigned char *cpage_out, 
-		   uint32_t *sourcelen, uint32_t *dstlen, void *model)
+static int jffs2_dynrubin_compress(unsigned char *data_in,
+				   unsigned char *cpage_out,
+				   uint32_t *sourcelen, uint32_t *dstlen,
+				   void *model)
 {
 	int bits[8];
 	unsigned char histo[256];
@@ -306,15 +308,19 @@ static void rubin_do_decompress(int bit_divider, int *bits, unsigned char *cdata
 }		   
 
 
-int jffs2_rubinmips_decompress(unsigned char *data_in, unsigned char *cpage_out, 
-		   uint32_t sourcelen, uint32_t dstlen, void *model)
+static int jffs2_rubinmips_decompress(unsigned char *data_in,
+				      unsigned char *cpage_out,
+				      uint32_t sourcelen, uint32_t dstlen,
+				      void *model)
 {
 	rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in, cpage_out, sourcelen, dstlen);
         return 0;
 }
 
-int jffs2_dynrubin_decompress(unsigned char *data_in, unsigned char *cpage_out, 
-		   uint32_t sourcelen, uint32_t dstlen, void *model)
+static int jffs2_dynrubin_decompress(unsigned char *data_in,
+				     unsigned char *cpage_out,
+				     uint32_t sourcelen, uint32_t dstlen,
+				     void *model)
 {
 	int bits[8];
 	int c;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 9f9932c22adb..078a30e406b5 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -69,8 +69,10 @@ static void free_workspaces(void)
 #define free_workspaces() do { } while(0)
 #endif /* __KERNEL__ */
 
-int jffs2_zlib_compress(unsigned char *data_in, unsigned char *cpage_out, 
-		   uint32_t *sourcelen, uint32_t *dstlen, void *model)
+static int jffs2_zlib_compress(unsigned char *data_in,
+			       unsigned char *cpage_out,
+			       uint32_t *sourcelen, uint32_t *dstlen,
+			       void *model)
 {
 	int ret;
 
@@ -135,8 +137,10 @@ int jffs2_zlib_compress(unsigned char *data_in, unsigned char *cpage_out,
 	return ret;
 }
 
-int jffs2_zlib_decompress(unsigned char *data_in, unsigned char *cpage_out,
-		      uint32_t srclen, uint32_t destlen, void *model)
+static int jffs2_zlib_decompress(unsigned char *data_in,
+				 unsigned char *cpage_out,
+				 uint32_t srclen, uint32_t destlen,
+				 void *model)
 {
 	int ret;
 	int wbits = MAX_WBITS;
diff --git a/fs/locks.c b/fs/locks.c
index 1792ce547af7..3fa6a7ce57a7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -406,12 +406,12 @@ static void lease_release_private_callback(struct file_lock *fl)
 	fl->fl_file->f_owner.signum = 0;
 }
 
-int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
+static int lease_mylease_callback(struct file_lock *fl, struct file_lock *try)
 {
 	return fl->fl_file == try->fl_file;
 }
 
-struct lock_manager_operations lease_manager_ops = {
+static struct lock_manager_operations lease_manager_ops = {
 	.fl_break = lease_break_callback,
 	.fl_release_private = lease_release_private_callback,
 	.fl_mylease = lease_mylease_callback,
@@ -1274,7 +1274,7 @@ int fcntl_getlease(struct file *filp)
  *
  *	Called with kernel lock held.
  */
-int __setlease(struct file *filp, long arg, struct file_lock **flp)
+static int __setlease(struct file *filp, long arg, struct file_lock **flp)
 {
 	struct file_lock *fl, **before, **my_before = NULL, *lease = *flp;
 	struct dentry *dentry = filp->f_dentry;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index f9e4d2700cd8..c7170b9221a3 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -57,7 +57,7 @@
 
 #define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
 
-DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
+static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
 		
 MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
 MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
diff --git a/fs/mpage.c b/fs/mpage.c
index 68db5256a727..b92c0e64aefa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -87,7 +87,7 @@ static int mpage_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
 	return 0;
 }
 
-struct bio *mpage_bio_submit(int rw, struct bio *bio)
+static struct bio *mpage_bio_submit(int rw, struct bio *bio)
 {
 	bio->bi_end_io = mpage_end_io_read;
 	if (rw == WRITE)
diff --git a/fs/namei.c b/fs/namei.c
index 0f76fd75591b..defe6781e003 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2071,8 +2071,8 @@ exit:
  *	   ->i_sem on parents, which works but leads to some truely excessive
  *	   locking].
  */
-int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
-	       struct inode *new_dir, struct dentry *new_dentry)
+static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+			  struct inode *new_dir, struct dentry *new_dentry)
 {
 	int error = 0;
 	struct inode *target;
@@ -2116,8 +2116,8 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
-int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-	       struct inode *new_dir, struct dentry *new_dentry)
+static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+			    struct inode *new_dir, struct dentry *new_dentry)
 {
 	struct inode *target;
 	int error;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b74c4e3a64e2..87f4f9aeac86 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -79,7 +79,7 @@ static ssize_t   idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
 		     char __user *, size_t);
 static ssize_t   idmap_pipe_downcall(struct file *, const char __user *,
 		     size_t);
-void             idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+static void      idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
 
 static unsigned int fnvhash32(const void *, size_t);
 
@@ -434,7 +434,7 @@ out:
 	return ret;
 }
 
-void
+static void
 idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
 	struct idmap_msg *im = msg->data;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6345f26e87ee..f2317f3e29f9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1904,7 +1904,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 	}
 }
  
-int nfs_init_inodecache(void)
+static int nfs_init_inodecache(void)
 {
 	nfs_inode_cachep = kmem_cache_create("nfs_inode_cache",
 					     sizeof(struct nfs_inode),
@@ -1916,7 +1916,7 @@ int nfs_init_inodecache(void)
 	return 0;
 }
 
-void nfs_destroy_inodecache(void)
+static void nfs_destroy_inodecache(void)
 {
 	if (kmem_cache_destroy(nfs_inode_cachep))
 		printk(KERN_INFO "nfs_inode_cache: not all structures were freed\n");
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 897512796edb..a912debcd20b 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -243,7 +243,7 @@ void unload_nls(struct nls_table *nls)
 	module_put(nls->owner);
 }
 
-wchar_t charset2uni[256] = {
+static wchar_t charset2uni[256] = {
 	/* 0x00*/
 	0x0000, 0x0001, 0x0002, 0x0003,
 	0x0004, 0x0005, 0x0006, 0x0007,
diff --git a/fs/select.c b/fs/select.c
index 25b1ccac2f2c..b80e7eb0ac0d 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -55,7 +55,8 @@ struct poll_table_page {
  * as all select/poll functions have to call it to add an entry to the
  * poll table.
  */
-void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p);
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+		       poll_table *p);
 
 void poll_initwait(struct poll_wqueues *pwq)
 {
@@ -87,7 +88,8 @@ void poll_freewait(struct poll_wqueues *pwq)
 
 EXPORT_SYMBOL(poll_freewait);
 
-void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *_p)
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+		       poll_table *_p)
 {
 	struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt);
 	struct poll_table_page *table = p->table;
-- 
cgit v1.2.2


From 291c4a75ce7632ee5c565359fb875ba0597f76be Mon Sep 17 00:00:00 2001
From: "Randy.Dunlap" <rddunlap@osdl.org>
Date: Thu, 5 May 2005 16:16:11 -0700
Subject: [PATCH] reiserfs: use NULL instead of 0

Use NULL instead of 0 for pointer (sparse warning):
fs/reiserfs/namei.c:611:50: warning: Using plain integer as NULL pointer

Signed-off-by: Randy Dunlap <rddunlap@osdl.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 80e92d9b81cb..7d4dc5f5aa8b 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -608,7 +608,7 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode,
         goto out_failed;
     }
 
-    retval = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode);
+    retval = reiserfs_new_inode (&th, dir, mode, NULL, 0/*i_size*/, dentry, inode);
     if (retval)
         goto out_failed;
 	
-- 
cgit v1.2.2


From 3677209239ed71d2654e73eecfab1dbec2af11a9 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 5 May 2005 16:16:12 -0700
Subject: [PATCH] comments on locking of task->comm

Add some comments about task->comm, to explain what it is near its definition
and provide some important pointers to its uses.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 52acff3f44f0..e56ee2437025 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -869,9 +869,11 @@ int flush_old_exec(struct linux_binprm * bprm)
 	if (current->euid == current->uid && current->egid == current->gid)
 		current->mm->dumpable = 1;
 	name = bprm->filename;
+
+	/* Copies the binary name from after last slash */
 	for (i=0; (ch = *(name++)) != '\0';) {
 		if (ch == '/')
-			i = 0;
+			i = 0; /* overwrite what we wrote */
 		else
 			if (i < (sizeof(tcomm) - 1))
 				tcomm[i++] = ch;
-- 
cgit v1.2.2


From f59154c53fac0bfee52393247beadf0474770351 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 5 May 2005 16:16:21 -0700
Subject: [PATCH] fs/udf/udftime.c: fix off by one error

This patch fixes an off by one error found by the Coverity checker.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/udf/udftime.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index c2634bda6b50..457a8fe28575 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -103,7 +103,7 @@ udf_stamp_to_time(time_t *dest, long *dest_usec, kernel_timestamp src)
 		offset = 0;
 
 	if ((src.year < EPOCH_YEAR) ||
-		(src.year > EPOCH_YEAR+MAX_YEAR_SECONDS))
+		(src.year >= EPOCH_YEAR+MAX_YEAR_SECONDS))
 	{
 		*dest = -1;
 		*dest_usec = -1;
-- 
cgit v1.2.2


From b2411dd202e854d1f3be541135af8bb9872ea8b6 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 6 May 2005 17:41:01 -0700
Subject: [PATCH] revert msdos partitioning fix

This change from March 3rd causes the partition parsing code to ignore
partitions which have a signature byte of zero.  Turns out that more people
have such partitions than we expected, and their device numbering is coming up
wrong in post-2.6.11 kernels.

So revert the change while we think about the problem a bit more.

Cc: Andries Brouwer <Andries.Brouwer@cwi.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/msdos.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 17ee1b4ff087..584a27b2bbd5 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -114,9 +114,6 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
 		 */
 		for (i=0; i<4; i++, p++) {
 			u32 offs, size, next;
-
-			if (SYS_IND(p) == 0)
-				continue;
 			if (!NR_SECTS(p) || is_extended_partition(p))
 				continue;
 
@@ -433,8 +430,6 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
 	for (slot = 1 ; slot <= 4 ; slot++, p++) {
 		u32 start = START_SECT(p)*sector_size;
 		u32 size = NR_SECTS(p)*sector_size;
-		if (SYS_IND(p) == 0)
-			continue;
 		if (!size)
 			continue;
 		if (is_extended_partition(p)) {
-- 
cgit v1.2.2


From a84a505956f5c795a9ab3d60d97b6b91a27aa571 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 11 May 2005 00:10:44 -0700
Subject: [PATCH] fix Linux kernel ELF core dump privilege elevation

As reported by Paul Starzetz <ihaquer@isec.pl>

Reference: CAN-2005-1263

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/binfmt_elf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index ce9423bb2de3..c374be51b041 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -251,7 +251,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr * exec,
 	}
 
 	/* Populate argv and envp */
-	p = current->mm->arg_start;
+	p = current->mm->arg_end = current->mm->arg_start;
 	while (argc-- > 0) {
 		size_t len;
 		__put_user((elf_addr_t)p, argv++);
@@ -1301,7 +1301,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 		       struct mm_struct *mm)
 {
-	int i, len;
+	unsigned int i, len;
 	
 	/* first copy the parameters from user space */
 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
-- 
cgit v1.2.2


From 64d13c00cf1f7c3d2c1ff449e2a0500ab568d319 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Mon, 16 May 2005 21:53:09 -0700
Subject: [PATCH] fix impossible VmallocChunk

VmallocTotal: 34359738367 kB
VmallocUsed:    266288 kB
VmallocChunk: 18014366299193295 kB
is unsettling - x86_64 and some other architectures keep a separate address
range for modules in vmalloc's vmlist, which /proc/meminfo should pass over.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/mmu.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c
index a7041038ad56..25d2d9c6e329 100644
--- a/fs/proc/mmu.c
+++ b/fs/proc/mmu.c
@@ -50,13 +50,23 @@ void get_vmalloc_info(struct vmalloc_info *vmi)
 		read_lock(&vmlist_lock);
 
 		for (vma = vmlist; vma; vma = vma->next) {
+			unsigned long addr = (unsigned long) vma->addr;
+
+			/*
+			 * Some archs keep another range for modules in vmlist
+			 */
+			if (addr < VMALLOC_START)
+				continue;
+			if (addr >= VMALLOC_END)
+				break;
+
 			vmi->used += vma->size;
 
-			free_area_size = (unsigned long) vma->addr - prev_end;
+			free_area_size = addr - prev_end;
 			if (vmi->largest_chunk < free_area_size)
 				vmi->largest_chunk = free_area_size;
 
-			prev_end = vma->size + (unsigned long) vma->addr;
+			prev_end = vma->size + addr;
 		}
 
 		if (VMALLOC_END - prev_end > vmi->largest_chunk)
-- 
cgit v1.2.2


From c64610ba585fabb36be78782868277f3d9741a2e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 16 May 2005 21:53:49 -0700
Subject: [PATCH] block_read_full_page() get_block() error handling fix

If block_read_full_page() detects an error when running get_block() it will
run SetPageError(), then it will zero out the block in pagecache and will mark
the buffer_head uptodate.

So at the end of readahead we end up with a non-uptodate pagecache page which
is marked PageError.  But it has uptodate buffers.

The pagefault code will run ClearPageError, will launch readpage a second time
and block_read_full_page() will notice the uptodate buffers and will mark the
page uptodate as well.  We end up with an uptodate, !PageError page full of
zeros and the error is lost.

(It seems a little odd that filemap_nopage() runs ClearPageError().  I guess
all of this adds up to meaning that for each attempted access to the page, the
pagefault handler will retry the I/O.  Which is good and bad.  If the app is
ignoring SIGBUS for some reason we could get a lot of back-to-back I/O
errors.)

Fix it by not marking the pagecache buffer_head as uptodate if the attempt to
map that buffer to a disk block failed.

Credit-to: Qu Fuping <fs@ercist.iscas.ac.cn>

  For reporting the bug and identifying its source.

Signed-off-by: Qu Fuping <fs@ercist.iscas.ac.cn>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6f88dcc6d002..7e9e409feaa7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2094,9 +2094,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 			continue;
 
 		if (!buffer_mapped(bh)) {
+			int err = 0;
+
 			fully_mapped = 0;
 			if (iblock < lblock) {
-				if (get_block(inode, iblock, bh, 0))
+				err = get_block(inode, iblock, bh, 0);
+				if (err)
 					SetPageError(page);
 			}
 			if (!buffer_mapped(bh)) {
@@ -2104,7 +2107,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 				memset(kaddr + i * blocksize, 0, blocksize);
 				flush_dcache_page(page);
 				kunmap_atomic(kaddr, KM_USER0);
-				set_buffer_uptodate(bh);
+				if (!err)
+					set_buffer_uptodate(bh);
 				continue;
 			}
 			/*
-- 
cgit v1.2.2


From 301216244b1e39c4346e56d38b079ca53d528580 Mon Sep 17 00:00:00 2001
From: Stephen Tweedie <sct@redhat.com>
Date: Wed, 18 May 2005 11:47:17 -0400
Subject: [PATCH] Avoid console spam with ext3 aborted journal.

Avoid console spam with ext3 aborted journal.

ext3 usually reports error conditions that it detects in its environment.
But when its journal gets aborted due to such errors, it can sometimes
continue to report that condition forever, spamming the console to such
an extent that the initial first cause of the journal abort can be lost.

When the journal aborts, we put the filesystem into readonly mode.  Most
subsequent filesystem operations will get rejected immediately by checks
for MS_RDONLY either in the filesystem or in the VFS.  But some paths do
not have such checks --- for example, if we continue to write to a file
handle that was opened before the fs went readonly.  (We only check for
the ROFS condition when the file is first opened.)  In these cases, we
can continue to generate log errors similar to

EXT3-fs error (device $DEV) in start_transaction: Journal has aborted

for each subsequent write.

There is really no point in generating these errors after the initial
error has been fully reported.  Specifically, if we're starting a
completely new filesystem operation, and the filesystem is *already*
readonly (ie. the ext3 layer has already detected and handled the
underlying jbd abort), and we see an EROFS error, then there is simply
no point in reporting it again.

Signed-off-by: Stephen Tweedie <sct@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/super.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 545b440a2d2f..981ccb233ef5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -225,8 +225,16 @@ void __ext3_std_error (struct super_block * sb, const char * function,
 		       int errno)
 {
 	char nbuf[16];
-	const char *errstr = ext3_decode_error(sb, errno, nbuf);
+	const char *errstr;
+
+	/* Special case: if the error is EROFS, and we're not already
+	 * inside a transaction, then there's really no point in logging
+	 * an error. */
+	if (errno == -EROFS && journal_current_handle() == NULL &&
+	    (sb->s_flags & MS_RDONLY))
+		return;
 
+	errstr = ext3_decode_error(sb, errno, nbuf);
 	printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
 		sb->s_id, function, errstr);
 
-- 
cgit v1.2.2


From f81a0bffa116ea22149aa7cfb0b4ee09096d9d92 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 19 May 2005 12:26:43 -0700
Subject: [AF_UNIX]: Use lookup_create().

currently it opencodes it, but that's in the way of chaning the
lookup_hash interface.

I'd prefer to disallow modular af_unix over exporting lookup_create,
but I'll leave that to you.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/namei.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index defe6781e003..dd78f01b6de8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1580,6 +1580,7 @@ enoent:
 fail:
 	return dentry;
 }
+EXPORT_SYMBOL_GPL(lookup_create);
 
 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 {
-- 
cgit v1.2.2