From 991114c6fa6a21d1fa4d544abe78592352860c82 Mon Sep 17 00:00:00 2001
From: Alexander Viro <aviro@redhat.com>
Date: Thu, 23 Jun 2005 00:09:01 -0700
Subject: [PATCH] fix for prune_icache()/forced final iput() races

Based on analysis and a patch from Russ Weight <rweight@us.ibm.com>

There is a race condition that can occur if an inode is allocated and then
released (using iput) during the ->fill_super functions.  The race
condition is between kswapd and mount.

For most filesystems this can only happen in an error path when kswapd is
running concurrently.  For isofs, however, the error can occur in a more
common code path (which is how the bug was found).

The logic here is "we want final iput() to free inode *now* instead of
letting it sit in cache if fs is going down or had not quite come up".  The
problem is with kswapd seeing such inodes in the middle of being killed and
happily taking over.

The clean solution would be to tell kswapd to leave those inodes alone and
let our final iput deal with them.  I.e.  add a new flag (proposed as
I_FORCED_FREEING; this patch names it I_WILL_FREE), set it before
write_inode_now() there and make prune_icache() leave those alone.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 801fe7f362..1f9a3a2b89 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -500,7 +500,7 @@ repeat:
 			continue;
 		if (!test(inode, data))
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -525,7 +525,7 @@ repeat:
 			continue;
 		if (inode->i_sb != sb)
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR)) {
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique);
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (!(inode->i_state & I_FREEING))
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		__iget(inode);
 	else
 		/*
@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode)
 		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
 			list_move(&inode->i_list, &inode_unused);
 		inodes_stat.nr_unused++;
-		spin_unlock(&inode_lock);
-		if (!sb || (sb->s_flags & MS_ACTIVE))
+		if (!sb || (sb->s_flags & MS_ACTIVE)) {
+			spin_unlock(&inode_lock);
 			return;
+		}
+		inode->i_state |= I_WILL_FREE;
+		spin_unlock(&inode_lock);
 		write_inode_now(inode, 1);
 		spin_lock(&inode_lock);
+		inode->i_state &= ~I_WILL_FREE;
 		inodes_stat.nr_unused--;
 		hlist_del_init(&inode->i_hash);
 	}
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
-	inode->i_state|=I_FREEING;
+	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 	if (inode->i_data.nrpages)
-- 
cgit v1.2.2


From ac20427ef6aa63da663bdc88b71d16f7394f5e23 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@redhat.com>
Date: Thu, 23 Jun 2005 00:09:11 -0700
Subject: [PATCH] add check to /proc/devices read routines

Add a check to get_chrdev_list and get_blkdev_list to prevent reads of
/proc/devices from spilling over the provided page if more than 4096
bytes of string data are generated from all the registered character and
block devices in a system.

Signed-off-by: Neil Horman <nhorman@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: <viro@parcelfarce.linux.theplanet.co.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/char_dev.c       | 13 ++++++++++++-
 fs/proc/proc_misc.c |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index c1e3537909..e82aac9cc2 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -56,10 +56,21 @@ int get_chrdev_list(char *page)
 
 	down(&chrdevs_lock);
 	for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
-		for (cd = chrdevs[i]; cd; cd = cd->next)
+		for (cd = chrdevs[i]; cd; cd = cd->next) {
+			/*
+			 * if the current name, plus the 5 extra characters
+			 * in the device line for this entry
+			 * would run us off the page, we're done
+			 */
+			if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
+				goto page_full;
+
+
 			len += sprintf(page+len, "%3d %s\n",
 				       cd->major, cd->name);
+		}
 	}
+page_full:
 	up(&chrdevs_lock);
 
 	return len;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 63a9fbf1ac..94b570ad03 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -451,7 +451,7 @@ static int devices_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
 	int len = get_chrdev_list(page);
-	len += get_blkdev_list(page+len);
+	len += get_blkdev_list(page+len, len);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-- 
cgit v1.2.2


From 5f45f1a78fbac3cc859ec10c5366e97d20d40fa2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:12 -0700
Subject: [PATCH] remove duplicate get_dentry functions in various places

Various filesystem drivers have grown a get_dentry() function that's a
duplicate of lookup_one_len, except that it doesn't take a maximum length
argument and doesn't check for \0 or / in the passed in filename.

Switch all these places to use lookup_one_len.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Cc: Greg KH <greg@kroah.com>
Cc: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/debugfs/inode.c | 12 +-----------
 fs/sysfs/dir.c     |  5 +++--
 fs/sysfs/file.c    |  5 +++--
 fs/sysfs/group.c   |  4 +++-
 fs/sysfs/inode.c   | 10 ----------
 fs/sysfs/sysfs.h   |  1 -
 6 files changed, 10 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b529786699..a86ac4aeae 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
 	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
 
-static struct dentry * get_dentry(struct dentry *parent, const char *name)
-{               
-	struct qstr qstr;
-
-	qstr.name = name;
-	qstr.len = strlen(name);
-	qstr.hash = full_name_hash(name,qstr.len);
-	return lookup_hash(&qstr,parent);
-}               
-
 static struct super_block *debug_get_sb(struct file_system_type *fs_type,
 				        int flags, const char *dev_name,
 					void *data)
@@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 
 	*dentry = NULL;
 	down(&parent->d_inode->i_sem);
-	*dentry = get_dentry (parent, name);
+	*dentry = lookup_one_len(name, parent, strlen(name));
 	if (!IS_ERR(dentry)) {
 		if ((mode & S_IFMT) == S_IFDIR)
 			error = debugfs_mkdir(parent->d_inode, *dentry, mode);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 37d7a6875d..59734ba1ee 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -8,6 +8,7 @@
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include "sysfs.h"
 
 DECLARE_RWSEM(sysfs_rename_sem);
@@ -99,7 +100,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
 	down(&p->d_inode->i_sem);
-	*d = sysfs_get_dentry(p,n);
+	*d = lookup_one_len(n, p, strlen(n));
 	if (!IS_ERR(*d)) {
 		error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
 		if (!error) {
@@ -315,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 
 	down(&parent->d_inode->i_sem);
 
-	new_dentry = sysfs_get_dentry(parent, new_name);
+	new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
 	if (!IS_ERR(new_dentry)) {
   		if (!new_dentry->d_inode) {
 			error = kobject_set_name(kobj, "%s", new_name);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 849aac1154..e9cfa39f40 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/dnotify.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
 
@@ -400,7 +401,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 	int res = -ENOENT;
 
 	down(&dir->d_inode->i_sem);
-	victim = sysfs_get_dentry(dir, attr->name);
+	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		/* make sure dentry is really there */
 		if (victim->d_inode && 
@@ -443,7 +444,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 	int res = -ENOENT;
 
 	down(&dir->d_inode->i_sem);
-	victim = sysfs_get_dentry(dir, attr->name);
+	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
 	if (!IS_ERR(victim)) {
 		if (victim->d_inode &&
 		    (victim->d_parent->d_inode == dir->d_inode)) {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f11ac5ea70..122145b089 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -11,6 +11,7 @@
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/dcache.h>
+#include <linux/namei.h>
 #include <linux/err.h>
 #include "sysfs.h"
 
@@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj,
 	struct dentry * dir;
 
 	if (grp->name)
-		dir = sysfs_get_dentry(kobj->dentry,grp->name);
+		dir = lookup_one_len(grp->name, kobj->dentry,
+				strlen(grp->name));
 	else
 		dir = dget(kobj->dentry);
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 565cac1d42..8de13bafaa 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -166,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
 	return error;
 }
 
-struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name)
-{
-	struct qstr qstr;
-
-	qstr.name = name;
-	qstr.len = strlen(name);
-	qstr.hash = full_name_hash(name,qstr.len);
-	return lookup_hash(&qstr,parent);
-}
-
 /*
  * Get the name for corresponding element represented by the given sysfs_dirent
  */
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 29da6f5f07..3f8953e0e5 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -7,7 +7,6 @@ extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 
 extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
 				umode_t, int);
-extern struct dentry * sysfs_get_dentry(struct dentry *, const char *);
 
 extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
 extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
-- 
cgit v1.2.2


From 84de856ed30c568c2bb7b9ac0679772bd2737d9b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:16 -0700
Subject: [PATCH] quota: consolidate code surrounding vfs_quota_on_mount

Move some code duplicated in both callers into vfs_quota_on_mount

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c          | 23 +++++++++++++++++++----
 fs/ext3/super.c     | 18 ++----------------
 fs/reiserfs/super.c | 21 +++------------------
 3 files changed, 24 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index 3995ce7907..343c036556 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1519,14 +1519,29 @@ out_path:
  * This function is used when filesystem needs to initialize quotas
  * during mount time.
  */
-int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry)
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+		int format_id, int type)
 {
+	struct qstr name = {.name = qf_name, .len = 0, .len = strlen(qf_name)};
+	struct dentry *dentry;
 	int error;
 
+	dentry = lookup_hash(&name, sb->s_root);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
 	error = security_quota_on(dentry);
-	if (error)
-		return error;
-	return vfs_quota_on_inode(dentry->d_inode, type, format_id);
+	if (!error)
+		error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
+
+	/*
+	 * Now invalidate and put the dentry - quota got its own reference
+	 * to inode and dentry has at least wrong hash so we had better
+	 * throw it away.
+	 */
+	d_invalidate(dentry);
+	dput(dentry);
+	return error;
 }
 
 /* Generic routine for getting common part of quota structure */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 981ccb233e..9630fbfdc2 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2348,22 +2348,8 @@ static int ext3_write_info(struct super_block *sb, int type)
  */
 static int ext3_quota_on_mount(struct super_block *sb, int type)
 {
-	int err;
-	struct dentry *dentry;
-	struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type],
-			     .hash = 0,
-			     .len = strlen(EXT3_SB(sb)->s_qf_names[type])};
-
-	dentry = lookup_hash(&name, sb->s_root);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-	err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry);
-	/* Now invalidate and put the dentry - quota got its own reference
-	 * to inode and dentry has at least wrong hash so we had better
-	 * throw it away */
-	d_invalidate(dentry);
-	dput(dentry);
-	return err;
+	return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
+			EXT3_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b35b877449..aae0779ed5 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1932,27 +1932,12 @@ static int reiserfs_write_info(struct super_block *sb, int type)
 }
 
 /*
- * Turn on quotas during mount time - we need to find
- * the quota file and such...
+ * Turn on quotas during mount time - we need to find the quota file and such...
  */
 static int reiserfs_quota_on_mount(struct super_block *sb, int type)
 {
-    int err;
-    struct dentry *dentry;
-    struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type],
-                         .hash = 0,
-                         .len = strlen(REISERFS_SB(sb)->s_qf_names[type])};
-
-    dentry = lookup_hash(&name, sb->s_root);
-    if (IS_ERR(dentry))
-            return PTR_ERR(dentry);
-    err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry);
-    /* Now invalidate and put the dentry - quota got its own reference
-     * to inode and dentry has at least wrong hash so we had better
-     * throw it away */
-    d_invalidate(dentry);
-    dput(dentry);
-    return err;
+	return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
+			REISERFS_SB(sb)->s_jquota_fmt, type);
 }
 
 /*
-- 
cgit v1.2.2


From 2fa389c5eb8c97d621653184d2adf5fdbd4a3167 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:16 -0700
Subject: [PATCH] quota: sanitize dentry handling in vfs_quota_on_mount

Use lookup_one_len instead of opencoding a simplified lookup using
lookup_hash with a fake hash.

Also there's no need anymore for the d_invalidate as we have a completely
valid dentry now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/dquot.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/dquot.c b/fs/dquot.c
index 343c036556..37212b039a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1522,11 +1522,10 @@ out_path:
 int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
 		int format_id, int type)
 {
-	struct qstr name = {.name = qf_name, .len = 0, .len = strlen(qf_name)};
 	struct dentry *dentry;
 	int error;
 
-	dentry = lookup_hash(&name, sb->s_root);
+	dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
@@ -1534,12 +1533,6 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
 	if (!error)
 		error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
 
-	/*
-	 * Now invalidate and put the dentry - quota got its own reference
-	 * to inode and dentry has at least wrong hash so we had better
-	 * throw it away.
-	 */
-	d_invalidate(dentry);
 	dput(dentry);
 	return error;
 }
-- 
cgit v1.2.2


From d6e711448137ca3301512cec41a2c2ce852b3d0a Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Thu, 23 Jun 2005 00:09:43 -0700
Subject: [PATCH] setuid core dump

Add a new `suid_dumpable' sysctl:

This value can be used to query and set the core dump mode for setuid
or otherwise protected/tainted binaries. The modes are

0 - (default) - traditional behaviour.  Any process which has changed
    privilege levels or is execute only will not be dumped

1 - (debug) - all processes dump core when possible.  The core dump is
    owned by the current user and no security is applied.  This is intended
    for system debugging situations only.  Ptrace is unchecked.

2 - (suidsafe) - any binary which normally would not be dumped is dumped
    readable by root only.  This allows the end user to remove such a dump but
    not access it directly.  For security reasons core dumps in this mode will
    not overwrite one another or other files.  This mode is appropriate when
    administrators are attempting to debug problems in a normal environment.

(akpm:

> > +EXPORT_SYMBOL(suid_dumpable);
>
> EXPORT_SYMBOL_GPL?

No problem to me.

> >  	if (current->euid == current->uid && current->egid == current->gid)
> >  		current->mm->dumpable = 1;
>
> Should this be SUID_DUMP_USER?

Actually the feedback I had from last time was that the SUID_ defines
should go because it's clearer to follow the numbers. They can go
everywhere (and there are lots of places where dumpable is tested/used
as a bool in untouched code)

> Maybe this should be renamed to `dump_policy' or something.  Doing that
> would help us catch any code which isn't using the #defines, too.

Fair comment. The patch was designed to be easy to maintain for Red Hat
rather than for merging. Changing that field would create a gigantic
diff because it is used all over the place.

)

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c      | 23 +++++++++++++++++++++--
 fs/proc/base.c |  6 ++++--
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 3a4b35a14c..48871917d3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -58,6 +58,9 @@
 
 int core_uses_pid;
 char core_pattern[65] = "core";
+int suid_dumpable = 0;
+
+EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
 static struct linux_binfmt *formats;
@@ -864,6 +867,9 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	if (current->euid == current->uid && current->egid == current->gid)
 		current->mm->dumpable = 1;
+	else
+		current->mm->dumpable = suid_dumpable;
+
 	name = bprm->filename;
 
 	/* Copies the binary name from after last slash */
@@ -884,7 +890,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	    permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) ||
 	    (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
 		suid_keys(current);
-		current->mm->dumpable = 0;
+		current->mm->dumpable = suid_dumpable;
 	}
 
 	/* An exec changes our domain. We are no longer part of the thread
@@ -1432,6 +1438,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	struct inode * inode;
 	struct file * file;
 	int retval = 0;
+	int fsuid = current->fsuid;
+	int flag = 0;
 
 	binfmt = current->binfmt;
 	if (!binfmt || !binfmt->core_dump)
@@ -1441,6 +1449,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 		up_write(&mm->mmap_sem);
 		goto fail;
 	}
+
+	/*
+	 *	We cannot trust fsuid as being the "true" uid of the
+	 *	process nor do we know its entire history. We only know it
+	 *	was tainted so we dump it as root in mode 2.
+	 */
+	if (mm->dumpable == 2) {	/* Setuid core dump mode */
+		flag = O_EXCL;		/* Stop rewrite attacks */
+		current->fsuid = 0;	/* Dump root private */
+	}
 	mm->dumpable = 0;
 	init_completion(&mm->core_done);
 	spin_lock_irq(&current->sighand->siglock);
@@ -1466,7 +1484,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
  	lock_kernel();
 	format_corename(corename, core_pattern, signr);
 	unlock_kernel();
-	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600);
+	file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
 	if (IS_ERR(file))
 		goto fail_unlock;
 	inode = file->f_dentry->d_inode;
@@ -1491,6 +1509,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 close_fail:
 	filp_close(file, NULL);
 fail_unlock:
+	current->fsuid = fsuid;
 	complete_all(&mm->core_done);
 fail:
 	return retval;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31903aadd..ace151fa48 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task)
 	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
 		goto out;
 	rmb();
-	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+	if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
 		goto out;
 	if (security_ptrace(current, task))
 		goto out;
@@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task)
 	if (mm)
 		dumpable = mm->dumpable;
 	task_unlock(task);
-	return dumpable;
+	if(dumpable == 1)
+		return 1;
+	return 0;
 }
 
 
-- 
cgit v1.2.2


From acfa1823d33859b0db77701726c9ca5ccc6e6f25 Mon Sep 17 00:00:00 2001
From: Andreas Dilger <adilger@clusterfs.com>
Date: Thu, 23 Jun 2005 00:09:45 -0700
Subject: [PATCH] Support for dx directories in ext3_get_parent (NFSD)

Henrik Grubbstrom noted:

The 2.6.10 ext3_get_parent attempts to use ext3_find_entry to look up the
entry "..", which fails for dx directories since ".." is not present in the
directory hash table.  The patch below solves this by looking up the dotdot
entry in the dx_root block.

Typical symptoms of the above bug are intermittent claims by nfsd that
files or directories are missing on exported ext3 filesystems.

cf https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=150759 and
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=144556

ext3_get_parent() is IMHO the wrong place to fix this bug as it introduces
a lot of internals from htree into that function.  Instead, I think this
should be fixed in ext3_find_entry() as in the below patch.  This has the
added advantage that it works for any callers of ext3_find_entry() and not
just ext3_lookup_parent().

Signed-off-by: Andreas Dilger <adilger@clusterfs.com>
Signed-off-by: Henrik Grubbstrom <grubba@grubba.org>
Cc: <ext2-devel@lists.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/namei.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 79742d824a..60e44e6dd7 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
 	struct inode *dir = dentry->d_parent->d_inode;
 
 	sb = dir->i_sb;
-	if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
-		return NULL;
+	/* NFS may look up ".." - look at dx_root directory block */
+	if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+		if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+			return NULL;
+	} else {
+		frame = frames;
+		frame->bh = NULL;			/* for dx_release() */
+		frame->at = (struct dx_entry *)frames;	/* hack for zero entry*/
+		dx_set_block(frame->at, 0);		/* dx_root block is 0 */
+	}
 	hash = hinfo.hash;
 	do {
 		block = dx_get_block(frame->at);
-- 
cgit v1.2.2


From c663e5d80ebec426916ad2aa5400c7ec99aa572e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:09:49 -0700
Subject: [PATCH] add some comments to lookup_create()

In a duplicate of lookup_create in the af_unix code Al commented what's
going on nicely, so let's bring that over to lookup_create before the copy
is going away (I'll send a patch soon)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index a7f7f44119..fa8df81ce8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1577,19 +1577,35 @@ do_link:
  *
  * Simple function to lookup and return a dentry and create it
  * if it doesn't exist.  Is SMP-safe.
+ *
+ * Returns with nd->dentry->d_inode->i_sem locked.
  */
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
-	struct dentry *dentry;
+	struct dentry *dentry = ERR_PTR(-EEXIST);
 
 	down(&nd->dentry->d_inode->i_sem);
-	dentry = ERR_PTR(-EEXIST);
+	/*
+	 * Yucky last component or no last component at all?
+	 * (foo/., foo/.., /////)
+	 */
 	if (nd->last_type != LAST_NORM)
 		goto fail;
 	nd->flags &= ~LOOKUP_PARENT;
+
+	/*
+	 * Do the final lookup.
+	 */
 	dentry = lookup_hash(&nd->last, nd->dentry);
 	if (IS_ERR(dentry))
 		goto fail;
+
+	/*
+	 * Special case - lookup gave negative, but... we had foo/bar/
+	 * From the vfs_mknod() POV we just have a negative dentry -
+	 * all is fine. Let's be bastards - you had / on the end, you've
+	 * been asking for (non-existent) directory. -ENOENT for you.
+	 */
 	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
 		goto enoent;
 	return dentry;
-- 
cgit v1.2.2


From af4d2ecbf007b7df3db7a41eedccdc05b8006d0b Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Thu, 23 Jun 2005 00:09:50 -0700
Subject: [PATCH] Fix of bogus file max limit messages

This patch fixes incorrect and bogus kernel messages claiming that the
file-max limit was reached when it is actually the allocation that fails.

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-Off-By: Denis Lunev <den@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file_table.c | 57 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 03d83cb686..fa7849fae1 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -63,42 +63,45 @@ static inline void file_free(struct file *f)
  */
 struct file *get_empty_filp(void)
 {
-static int old_max;
+	static int old_max;
 	struct file * f;
 
 	/*
 	 * Privileged users can go above max_files
 	 */
-	if (files_stat.nr_files < files_stat.max_files ||
-				capable(CAP_SYS_ADMIN)) {
-		f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
-		if (f) {
-			memset(f, 0, sizeof(*f));
-			if (security_file_alloc(f)) {
-				file_free(f);
-				goto fail;
-			}
-			eventpoll_init_file(f);
-			atomic_set(&f->f_count, 1);
-			f->f_uid = current->fsuid;
-			f->f_gid = current->fsgid;
-			rwlock_init(&f->f_owner.lock);
-			/* f->f_version: 0 */
-			INIT_LIST_HEAD(&f->f_list);
-			f->f_maxcount = INT_MAX;
-			return f;
-		}
-	}
-
+	if (files_stat.nr_files >= files_stat.max_files &&
+				!capable(CAP_SYS_ADMIN))
+		goto over;
+
+	f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+	if (f == NULL)
+		goto fail;
+
+	memset(f, 0, sizeof(*f));
+	if (security_file_alloc(f))
+		goto fail_sec;
+
+	eventpoll_init_file(f);
+	atomic_set(&f->f_count, 1);
+	f->f_uid = current->fsuid;
+	f->f_gid = current->fsgid;
+	rwlock_init(&f->f_owner.lock);
+	/* f->f_version: 0 */
+	INIT_LIST_HEAD(&f->f_list);
+	f->f_maxcount = INT_MAX;
+	return f;
+
+over:
 	/* Ran out of filps - report that */
-	if (files_stat.max_files >= old_max) {
+	if (files_stat.nr_files > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n",
 					files_stat.max_files);
-		old_max = files_stat.max_files;
-	} else {
-		/* Big problems... */
-		printk(KERN_WARNING "VFS: filp allocation failed\n");
+		old_max = files_stat.nr_files;
 	}
+	goto fail;
+
+fail_sec:
+	file_free(f);
 fail:
 	return NULL;
 }
-- 
cgit v1.2.2


From 618f06362ae3f60f95d7b0e666de25ee6ae35679 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Thu, 23 Jun 2005 00:09:54 -0700
Subject: [PATCH] O(1) sb list traversing on syncs

This patch removes O(n^2) super block loops in sync_inodes(),
sync_filesystems() etc.  in favour of using __put_super_and_need_restart()
which I introduced earlier.  We faced noticeably long freezes on sb
syncing when there are thousands of super blocks in the system.

Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fs-writeback.c | 64 ++++++++++++++++++------------------------
 fs/quota.c        | 60 ++++++++++++++++------------------------
 fs/super.c        | 83 ++++++++++++++++++++++++++++++-------------------------
 3 files changed, 96 insertions(+), 111 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e050fa582..e94ab398b7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
 	spin_unlock(&sb_lock);
 }
 
-/*
- * Find a superblock with inodes that need to be synced
- */
-static struct super_block *get_super_to_sync(void)
-{
-	struct super_block *sb;
-restart:
-	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.prev);
-	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-		if (sb->s_syncing)
-			continue;
-		sb->s_syncing = 1;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
-			goto restart;
-		}
-		return sb;
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 /**
  * sync_inodes - writes all inodes to disk
  * @wait: wait for completion
@@ -530,23 +504,39 @@ restart:
  * outstanding dirty inodes, the writeback goes block-at-a-time within the
  * filesystem's write_inode().  This is extremely slow.
  */
-void sync_inodes(int wait)
+static void __sync_inodes(int wait)
 {
 	struct super_block *sb;
 
-	set_sb_syncing(0);
-	while ((sb = get_super_to_sync()) != NULL) {
-		sync_inodes_sb(sb, 0);
-		sync_blockdev(sb->s_bdev);
-		drop_super(sb);
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_syncing)
+			continue;
+		sb->s_syncing = 1;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root) {
+			sync_inodes_sb(sb, wait);
+			sync_blockdev(sb->s_bdev);
+		}
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
+	spin_unlock(&sb_lock);
+}
+
+void sync_inodes(int wait)
+{
+	set_sb_syncing(0);
+	__sync_inodes(0);
+
 	if (wait) {
 		set_sb_syncing(0);
-		while ((sb = get_super_to_sync()) != NULL) {
-			sync_inodes_sb(sb, 1);
-			sync_blockdev(sb->s_bdev);
-			drop_super(sb);
-		}
+		__sync_inodes(1);
 	}
 }
 
diff --git a/fs/quota.c b/fs/quota.c
index 3f0333a51a..f5d1cff551 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
 	return error;
 }
 
-static struct super_block *get_super_to_sync(int type)
-{
-	struct list_head *head;
-	int cnt, dirty;
-
-restart:
-	spin_lock(&sb_lock);
-	list_for_each(head, &super_blocks) {
-		struct super_block *sb = list_entry(head, struct super_block, s_list);
-
-		/* This test just improves performance so it needn't be reliable... */
-		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
-			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
-				dirty = 1;
-		if (!dirty)
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (!sb->s_root) {
-			drop_super(sb);
-			goto restart;
-		}
-		return sb;
-	}
-	spin_unlock(&sb_lock);
-	return NULL;
-}
-
 static void quota_sync_sb(struct super_block *sb, int type)
 {
 	int cnt;
@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)
 
 void sync_dquots(struct super_block *sb, int type)
 {
+	int cnt, dirty;
+
 	if (sb) {
 		if (sb->s_qcop->quota_sync)
 			quota_sync_sb(sb, type);
+		return;
 	}
-	else {
-		while ((sb = get_super_to_sync(type)) != NULL) {
-			if (sb->s_qcop->quota_sync)
-				quota_sync_sb(sb, type);
-			drop_super(sb);
-		}
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		/* This test just improves performance so it needn't be reliable... */
+		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
+			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
+			    && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
+				dirty = 1;
+		if (!dirty)
+			continue;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		if (sb->s_root && sb->s_qcop->quota_sync)
+			quota_sync_sb(sb, type);
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
+	spin_unlock(&sb_lock);
 }
 
 /* Copy parameters and call proper function */
diff --git a/fs/super.c b/fs/super.c
index 573bcc81bb..25bc1ec6bc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb)
  */
 void sync_supers(void)
 {
-	struct super_block * sb;
-restart:
+	struct super_block *sb;
+
 	spin_lock(&sb_lock);
-	sb = sb_entry(super_blocks.next);
-	while (sb != sb_entry(&super_blocks))
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (sb->s_dirt) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
 			down_read(&sb->s_umount);
 			write_super(sb);
-			drop_super(sb);
-			goto restart;
-		} else
-			sb = sb_entry(sb->s_list.next);
+			up_read(&sb->s_umount);
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto restart;
+		}
+	}
 	spin_unlock(&sb_lock);
 }
 
@@ -381,20 +383,16 @@ void sync_filesystems(int wait)
 
 	down(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_op->sync_fs)
 			continue;
 		if (sb->s_flags & MS_RDONLY)
 			continue;
 		sb->s_need_sync_fs = 1;
 	}
-	spin_unlock(&sb_lock);
 
 restart:
-	spin_lock(&sb_lock);
-	for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-			sb = sb_entry(sb->s_list.next)) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (!sb->s_need_sync_fs)
 			continue;
 		sb->s_need_sync_fs = 0;
@@ -405,8 +403,11 @@ restart:
 		down_read(&sb->s_umount);
 		if (sb->s_root && (wait || sb->s_dirt))
 			sb->s_op->sync_fs(sb, wait);
-		drop_super(sb);
-		goto restart;
+		up_read(&sb->s_umount);
+		/* restart only when sb is no longer on the list */
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
 	}
 	spin_unlock(&sb_lock);
 	up(&mutex);
@@ -422,21 +423,25 @@ restart:
 
 struct super_block * get_super(struct block_device *bdev)
 {
-	struct list_head *p;
+	struct super_block *sb;
+
 	if (!bdev)
 		return NULL;
-rescan:
+
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_bdev == bdev) {
-			s->s_count++;
+rescan:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_bdev == bdev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
-			goto rescan;
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			up_read(&sb->s_umount);
+			/* restart only when sb is no longer on the list */
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto rescan;
 		}
 	}
 	spin_unlock(&sb_lock);
@@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super);
  
 struct super_block * user_get_super(dev_t dev)
 {
-	struct list_head *p;
+	struct super_block *sb;
 
-rescan:
 	spin_lock(&sb_lock);
-	list_for_each(p, &super_blocks) {
-		struct super_block *s = sb_entry(p);
-		if (s->s_dev ==  dev) {
-			s->s_count++;
+rescan:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_dev ==  dev) {
+			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&s->s_umount);
-			if (s->s_root)
-				return s;
-			drop_super(s);
-			goto rescan;
+			down_read(&sb->s_umount);
+			if (sb->s_root)
+				return sb;
+			up_read(&sb->s_umount);
+			/* restart only when sb is no longer on the list */
+			spin_lock(&sb_lock);
+			if (__put_super_and_need_restart(sb))
+				goto rescan;
 		}
 	}
 	spin_unlock(&sb_lock);
-- 
cgit v1.2.2


From ef3daeda7b58f046f94b26637d500354038d39f4 Mon Sep 17 00:00:00 2001
From: Yoav Zach <yoav_zach@yahoo.com>
Date: Thu, 23 Jun 2005 00:09:58 -0700
Subject: [PATCH] Don't force O_LARGEFILE for 32 bit processes on ia64

In the ia64 kernel, the O_LARGEFILE flag is forced when opening a file.  This
is problematic for execution of 32 bit processes, which are not largefile
aware, either by SW emulation or by HW execution.

For such processes, the problem is two-fold:

1) When trying to open a file that is larger than 4G
   the operation should fail, but it does not
2) Writing to an offset larger than 4G should fail, but
   it does not

The proposed patch takes advantage of the way 32 bit processes are
identified in ia64 systems.  Such processes have PER_LINUX32 for their
personality.  With the patch, the ia64 kernel will not enforce the
O_LARGEFILE flag if the current process has PER_LINUX32 set.  The behavior
for all other architectures remains unchanged.

Signed-off-by: Yoav Zach <yoav.zach@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 963bd81a44..2ebb72c1a8 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -21,6 +21,7 @@
 #include <linux/vfs.h>
 #include <asm/uaccess.h>
 #include <linux/fs.h>
+#include <linux/personality.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 
@@ -935,9 +936,9 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 	char * tmp;
 	int fd, error;
 
-#if BITS_PER_LONG != 32
-	flags |= O_LARGEFILE;
-#endif
+	if (force_o_largefile())
+		flags |= O_LARGEFILE;
+
 	tmp = getname(filename);
 	fd = PTR_ERR(tmp);
 	if (!IS_ERR(tmp)) {
-- 
cgit v1.2.2


From dfb388bf8a328f206bba33933dd97230f412238b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 23 Jun 2005 00:10:02 -0700
Subject: [PATCH] factor out common code in sys_fsync/sys_fdatasync

This patch consolidates sys_fsync and sys_fdatasync.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 45 ++++++++++-----------------------------------
 1 file changed, 10 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 0befa724ab..12bdb27911 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	return ret;
 }
 
-asmlinkage long sys_fsync(unsigned int fd)
+static long do_fsync(unsigned int fd, int datasync)
 {
 	struct file * file;
 	struct address_space *mapping;
@@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd)
 	if (!file)
 		goto out;
 
-	mapping = file->f_mapping;
-
 	ret = -EINVAL;
 	if (!file->f_op || !file->f_op->fsync) {
 		/* Why?  We can still call filemap_fdatawrite */
 		goto out_putf;
 	}
 
+	mapping = file->f_mapping;
+
 	current->flags |= PF_SYNCWRITE;
 	ret = filemap_fdatawrite(mapping);
 
@@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd)
 	 * which could cause livelocks in fsync_buffers_list
 	 */
 	down(&mapping->host->i_sem);
-	err = file->f_op->fsync(file, file->f_dentry, 0);
+	err = file->f_op->fsync(file, file->f_dentry, datasync);
 	if (!ret)
 		ret = err;
 	up(&mapping->host->i_sem);
@@ -373,39 +373,14 @@ out:
 	return ret;
 }
 
-asmlinkage long sys_fdatasync(unsigned int fd)
+asmlinkage long sys_fsync(unsigned int fd)
 {
-	struct file * file;
-	struct address_space *mapping;
-	int ret, err;
-
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto out;
-
-	ret = -EINVAL;
-	if (!file->f_op || !file->f_op->fsync)
-		goto out_putf;
-
-	mapping = file->f_mapping;
-
-	current->flags |= PF_SYNCWRITE;
-	ret = filemap_fdatawrite(mapping);
-	down(&mapping->host->i_sem);
-	err = file->f_op->fsync(file, file->f_dentry, 1);
-	if (!ret)
-		ret = err;
-	up(&mapping->host->i_sem);
-	err = filemap_fdatawait(mapping);
-	if (!ret)
-		ret = err;
-	current->flags &= ~PF_SYNCWRITE;
+	return do_fsync(fd, 0);
+}
 
-out_putf:
-	fput(file);
-out:
-	return ret;
+asmlinkage long sys_fdatasync(unsigned int fd)
+{
+	return do_fsync(fd, 1);
 }
 
 /*
-- 
cgit v1.2.2


From b030a4dd609e167da7f73c2d1fa5af864a0aea17 Mon Sep 17 00:00:00 2001
From: Pekka Enberg <penberg@cs.helsinki.fi>
Date: Thu, 23 Jun 2005 00:10:03 -0700
Subject: [PATCH] Remove eventpoll macro obfuscation

This patch gets rid of some macro obfuscation from fs/eventpoll.c by
removing slab allocator wrappers and converting macros to static inline
functions.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/eventpoll.c | 195 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 110 insertions(+), 85 deletions(-)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9900e33365..6ab1dd0ca9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,57 +101,6 @@
 /* Maximum number of poll wake up nests we are allowing */
 #define EP_MAX_POLLWAKE_NESTS 4
 
-/* Macro to allocate a "struct epitem" from the slab cache */
-#define EPI_MEM_ALLOC()	(struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
-
-/* Macro to free a "struct epitem" to the slab cache */
-#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p)
-
-/* Macro to allocate a "struct eppoll_entry" from the slab cache */
-#define PWQ_MEM_ALLOC()	(struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
-
-/* Macro to free a "struct eppoll_entry" to the slab cache */
-#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p)
-
-/* Fast test to see if the file is an evenpoll file */
-#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops)
-
-/* Setup the structure that is used as key for the rb-tree */
-#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0)
-
-/* Compare rb-tree keys */
-#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \
-			    ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd))
-
-/* Special initialization for the rb-tree node to detect linkage */
-#define EP_RB_INITNODE(n) (n)->rb_parent = (n)
-
-/* Removes a node from the rb-tree and marks it for a fast is-linked check */
-#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0)
-
-/* Fast check to verify that the item is linked to the main rb-tree */
-#define EP_RB_LINKED(n) ((n)->rb_parent != (n))
-
-/*
- * Remove the item from the list and perform its initialization.
- * This is useful for us because we can test if the item is linked
- * using "EP_IS_LINKED(p)".
- */
-#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0)
-
-/* Tells us if the item is currently linked */
-#define EP_IS_LINKED(p) (!list_empty(p))
-
-/* Get the "struct epitem" from a wait queue pointer */
-#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
-
-/* Get the "struct epitem" from an epoll queue wrapper */
-#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
-
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
-
-
 struct epoll_filefd {
 	struct file *file;
 	int fd;
@@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = {
 
 
 
+/* Fast test to see if the file is an evenpoll file */
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
+
+/* Setup the structure that is used as key for the rb-tree */
+static inline void ep_set_ffd(struct epoll_filefd *ffd,
+			      struct file *file, int fd)
+{
+	ffd->file = file;
+	ffd->fd = fd;
+}
+
+/* Compare rb-tree keys */
+static inline int ep_cmp_ffd(struct epoll_filefd *p1,
+			     struct epoll_filefd *p2)
+{
+	return (p1->file > p2->file ? +1:
+	        (p1->file < p2->file ? -1 : p1->fd - p2->fd));
+}
+
+/* Special initialization for the rb-tree node to detect linkage */
+static inline void ep_rb_initnode(struct rb_node *n)
+{
+	n->rb_parent = n;
+}
+
+/* Removes a node from the rb-tree and marks it for a fast is-linked check */
+static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
+{
+	rb_erase(n, r);
+	n->rb_parent = n;
+}
+
+/* Fast check to verify that the item is linked to the main rb-tree */
+static inline int ep_rb_linked(struct rb_node *n)
+{
+	return n->rb_parent != n;
+}
+
+/*
+ * Remove the item from the list and perform its initialization.
+ * This is useful for us because we can test if the item is linked
+ * using "ep_is_linked(p)".
+ */
+static inline void ep_list_del(struct list_head *p)
+{
+	list_del(p);
+	INIT_LIST_HEAD(p);
+}
+
+/* Tells us if the item is currently linked */
+static inline int ep_is_linked(struct list_head *p)
+{
+	return !list_empty(p);
+}
+
+/* Get the "struct epitem" from a wait queue pointer */
+static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
+{
+	return container_of(p, struct eppoll_entry, wait)->base;
+}
+
+/* Get the "struct epitem" from an epoll queue wrapper */
+static inline struct epitem * ep_item_from_epqueue(poll_table *p)
+{
+	return container_of(p, struct ep_pqueue, pt)->epi;
+}
+
+/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
+static inline int ep_op_hash_event(int op)
+{
+	return op != EPOLL_CTL_DEL;
+}
+
 /* Initialize the poll safe wake up structure */
 static void ep_poll_safewake_init(struct poll_safewake *psw)
 {
@@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file)
 		epi = list_entry(lsthead->next, struct epitem, fllink);
 
 		ep = epi->ep;
-		EP_LIST_DEL(&epi->fllink);
+		ep_list_del(&epi->fllink);
 		down_write(&ep->sem);
 		ep_remove(ep, epi);
 		up_write(&ep->sem);
@@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 		     current, epfd, op, fd, event));
 
 	error = -EFAULT;
-	if (EP_OP_HASH_EVENT(op) &&
+	if (ep_op_hash_event(op) &&
 	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
 		goto eexit_1;
 
@@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
 	 * adding an epoll file descriptor inside itself.
 	 */
 	error = -EINVAL;
-	if (file == tfile || !IS_FILE_EPOLL(file))
+	if (file == tfile || !is_file_epoll(file))
 		goto eexit_3;
 
 	/*
@@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
 	 * the user passed to us _is_ an eventpoll file.
 	 */
 	error = -EINVAL;
-	if (!IS_FILE_EPOLL(file))
+	if (!is_file_epoll(file))
 		goto eexit_2;
 
 	/*
@@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 	struct epitem *epi, *epir = NULL;
 	struct epoll_filefd ffd;
 
-	EP_SET_FFD(&ffd, file, fd);
+	ep_set_ffd(&ffd, file, fd);
 	read_lock_irqsave(&ep->lock, flags);
 	for (rbp = ep->rbr.rb_node; rbp; ) {
 		epi = rb_entry(rbp, struct epitem, rbn);
-		kcmp = EP_CMP_FFD(&ffd, &epi->ffd);
+		kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
 		if (kcmp > 0)
 			rbp = rbp->rb_right;
 		else if (kcmp < 0)
@@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi)
 {
 
 	if (atomic_dec_and_test(&epi->usecnt))
-		EPI_MEM_FREE(epi);
+		kmem_cache_free(epi_cache, epi);
 }
 
 
@@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi)
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
 				 poll_table *pt)
 {
-	struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt);
+	struct epitem *epi = ep_item_from_epqueue(pt);
 	struct eppoll_entry *pwq;
 
-	if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) {
+	if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
 		init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
 		pwq->whead = whead;
 		pwq->base = epi;
@@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
 	while (*p) {
 		parent = *p;
 		epic = rb_entry(parent, struct epitem, rbn);
-		kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd);
+		kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
 		if (kcmp > 0)
 			p = &parent->rb_right;
 		else
@@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	struct ep_pqueue epq;
 
 	error = -ENOMEM;
-	if (!(epi = EPI_MEM_ALLOC()))
+	if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
 		goto eexit_1;
 
 	/* Item initialization follow here ... */
-	EP_RB_INITNODE(&epi->rbn);
+	ep_rb_initnode(&epi->rbn);
 	INIT_LIST_HEAD(&epi->rdllink);
 	INIT_LIST_HEAD(&epi->fllink);
 	INIT_LIST_HEAD(&epi->txlink);
 	INIT_LIST_HEAD(&epi->pwqlist);
 	epi->ep = ep;
-	EP_SET_FFD(&epi->ffd, tfile, fd);
+	ep_set_ffd(&epi->ffd, tfile, fd);
 	epi->event = *event;
 	atomic_set(&epi->usecnt, 1);
 	epi->nwait = 0;
@@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	ep_rbtree_insert(ep, epi);
 
 	/* If the file is already "ready" we drop it inside the ready list */
-	if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) {
+	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
 		list_add_tail(&epi->rdllink, &ep->rdllist);
 
 		/* Notify waiting tasks that events are available */
@@ -1007,11 +1032,11 @@ eexit_2:
 	 * allocated wait queue.
 	 */
 	write_lock_irqsave(&ep->lock, flags);
-	if (EP_IS_LINKED(&epi->rdllink))
-		EP_LIST_DEL(&epi->rdllink);
+	if (ep_is_linked(&epi->rdllink))
+		ep_list_del(&epi->rdllink);
 	write_unlock_irqrestore(&ep->lock, flags);
 
-	EPI_MEM_FREE(epi);
+	kmem_cache_free(epi_cache, epi);
 eexit_1:
 	return error;
 }
@@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	 * If the item is not linked to the hash it means that it's on its
 	 * way toward the removal. Do nothing in this case.
 	 */
-	if (EP_RB_LINKED(&epi->rbn)) {
+	if (ep_rb_linked(&epi->rbn)) {
 		/*
 		 * If the item is "hot" and it is not registered inside the ready
 		 * list, push it inside. If the item is not "hot" and it is currently
 		 * registered inside the ready list, unlink it.
 		 */
 		if (revents & event->events) {
-			if (!EP_IS_LINKED(&epi->rdllink)) {
+			if (!ep_is_linked(&epi->rdllink)) {
 				list_add_tail(&epi->rdllink, &ep->rdllist);
 
 				/* Notify waiting tasks that events are available */
@@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 		while (!list_empty(lsthead)) {
 			pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
 
-			EP_LIST_DEL(&pwq->llink);
+			ep_list_del(&pwq->llink);
 			remove_wait_queue(pwq->whead, &pwq->wait);
-			PWQ_MEM_FREE(pwq);
+			kmem_cache_free(pwq_cache, pwq);
 		}
 	}
 }
@@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 	 * The check protect us from doing a double unlink ( crash ).
 	 */
 	error = -ENOENT;
-	if (!EP_RB_LINKED(&epi->rbn))
+	if (!ep_rb_linked(&epi->rbn))
 		goto eexit_1;
 
 	/*
@@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 	 * This operation togheter with the above check closes the door to
 	 * double unlinks.
 	 */
-	EP_RB_ERASE(&epi->rbn, &ep->rbr);
+	ep_rb_erase(&epi->rbn, &ep->rbr);
 
 	/*
 	 * If the item we are going to remove is inside the ready file descriptors
 	 * we want to remove it from this list to avoid stale events.
 	 */
-	if (EP_IS_LINKED(&epi->rdllink))
-		EP_LIST_DEL(&epi->rdllink);
+	if (ep_is_linked(&epi->rdllink))
+		ep_list_del(&epi->rdllink);
 
 	error = 0;
 eexit_1:
@@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
 	/* Remove the current item from the list of epoll hooks */
 	spin_lock(&file->f_ep_lock);
-	if (EP_IS_LINKED(&epi->fllink))
-		EP_LIST_DEL(&epi->fllink);
+	if (ep_is_linked(&epi->fllink))
+		ep_list_del(&epi->fllink);
 	spin_unlock(&file->f_ep_lock);
 
 	/* We need to acquire the write IRQ lock before calling ep_unlink() */
@@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 {
 	int pwake = 0;
 	unsigned long flags;
-	struct epitem *epi = EP_ITEM_FROM_WAIT(wait);
+	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
@@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 		goto is_disabled;
 
 	/* If this file is already in the ready list we exit soon */
-	if (EP_IS_LINKED(&epi->rdllink))
+	if (ep_is_linked(&epi->rdllink))
 		goto is_linked;
 
 	list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
 		lnk = lnk->next;
 
 		/* If this file is already in the ready list we exit soon */
-		if (!EP_IS_LINKED(&epi->txlink)) {
+		if (!ep_is_linked(&epi->txlink)) {
 			/*
 			 * This is initialized in this way so that the default
 			 * behaviour of the reinjecting code will be to push back
@@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
 			/*
 			 * Unlink the item from the ready list.
 			 */
-			EP_LIST_DEL(&epi->rdllink);
+			ep_list_del(&epi->rdllink);
 		}
 	}
 
@@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 		epi = list_entry(txlist->next, struct epitem, txlink);
 
 		/* Unlink the current item from the transfer list */
-		EP_LIST_DEL(&epi->txlink);
+		ep_list_del(&epi->txlink);
 
 		/*
 		 * If the item is no more linked to the interest set, we don't
@@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
 		 * item is set to have an Edge Triggered behaviour, we don't have
 		 * to push it back either.
 		 */
-		if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) &&
-		    (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) {
+		if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
+		    (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
 			ricnt++;
 		}
-- 
cgit v1.2.2


From bb93e3a52f8db7210258a1a2134cced0b78a46e1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 23 Jun 2005 00:10:15 -0700
Subject: [PATCH] block: add unlocked_ioctl support for block devices

This patch allows block device drivers to convert their ioctl functions to
unlocked_ioctl() like character devices and other subsystems.  All
functions that were called with the BKL held before are still used that
way, but I would not be surprised if it could be removed from the ioctl
functions in drivers/block/ioctl.c themselves.

As a side note, I found that compat_blkdev_ioctl() acquires the BKL as
well, which looks like a bug.  I have checked that every user of
disk->fops->compat_ioctl() in the current git tree gets the BKL itself, so
it could easily be removed from compat_blkdev_ioctl().

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/block_dev.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index c0cbd1bc1a..e0df94c37b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
 	return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
 }
 
-static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd,
-			unsigned long arg)
+static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 }
@@ -803,7 +802,7 @@ struct file_operations def_blk_fops = {
   	.aio_write	= blkdev_file_aio_write, 
 	.mmap		= generic_file_mmap,
 	.fsync		= block_fsync,
-	.ioctl		= block_ioctl,
+	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
-- 
cgit v1.2.2


From 45778ca819accab1a4a3378b3566cab0f189164f Mon Sep 17 00:00:00 2001
From: Christoph Lameter <christoph@graphe.net>
Date: Thu, 23 Jun 2005 00:10:17 -0700
Subject: [PATCH] Remove f_error field from struct file

The following patch removes the f_error field and all checks of f_error.

Trond said:

  f_error was introduced for NFS, and made sense when we were guaranteed
  always to have a file pointer around when write errors occurred.  Since
  then, we have (for various reasons) had to introduce the nfs_open_context in
  order to track the file read/write state, and it made sense to move our
  f_error tracking there too.

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfs/direct.c |  5 -----
 fs/open.c       | 16 ++++------------
 2 files changed, 4 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6a30c844d..6537f2c4ae 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	retval = -EFAULT;
 	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
 		goto out;
-        if (file->f_error) {
-                retval = file->f_error;
-                file->f_error = 0;
-                goto out;
-        }
 	retval = -EFBIG;
 	if (limit != RLIM_INFINITY) {
 		if (pos >= limit) {
diff --git a/fs/open.c b/fs/open.c
index 2ebb72c1a8..5dd411b084 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -981,23 +981,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode)
  */
 int filp_close(struct file *filp, fl_owner_t id)
 {
-	int retval;
-
-	/* Report and clear outstanding errors */
-	retval = filp->f_error;
-	if (retval)
-		filp->f_error = 0;
+	int retval = 0;
 
 	if (!file_count(filp)) {
 		printk(KERN_ERR "VFS: Close: file count is 0\n");
-		return retval;
+		return 0;
 	}
 
-	if (filp->f_op && filp->f_op->flush) {
-		int err = filp->f_op->flush(filp);
-		if (!retval)
-			retval = err;
-	}
+	if (filp->f_op && filp->f_op->flush)
+		retval = filp->f_op->flush(filp);
 
 	dnotify_flush(filp, id);
 	locks_remove_posix(filp, id);
-- 
cgit v1.2.2


From 9a59f452abe11f569e13ec16c51e6d61c54b9838 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 00:10:19 -0700
Subject: [PATCH] remove <linux/xattr_acl.h>

This file duplicates <linux/posix_acl_xattr.h>, using slightly different
names.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/acl.c           | 12 ++++++------
 fs/ext2/acl.h           |  2 +-
 fs/ext3/acl.c           | 12 ++++++------
 fs/ext3/acl.h           |  2 +-
 fs/jfs/acl.c            | 11 ++++++-----
 fs/jfs/jfs_acl.h        |  2 --
 fs/jfs/super.c          |  1 +
 fs/jfs/xattr.c          |  7 ++++---
 fs/nfsd/vfs.c           |  9 ++++-----
 fs/reiserfs/xattr_acl.c | 26 +++++++++++++-------------
 10 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 25f4a64fd6..213148c36e 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -396,12 +396,12 @@ static size_t
 ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
 			   const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
-		memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
 	return size;
 }
 
@@ -409,12 +409,12 @@ static size_t
 ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
 			    const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_size)
-		memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
 	return size;
 }
 
@@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 
 struct xattr_handler ext2_xattr_acl_access_handler = {
-	.prefix	= XATTR_NAME_ACL_ACCESS,
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
 	.list	= ext2_xattr_list_acl_access,
 	.get	= ext2_xattr_get_acl_access,
 	.set	= ext2_xattr_set_acl_access,
 };
 
 struct xattr_handler ext2_xattr_acl_default_handler = {
-	.prefix	= XATTR_NAME_ACL_DEFAULT,
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
 	.list	= ext2_xattr_list_acl_default,
 	.get	= ext2_xattr_get_acl_default,
 	.set	= ext2_xattr_set_acl_default,
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index fed96ae81a..0bde85bafe 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -4,7 +4,7 @@
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
 
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 
 #define EXT2_ACL_VERSION	0x0001
 
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 638c13a26c..133f5aa581 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -417,12 +417,12 @@ static size_t
 ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
 			   const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+	const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
-		memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+		memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
 	return size;
 }
 
@@ -430,12 +430,12 @@ static size_t
 ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
 			    const char *name, size_t name_len)
 {
-	const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+	const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
 
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return 0;
 	if (list && size <= list_len)
-		memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+		memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
 	return size;
 }
 
@@ -535,14 +535,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 
 struct xattr_handler ext3_xattr_acl_access_handler = {
-	.prefix	= XATTR_NAME_ACL_ACCESS,
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
 	.list	= ext3_xattr_list_acl_access,
 	.get	= ext3_xattr_get_acl_access,
 	.set	= ext3_xattr_set_acl_access,
 };
 
 struct xattr_handler ext3_xattr_acl_default_handler = {
-	.prefix	= XATTR_NAME_ACL_DEFAULT,
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
 	.list	= ext3_xattr_list_acl_default,
 	.get	= ext3_xattr_get_acl_default,
 	.set	= ext3_xattr_set_acl_default,
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 98af0c0d0b..92d50b53a9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -4,7 +4,7 @@
   (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
 
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 
 #define EXT3_ACL_VERSION	0x0001
 
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 30a2bf9eed..e892dab40c 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/quotaops.h>
+#include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
@@ -36,11 +37,11 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			ea_name = XATTR_NAME_ACL_ACCESS;
+			ea_name = POSIX_ACL_XATTR_ACCESS;
 			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
-			ea_name = XATTR_NAME_ACL_DEFAULT;
+			ea_name = POSIX_ACL_XATTR_DEFAULT;
 			p_acl = &ji->i_default_acl;
 			break;
 		default:
@@ -88,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 
 	switch(type) {
 		case ACL_TYPE_ACCESS:
-			ea_name = XATTR_NAME_ACL_ACCESS;
+			ea_name = POSIX_ACL_XATTR_ACCESS;
 			p_acl = &ji->i_acl;
 			break;
 		case ACL_TYPE_DEFAULT:
-			ea_name = XATTR_NAME_ACL_DEFAULT;
+			ea_name = POSIX_ACL_XATTR_DEFAULT;
 			p_acl = &ji->i_default_acl;
 			if (!S_ISDIR(inode->i_mode))
 				return acl ? -EACCES : 0;
@@ -101,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 			return -EINVAL;
 	}
 	if (acl) {
-		size = xattr_acl_size(acl->a_count);
+		size = posix_acl_xattr_size(acl->a_count);
 		value = kmalloc(size, GFP_KERNEL);
 		if (!value)
 			return -ENOMEM;
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index d2ae430ade..a3acd3eec0 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,8 +20,6 @@
 
 #ifdef CONFIG_JFS_POSIX_ACL
 
-#include <linux/xattr_acl.h>
-
 int jfs_permission(struct inode *, int, struct nameidata *);
 int jfs_init_acl(struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 810a3653d8..ee32211288 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -24,6 +24,7 @@
 #include <linux/completion.h>
 #include <linux/vfs.h>
 #include <linux/moduleparam.h>
+#include <linux/posix_acl.h>
 #include <asm/uaccess.h>
 
 #include "jfs_incore.h"
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 6016373701..ee438d429d 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -19,6 +19,7 @@
 
 #include <linux/fs.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
@@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		return -EPERM;
 
 	/*
-	 * XATTR_NAME_ACL_ACCESS is tied to i_mode
+	 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
 	 */
-	if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) {
+	if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
 		acl = posix_acl_from_xattr(value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
@@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 		JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
 
 		return 0;
-	} else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
+	} else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
 		acl = posix_acl_from_xattr(value, value_len);
 		if (IS_ERR(acl)) {
 			rc = PTR_ERR(acl);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ae3940dc85..de340ffd33 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -50,7 +50,6 @@
 #include <linux/posix_acl.h>
 #ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl_xattr.h>
-#include <linux/xattr_acl.h>
 #include <linux/xattr.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
@@ -425,13 +424,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		goto out_nfserr;
 
 	if (pacl) {
-		error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
+		error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
 		if (error < 0)
 			goto out_nfserr;
 	}
 
 	if (dpacl) {
-		error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
+		error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
 		if (error < 0)
 			goto out_nfserr;
 	}
@@ -498,7 +497,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
 	struct posix_acl *pacl = NULL, *dpacl = NULL;
 	unsigned int flags = 0;
 
-	pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
+	pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
 	if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
 		pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
 	if (IS_ERR(pacl)) {
@@ -508,7 +507,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
 	}
 
 	if (S_ISDIR(inode->i_mode)) {
-		dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
+		dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
 		if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
 			dpacl = NULL;
 		else if (IS_ERR(dpacl)) {
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index e302071903..c312881c5f 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -4,7 +4,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
@@ -192,11 +192,11 @@ reiserfs_get_acl(struct inode *inode, int type)
 
         switch (type) {
             case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                 p_acl = &reiserfs_i->i_acl_access;
                 break;
             case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                 p_acl = &reiserfs_i->i_acl_default;
                 break;
             default:
@@ -260,7 +260,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 
         switch (type) {
             case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                 p_acl = &reiserfs_i->i_acl_access;
                 if (acl) {
                     mode_t mode = inode->i_mode;
@@ -275,7 +275,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                 }
                 break;
             case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                 p_acl = &reiserfs_i->i_acl_default;
                 if (!S_ISDIR (inode->i_mode))
                     return acl ? -EACCES : 0;
@@ -468,7 +468,7 @@ static int
 posix_acl_access_get(struct inode *inode, const char *name,
 			  void *buffer, size_t size)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 		return -EINVAL;
 	return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
 }
@@ -477,7 +477,7 @@ static int
 posix_acl_access_set(struct inode *inode, const char *name,
 			  const void *value, size_t size, int flags)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 		return -EINVAL;
 	return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
 }
@@ -487,7 +487,7 @@ posix_acl_access_del (struct inode *inode, const char *name)
 {
     struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
     struct posix_acl **acl = &reiserfs_i->i_acl_access;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
 	return -EINVAL;
     if (!IS_ERR (*acl) && *acl) {
         posix_acl_release (*acl);
@@ -510,7 +510,7 @@ posix_acl_access_list (struct inode *inode, const char *name, int namelen, char
 }
 
 struct reiserfs_xattr_handler posix_acl_access_handler = {
-	.prefix = XATTR_NAME_ACL_ACCESS,
+	.prefix = POSIX_ACL_XATTR_ACCESS,
 	.get = posix_acl_access_get,
 	.set = posix_acl_access_set,
 	.del = posix_acl_access_del,
@@ -521,7 +521,7 @@ static int
 posix_acl_default_get (struct inode *inode, const char *name,
 			   void *buffer, size_t size)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 		return -EINVAL;
 	return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
 }
@@ -530,7 +530,7 @@ static int
 posix_acl_default_set(struct inode *inode, const char *name,
 			   const void *value, size_t size, int flags)
 {
-	if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+	if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 		return -EINVAL;
 	return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
 }
@@ -540,7 +540,7 @@ posix_acl_default_del (struct inode *inode, const char *name)
 {
     struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
     struct posix_acl **acl = &reiserfs_i->i_acl_default;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
 	return -EINVAL;
     if (!IS_ERR (*acl) && *acl) {
         posix_acl_release (*acl);
@@ -563,7 +563,7 @@ posix_acl_default_list (struct inode *inode, const char *name, int namelen, char
 }
 
 struct reiserfs_xattr_handler posix_acl_default_handler = {
-	.prefix = XATTR_NAME_ACL_DEFAULT,
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
 	.get = posix_acl_default_get,
 	.set = posix_acl_default_set,
 	.del = posix_acl_default_del,
-- 
cgit v1.2.2


From 152becd26e0563aefdbc4fd1fe491928efe92d1f Mon Sep 17 00:00:00 2001
From: Anton Altaparmakov <aia21@cam.ac.uk>
Date: Thu, 23 Jun 2005 00:10:21 -0700
Subject: [PATCH] Bug in error recovery in fs/buffer.c::__block_prepare_write()

fs/buffer.c::__block_prepare_write() has broken error recovery.  It calls
the get_block() callback with "create = 1" and if that succeeds it
immediately clears buffer_new on the just allocated buffer (which has
buffer_new set).

The bug is that if an error occurs and get_block() returns != 0, we break
from this loop and go into recovery code.  This code has this comment:

/* Error case: */
/*
 * Zero out any newly allocated blocks to avoid exposing stale
 * data.  If BH_New is set, we know that the block was newly
 * allocated in the above loop.
 */

So the intent is obviously good in that it wants to clear just allocated
and hence not zeroed buffers.  However the code recognises allocated
buffers by checking for buffer_new being set.

Unfortunately, __block_prepare_write(), as discussed above, has already
cleared buffer_new on all allocated buffers, so no buffers will be zeroed
during error recovery and stale data will be leaked.

The simplest way I can see to fix this is to make the current recovery code
work by _not_ clearing buffer_new after calling get_block() in
__block_prepare_write().

We cannot safely allow buffer_new buffers to "leak out" of
__block_prepare_write(), thus we simply do a quick loop over the buffers
clearing buffer_new on each of them if it is set just before returning
"success" from __block_prepare_write().

Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 12bdb27911..13e5938a64 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1926,7 +1926,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 			if (err)
 				break;
 			if (buffer_new(bh)) {
-				clear_buffer_new(bh);
 				unmap_underlying_metadata(bh->b_bdev,
 							bh->b_blocknr);
 				if (PageUptodate(page)) {
@@ -1968,9 +1967,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (!err)
-		return err;
-
+	if (!err) {
+		bh = head;
+		do {
+			if (buffer_new(bh))
+				clear_buffer_new(bh);
+		} while ((bh = bh->b_this_page) != head);
+		return 0;
+	}
 	/* Error case: */
 	/*
 	 * Zero out any newly allocated blocks to avoid exposing stale
-- 
cgit v1.2.2


From 63e6880918e75dcb92d60aff218a76e063a471ef Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Thu, 23 Jun 2005 00:10:27 -0700
Subject: [PATCH] aio: fix do_sync_(read|write) to properly handle aio retries

When do_sync_(read|write) encounter an aio method that makes use of the
retry mechanism (i.e. returns -EIOCBRETRY), they fail to retry the
operation.  This fixes that by adding the appropriate sleep-and-retry loop.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/read_write.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/read_write.c b/fs/read_write.c
index c4c2bee373..9292f5fa4d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -203,6 +203,16 @@ Einval:
 	return -EINVAL;
 }
 
+static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
+{
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	if (!kiocbIsKicked(iocb))
+		schedule();
+	else
+		kiocbClearKicked(iocb);
+	__set_current_state(TASK_RUNNING);
+}
+
 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
 	struct kiocb kiocb;
@@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
+	while (-EIOCBRETRY ==
+		(ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+		wait_on_retry_sync_kiocb(&kiocb);
+
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
@@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos);
+	while (-EIOCBRETRY ==
+	       (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+		wait_on_retry_sync_kiocb(&kiocb);
+
 	if (-EIOCBQUEUED == ret)
 		ret = wait_on_sync_kiocb(&kiocb);
 	*ppos = kiocb.ki_pos;
-- 
cgit v1.2.2


From fed2fc18a4567d613cd35115322257c6c6c710e9 Mon Sep 17 00:00:00 2001
From: Telemaque Ndizihiwe <telendiz@eircom.net>
Date: Thu, 23 Jun 2005 00:10:33 -0700
Subject: [PATCH] sys_open() cleanup

Clean up tortured logic in sys_open().

Signed-off-by: Telemaque Ndizihiwe <telendiz@eircom.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 5dd411b084..8ec63f7359 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -934,7 +934,7 @@ EXPORT_SYMBOL(fd_install);
 asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 {
 	char * tmp;
-	int fd, error;
+	int fd;
 
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
@@ -945,20 +945,16 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 		fd = get_unused_fd();
 		if (fd >= 0) {
 			struct file *f = filp_open(tmp, flags, mode);
-			error = PTR_ERR(f);
-			if (IS_ERR(f))
-				goto out_error;
-			fd_install(fd, f);
+			if (IS_ERR(f)) {
+				put_unused_fd(fd);
+				fd = PTR_ERR(f);
+			} else {
+				fd_install(fd, f);
+			}
 		}
-out:
 		putname(tmp);
 	}
 	return fd;
-
-out_error:
-	put_unused_fd(fd);
-	fd = error;
-	goto out;
 }
 EXPORT_SYMBOL_GPL(sys_open);
 
-- 
cgit v1.2.2


From 92198f7eaa5df3479341dd8fa20c2c81aa3b1e25 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Jun 2005 22:00:59 -0700
Subject: [PATCH] pass iocb to dio_iodone_t

XFS will have to look at iocb->private to fix aio+dio.  No other filesystem
is using the blockdev_direct_IO* end_io callback.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/direct-io.c              | 2 +-
 fs/xfs/linux-2.6/xfs_aops.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1d55e7e673..0d06097bc9 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -215,7 +215,7 @@ static struct page *dio_get_page(struct dio *dio)
 static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 {
 	if (dio->end_io && dio->result)
-		dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
+		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
 		up_read(&dio->inode->i_alloc_sem);
 }
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 93ce257cd1..a3a4b5aaf5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,11 +149,12 @@ linvfs_unwritten_convert(
  */
 STATIC void
 linvfs_unwritten_convert_direct(
-	struct inode	*inode,
+	struct kiocb	*iocb,
 	loff_t		offset,
 	ssize_t		size,
 	void		*private)
 {
+	struct inode	*inode = iocb->ki_filp->f_dentry->d_inode;
 	ASSERT(!private || inode == (struct inode *)private);
 
 	/* private indicates an unwritten extent lay beneath this IO */
-- 
cgit v1.2.2


From bd6a1f16fffdfe010fdc2979fd01f12357816762 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:01 -0700
Subject: [PATCH] reiserfs: add checking of journal_begin() return value

Check return values of journal_begin() and journal_end() in the quota code
for reiserfs.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c |  6 +++--
 fs/reiserfs/super.c | 66 ++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 52 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2711dff1b7..073425e6e0 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2798,7 +2798,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 		    struct reiserfs_transaction_handle th;
 
 		    /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-		    journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+ 		    if (error)
+ 			goto out;
                     error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 		    if (error) {
 			journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
@@ -2811,7 +2813,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 		    if (attr->ia_valid & ATTR_GID)
 			inode->i_gid = attr->ia_gid;
 		    mark_inode_dirty(inode);
-		    journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
 		}
         }
         if (!error)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index aae0779ed5..031577fb41 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1841,13 +1841,18 @@ static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf)
 static int reiserfs_dquot_initialize(struct inode *inode, int type)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* We may create quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+	goto out;
     ret = dquot_initialize(inode, type);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(inode->i_sb);
     return ret;
 }
@@ -1855,13 +1860,18 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type)
 static int reiserfs_dquot_drop(struct inode *inode)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* We may delete quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+ 	goto out;
     ret = dquot_drop(inode);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(inode->i_sb);
     return ret;
 }
@@ -1869,12 +1879,17 @@ static int reiserfs_dquot_drop(struct inode *inode)
 static int reiserfs_write_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    if (ret)
+	goto out;
     ret = dquot_commit(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1882,12 +1897,17 @@ static int reiserfs_write_dquot(struct dquot *dquot)
 static int reiserfs_acquire_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+	goto out;
     ret = dquot_acquire(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1895,12 +1915,17 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
 static int reiserfs_release_dquot(struct dquot *dquot)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (ret)
+ 	goto out;
     ret = dquot_release(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(dquot->dq_sb);
     return ret;
 }
@@ -1920,13 +1945,18 @@ static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
 static int reiserfs_write_info(struct super_block *sb, int type)
 {
     struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
 
     /* Data block + inode block */
     reiserfs_write_lock(sb);
-    journal_begin(&th, sb, 2);
+    ret = journal_begin(&th, sb, 2);
+    if (ret)
+	goto out;
     ret = dquot_commit_info(sb, type);
-    journal_end(&th, sb, 2);
+    err = journal_end(&th, sb, 2);
+    if (!ret && err)
+	ret = err;
+out:
     reiserfs_write_unlock(sb);
     return ret;
 }
-- 
cgit v1.2.2


From 1f54587bea84a35125c95e19b98c2f464c50871b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:04 -0700
Subject: [PATCH] quota: ext3: Improve quota credit estimates

Use improved credit estimates for quota operations.  Also reserve space
for a quota operation in a transaction only if the filesystem was mounted
with some quota options.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/acl.c   |  5 +++--
 fs/ext3/inode.c |  7 ++++---
 fs/ext3/namei.c | 25 +++++++++++++------------
 fs/ext3/super.c | 37 +++++++++++++++++++++++++++----------
 fs/ext3/xattr.c |  2 +-
 5 files changed, 48 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 133f5aa581..3ac38266fc 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -393,7 +393,8 @@ ext3_acl_chmod(struct inode *inode)
 		int retries = 0;
 
 	retry:
-		handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+		handle = ext3_journal_start(inode,
+				EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			ext3_std_error(inode->i_sb, error);
@@ -503,7 +504,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
 		acl = NULL;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	error = ext3_set_acl(handle, inode, type, acl);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0d5fa73b18..0b2db4f618 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -128,7 +128,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
 	if (needed > EXT3_MAX_TRANS_DATA) 
 		needed = EXT3_MAX_TRANS_DATA;
 
-	return EXT3_DATA_TRANS_BLOCKS + needed;
+	return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
 }
 
 /* 
@@ -2763,7 +2763,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 
 		/* (user+group)*(old+new) structure, inode write (sb,
 		 * inode block, ? - but truncate inode update has it) */
-		handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3);
+		handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
+					EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
 		if (IS_ERR(handle)) {
 			error = PTR_ERR(handle);
 			goto err_out;
@@ -2861,7 +2862,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
 	/* We know that structure was already allocated during DQUOT_INIT so
 	 * we will be updating only the data blocks + inodes */
-	ret += 2*EXT3_QUOTA_TRANS_BLOCKS;
+	ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
 
 	return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 60e44e6dd7..50378d8ff8 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1645,9 +1645,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
 	int err, retries = 0;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1679,9 +1679,9 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
 		return -EINVAL;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -1715,9 +1715,9 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2006,7 +2006,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
 	/* Initialize quotas before so that eventual writes go in
 	 * separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2065,7 +2065,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
 	/* Initialize quotas before so that eventual writes go
 	 * in separate transaction */
 	DQUOT_INIT(dentry->d_inode);
-	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2120,9 +2120,9 @@ static int ext3_symlink (struct inode * dir,
 		return -ENAMETOOLONG;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					2*EXT3_QUOTA_INIT_BLOCKS);
+					2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2174,7 +2174,7 @@ static int ext3_link (struct dentry * old_dentry,
 		return -EMLINK;
 
 retry:
-	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
 					EXT3_INDEX_EXTRA_TRANS_BLOCKS);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
@@ -2216,7 +2216,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 	 * in separate transaction */
 	if (new_dentry->d_inode)
 		DQUOT_INIT(new_dentry->d_inode);
-	handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
+	handle = ext3_journal_start(old_dir, 2 *
+					EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
 			 		EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9630fbfdc2..b4b3e8a391 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -589,7 +589,7 @@ enum {
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
-	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
 };
 
@@ -634,10 +634,10 @@ static match_table_t tokens = {
 	{Opt_grpjquota, "grpjquota=%s"},
 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
-	{Opt_ignore, "grpquota"},
-	{Opt_ignore, "noquota"},
-	{Opt_ignore, "quota"},
-	{Opt_ignore, "usrquota"},
+	{Opt_quota, "grpquota"},
+	{Opt_noquota, "noquota"},
+	{Opt_quota, "quota"},
+	{Opt_quota, "usrquota"},
 	{Opt_barrier, "barrier=%u"},
 	{Opt_err, NULL},
 	{Opt_resize, "resize"},
@@ -876,6 +876,7 @@ set_qf_name:
 				sbi->s_qf_names[qtype] = NULL;
 				return 0;
 			}
+			set_opt(sbi->s_mount_opt, QUOTA);
 			break;
 		case Opt_offusrjquota:
 			qtype = USRQUOTA;
@@ -898,6 +899,17 @@ clear_qf_name:
 		case Opt_jqfmt_vfsv0:
 			sbi->s_jquota_fmt = QFMT_VFS_V0;
 			break;
+		case Opt_quota:
+			set_opt(sbi->s_mount_opt, QUOTA);
+			break;
+		case Opt_noquota:
+			if (sb_any_quota_enabled(sb)) {
+				printk(KERN_ERR "EXT3-fs: Cannot change quota "
+					"options when quota turned on.\n");
+				return 0;
+			}
+			clear_opt(sbi->s_mount_opt, QUOTA);
+			break;
 #else
 		case Opt_usrjquota:
 		case Opt_grpjquota:
@@ -909,6 +921,9 @@ clear_qf_name:
 				"EXT3-fs: journalled quota options not "
 				"supported.\n");
 			break;
+		case Opt_quota:
+		case Opt_noquota:
+			break;
 #endif
 		case Opt_abort:
 			set_opt(sbi->s_mount_opt, ABORT);
@@ -2238,7 +2253,7 @@ static int ext3_dquot_initialize(struct inode *inode, int type)
 	int ret, err;
 
 	/* We may create quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_initialize(inode, type);
@@ -2254,7 +2269,7 @@ static int ext3_dquot_drop(struct inode *inode)
 	int ret, err;
 
 	/* We may delete quota structure so we need to reserve enough blocks */
-	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+	handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_drop(inode);
@@ -2272,7 +2287,7 @@ static int ext3_write_dquot(struct dquot *dquot)
 
 	inode = dquot_to_inode(dquot);
 	handle = ext3_journal_start(inode,
-					EXT3_QUOTA_TRANS_BLOCKS);
+					EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_commit(dquot);
@@ -2288,7 +2303,7 @@ static int ext3_acquire_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_INIT_BLOCKS);
+					EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_acquire(dquot);
@@ -2304,7 +2319,7 @@ static int ext3_release_dquot(struct dquot *dquot)
 	handle_t *handle;
 
 	handle = ext3_journal_start(dquot_to_inode(dquot),
-					EXT3_QUOTA_INIT_BLOCKS);
+					EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	ret = dquot_release(dquot);
@@ -2361,6 +2376,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
 	int err;
 	struct nameidata nd;
 
+	if (!test_opt(sb, QUOTA))
+		return -EINVAL;
 	/* Not journalling quota? */
 	if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
 	    !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 4cbc6d0212..3f9dfa643b 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1044,7 +1044,7 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
 	int error, retries = 0;
 
 retry:
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 	} else {
-- 
cgit v1.2.2


From 556a2a45bce1740f035befaa7201e4ad836c7257 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:06 -0700
Subject: [PATCH] quota: reiserfs: improve quota credit estimates

Use improved credit estimates for quota operations.  Also reserve space
for a quota operation in a transaction only if the filesystem was mounted
with some quota option.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c  |  4 ++--
 fs/reiserfs/inode.c | 11 ++++++-----
 fs/reiserfs/namei.c | 25 ++++++++++++++-----------
 fs/reiserfs/super.c | 35 +++++++++++++++++++++++------------
 4 files changed, 45 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2230afff18..12e9120954 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -201,7 +201,7 @@ static int reiserfs_allocate_blocks_for_region(
     /* If we came here, it means we absolutely need to open a transaction,
        since we need to allocate some blocks */
     reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
-    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough
+    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
     if (res)
         goto error_exit;
     reiserfs_update_inode_transaction(inode) ;
@@ -576,7 +576,7 @@ error_exit:
         int err;
         // update any changes we made to blk count
         reiserfs_update_sd(th, inode);
-        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS);
+        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
         if (err)
             res = err;
     }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 073425e6e0..0d5817f819 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -28,7 +28,7 @@ static int reiserfs_prepare_write(struct file *f, struct page *page,
 void reiserfs_delete_inode (struct inode * inode)
 {
     /* We need blocks for transaction + (user+group) quota update (possibly delete) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
     struct reiserfs_transaction_handle th ;
   
     reiserfs_write_lock(inode->i_sb);
@@ -591,7 +591,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
        XXX in practically impossible worst case direct2indirect()
        can incur (much) more than 3 balancings.
        quota update for user, group */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
     int version;
     int dangle = 1;
     loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
@@ -2796,14 +2796,15 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 
                 if (!error) {
 		    struct reiserfs_transaction_handle th;
+		    int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2;
 
 		    /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-		    error = journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_begin(&th, inode->i_sb, jbegin_count);
  		    if (error)
  			goto out;
                     error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
 		    if (error) {
-			journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+			journal_end(&th, inode->i_sb, jbegin_count);
 			goto out;
 		    }
 		    /* Update corresponding info in inode so that everything is in
@@ -2813,7 +2814,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
 		    if (attr->ia_valid & ATTR_GID)
 			inode->i_gid = attr->ia_gid;
 		    mark_inode_dirty(inode);
-		    error = journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+		    error = journal_end(&th, inode->i_sb, jbegin_count);
 		}
         }
         if (!error)
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 7d4dc5f5aa..4a333255f2 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -586,7 +586,7 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode,
     int retval;
     struct inode * inode;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     struct reiserfs_transaction_handle th ;
     int locked;
 
@@ -653,7 +653,7 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
     struct inode * inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     int locked;
 
     if (!new_valid_dev(rdev))
@@ -727,7 +727,7 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
     struct inode * inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
     int locked;
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
@@ -829,8 +829,10 @@ static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry)
 
 
     /* we will be doing 2 balancings and update 2 stat data, we change quotas
-     * of the owner of the directory and of the owner of the parent directory */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * of the owner of the directory and of the owner of the parent directory.
+     * The quota structure is possibly deleted only on last iput => outside
+     * of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -913,9 +915,10 @@ static int reiserfs_unlink (struct inode * dir, struct dentry *dentry)
     inode = dentry->d_inode;
 
     /* in this transaction we can be doing at max two balancings and update
-       two stat datas, we change quotas of the owner of the directory and of
-       the owner of the parent directory */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * two stat datas, we change quotas of the owner of the directory and of
+     * the owner of the parent directory. The quota structure is possibly
+     * deleted only on iput => outside of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -1000,7 +1003,7 @@ static int reiserfs_symlink (struct inode * parent_dir,
     struct reiserfs_transaction_handle th ;
     int mode = S_IFLNK | S_IRWXUGO;
     /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
 
     if (!(inode = new_inode(parent_dir->i_sb))) {
 	return -ENOMEM ;
@@ -1076,7 +1079,7 @@ static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct
     struct inode *inode = old_dentry->d_inode;
     struct reiserfs_transaction_handle th ;
     /* We need blocks for transaction + update of quotas for the owners of the directory */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
     reiserfs_write_lock(dir->i_sb);
     if (inode->i_nlink >= REISERFS_LINK_MAX) {
@@ -1196,7 +1199,7 @@ static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry,
        pointed initially and (5) maybe block containing ".." of
        renamed directory
        quota updates: two parent directories */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS;
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
 
     old_inode = old_dentry->d_inode;
     new_dentry_inode = new_dentry->d_inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 031577fb41..660aefca1f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -866,8 +866,9 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 	{"jdev",	.arg_required = 'j', .values = NULL},
 	{"nolargeio",	.arg_required = 'w', .values = NULL},
 	{"commit",	.arg_required = 'c', .values = NULL},
-	{"usrquota",},
-	{"grpquota",},
+	{"usrquota",	.setmask = 1<<REISERFS_QUOTA},
+	{"grpquota",	.setmask = 1<<REISERFS_QUOTA},
+	{"noquota",	.clrmask = 1<<REISERFS_QUOTA},
 	{"errors", 	.arg_required = 'e', .values = error_actions},
 	{"usrjquota",	.arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
 	{"grpjquota",	.arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
@@ -964,6 +965,7 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 		    return 0;
 		}
 		strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+		*mount_options |= 1<<REISERFS_QUOTA;
 	    }
 	    else {
 		if (REISERFS_SB(s)->s_qf_names[qtype]) {
@@ -995,7 +997,13 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 	reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified.");
 	return 0;
     }
+    /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
+    if (!(*mount_options & (1<<REISERFS_QUOTA)) && sb_any_quota_enabled(s)) {
+	reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on.");
+	return 0;
+    }
 #endif
+
     return 1;
 }
 
@@ -1105,6 +1113,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
   safe_mask |= 1 << REISERFS_ERROR_RO;
   safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
   safe_mask |= 1 << REISERFS_ERROR_PANIC;
+  safe_mask |= 1 << REISERFS_QUOTA;
 
   /* Update the bitmask, taking care to keep
    * the bits we're not allowed to change here */
@@ -1845,11 +1854,11 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type)
 
     /* We may create quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
     if (ret)
 	goto out;
     ret = dquot_initialize(inode, type);
-    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1864,11 +1873,11 @@ static int reiserfs_dquot_drop(struct inode *inode)
 
     /* We may delete quota structure so we need to reserve enough blocks */
     reiserfs_write_lock(inode->i_sb);
-    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
     if (ret)
  	goto out;
     ret = dquot_drop(inode);
-    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1882,11 +1891,11 @@ static int reiserfs_write_dquot(struct dquot *dquot)
     int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
     if (ret)
 	goto out;
     ret = dquot_commit(dquot);
-    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1900,11 +1909,11 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
     int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
     if (ret)
 	goto out;
     ret = dquot_acquire(dquot);
-    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1918,11 +1927,11 @@ static int reiserfs_release_dquot(struct dquot *dquot)
     int ret, err;
 
     reiserfs_write_lock(dquot->dq_sb);
-    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
     if (ret)
  	goto out;
     ret = dquot_release(dquot);
-    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
     if (!ret && err)
 	ret = err;
 out:
@@ -1978,6 +1987,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch
     int err;
     struct nameidata nd;
 
+    if (!(REISERFS_SB(sb)->s_mount_opt & (1<<REISERFS_QUOTA)))
+	return -EINVAL;
     err = path_lookup(path, LOOKUP_FOLLOW, &nd);
     if (err)
         return err;
-- 
cgit v1.2.2


From bdd5b29c6bc835dab71148afd5952f9cd278eef1 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 23 Jun 2005 22:01:37 -0700
Subject: [PATCH] Make reiserfs BUG on too big transaction

Make reiserfs BUG() when somebody tries to start a larger transaction than
is allowed (currently the code just silently deadlocks).

Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/journal.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3072cfdee9..7b87707acc 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2631,6 +2631,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct sup
   int retval;
 
   reiserfs_check_lock_depth(p_s_sb, "journal_begin") ;
+  if (nblocks > journal->j_trans_max)
+	BUG();
 
   PROC_INFO_INC( p_s_sb, journal.journal_being );
   /* set here for journal_join */
-- 
cgit v1.2.2


From 52f4fb43063c182f3ef7e257ab336a8be8066bb0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:49 -0700
Subject: [PATCH] nfsd4: find_delegation_file()

Factor out a bit of common code that will be useful elsewhere.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 75e8b13758..bb95275e7b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1526,6 +1526,18 @@ out:
 	return status;
 }
 
+static struct nfs4_delegation *
+find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
+{
+	struct nfs4_delegation *dp;
+
+	list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
+			return dp;
+	}
+	return NULL;
+}
+
 static int
 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
@@ -2419,24 +2431,15 @@ find_stateid(stateid_t *stid, int flags)
 static struct nfs4_delegation *
 find_delegation_stateid(struct inode *ino, stateid_t *stid)
 {
-	struct nfs4_delegation *dp = NULL;
 	struct nfs4_file *fp = NULL;
-	u32 st_id;
 
 	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
                     stid->si_boot, stid->si_stateownerid,
                     stid->si_fileid, stid->si_generation);
 
-	st_id = stid->si_stateownerid;
 	fp = find_file(ino);
-	if (fp) {
-		list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
-			if(dp->dl_stateid.si_stateownerid == st_id) {
-				dprintk("NFSD: find_delegation dp %p\n",dp);
-				return dp;
-			}
-		}
-	}
+	if (fp)
+		return find_delegation_file(fp, stid);
 	return NULL;
 }
 
-- 
cgit v1.2.2


From 4a6e43e6d4e43723699879f421d321e39eab5e41 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:50 -0700
Subject: [PATCH] nfsd4: nfs4_check_delegmode

Additional minor code reshuffling to prepare for claim_deleg_cur support.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bb95275e7b..31f7082df4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1526,6 +1526,15 @@ out:
 	return status;
 }
 
+static inline int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+		return nfserr_openmode;
+	else
+		return nfs_ok;
+}
+
 static struct nfs4_delegation *
 find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
@@ -1960,15 +1969,6 @@ out:
 	return status;
 }
 
-static inline int
-nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
-{
-	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
-		return nfserr_openmode;
-	else
-		return nfs_ok;
-}
-
 static inline int
 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 {
-- 
cgit v1.2.2


From 567d98292e81033182e3da4c33b41ada9c113447 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:53 -0700
Subject: [PATCH] nfsd4: don't reopen for delegated client

We don't really need to be doing a separate open for every stateid.  And in
the case of an open from a client that already has a delegation on a file, it
unnecessarily results in a delegation recall.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 31f7082df4..3791c9d84d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1547,6 +1547,24 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 	return NULL;
 }
 
+static void
+nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
+		struct nfs4_delegation **dp)
+{
+	int flags;
+	int status;
+
+	*dp = find_delegation_file(fp, &open->op_delegate_stateid);
+	if (*dp == NULL)
+		return;
+	flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
+						RD_STATE : WR_STATE;
+	status = nfs4_check_delegmode(*dp, flags);
+	if (status)
+		*dp = NULL;
+	return;
+}
+
 static int
 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
@@ -1572,21 +1590,28 @@ out:
 
 static int
 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
+		struct nfs4_delegation *dp,
 		struct svc_fh *cur_fh, int flags)
 {
 	struct nfs4_stateid *stp;
-	int status;
 
 	stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
 	if (stp == NULL)
 		return nfserr_resource;
 
-	status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file);
-	if (status) {
-		if (status == nfserr_dropit)
-			status = nfserr_jukebox;
-		kfree(stp);
-		return status;
+	if (dp) {
+		get_file(dp->dl_vfs_file);
+		stp->st_vfs_file = dp->dl_vfs_file;
+	} else {
+		int status;
+		status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
+				&stp->st_vfs_file);
+		if (status) {
+			if (status == nfserr_dropit)
+				status = nfserr_jukebox;
+			kfree(stp);
+			return status;
+		}
 	}
 	vfsopen++;
 	*stpp = stp;
@@ -1720,6 +1745,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	struct nfs4_file *fp = NULL;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_stateid *stp = NULL;
+	struct nfs4_delegation *dp = NULL;
 	int status;
 
 	status = nfserr_inval;
@@ -1734,6 +1760,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	if (fp) {
 		if ((status = nfs4_check_open(fp, open, &stp)))
 			goto out;
+		nfs4_check_deleg(fp, open, &dp);
 	} else {
 		status = nfserr_resource;
 		fp = alloc_init_file(ino);
@@ -1757,7 +1784,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 			flags = MAY_WRITE;
 		else
 			flags = MAY_READ;
-		if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags)))
+		status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
+		if (status)
 			goto out;
 		init_stateid(stp, fp, open);
 		status = nfsd4_truncate(rqstp, current_fh, open);
-- 
cgit v1.2.2


From c44c5eeb2c022ddac98a8543c08dc8ff820561dc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:54 -0700
Subject: [PATCH] nfsd4: add open state code for CLAIM_DELEGATE_CUR

State logic for OPEN with claim type CLAIM_DELEGATE_CUR, which the NFSv4
client uses to report local OPENs on a delegated file back to the NFSv4
server.

nfs4_check_deleg() performs input delegation stateid lookup and sanity check.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3791c9d84d..8ac0c9abe9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1547,22 +1547,28 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 	return NULL;
 }
 
-static void
+static int
 nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
 		struct nfs4_delegation **dp)
 {
 	int flags;
-	int status;
+	int status = nfserr_bad_stateid;
 
 	*dp = find_delegation_file(fp, &open->op_delegate_stateid);
 	if (*dp == NULL)
-		return;
+		goto out;
 	flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
 						RD_STATE : WR_STATE;
 	status = nfs4_check_delegmode(*dp, flags);
 	if (status)
 		*dp = NULL;
-	return;
+out:
+	if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
+		return nfs_ok;
+	if (status)
+		return status;
+	open->op_stateowner->so_confirmed = 1;
+	return nfs_ok;
 }
 
 static int
@@ -1760,8 +1766,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	if (fp) {
 		if ((status = nfs4_check_open(fp, open, &stp)))
 			goto out;
-		nfs4_check_deleg(fp, open, &dp);
+		status = nfs4_check_deleg(fp, open, &dp);
+		if (status)
+			goto out;
 	} else {
+		status = nfserr_bad_stateid;
+		if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+			goto out;
 		status = nfserr_resource;
 		fp = alloc_init_file(ino);
 		if (fp == NULL)
-- 
cgit v1.2.2


From 0dd3c19212961453817f219cd6200981c38564bc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:56 -0700
Subject: [PATCH] nfsd4: support CLAIM_DELEGATE_CUR

Add OPEN claim type NFS4_OPEN_CLAIM_DELEGATE_CUR to nfsd4_open().

A delegation stateid and a name are provided.  OPEN with O_CREAT is not legal
with this claim type; otherwise, use the NFS4_OPEN_CLAIM_NULL code path to
look up the filename to be opened.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e8158741e8..eb8c1337d9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -198,6 +198,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 	if (status)
 		goto out;
 	switch (open->op_claim_type) {
+		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+			status = nfserr_inval;
+			if (open->op_create)
+				goto out;
+			/* fall through */
 		case NFS4_OPEN_CLAIM_NULL:
 			/*
 			 * (1) set CURRENT_FH to the file being opened,
@@ -220,7 +225,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
 			if (status)
 				goto out;
 			break;
-		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
              	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
 			printk("NFSD: unsupported OPEN claim type %d\n",
 				open->op_claim_type);
-- 
cgit v1.2.2


From 496400014f22c4dbdbc1e89249a2feba46939708 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:02:58 -0700
Subject: [PATCH] nfsd4: fix fh_expire_type

We're returning NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME for the
fh_expire_type attribute.  This is incorrect:
	1. The spec actually only allows NOEXPIRE_WITH_OPEN when
	   VOLATILE_ANY is also set.
	2. Filehandles for open files can expire, if the file is removed
	   and there is a reboot.
	3. Filehandles are only volatile on rename in the subtree check
	   case.

Unfortunately, there's no way to indicate that we only expire on remove.  So
our only choice is FH4_VOLATILE_ANY.  Although it's redundant, we also set
FH4_VOL_RENAME in the subtree check case, since subtreecheck does actually
cause problems in practice and it seems possibly useful to give clients some
way to distinguish that case.

Fix a misspelled #define while we're at it.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4xdr.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 36a058a112..0ae1467c3b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1366,7 +1366,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
 		if ((buflen -= 4) < 0)
 			goto out_resource;
-		WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
+		if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+			WRITE32(NFS4_FH_VOLATILE_ANY);
+		else
+			WRITE32(NFS4_FH_VOLATILE_ANY|NFS4_FH_VOL_RENAME);
 	}
 	if (bmval0 & FATTR4_WORD0_CHANGE) {
 		/*
-- 
cgit v1.2.2


From c815afc73eeef089922449857ca4ed4d2e8950cb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:00 -0700
Subject: [PATCH] nfsd4: block metadata ops during grace period

We currently return err_grace if a user attempts a non-reclaim open during the
grace period.  But we also need to prevent renames and removes, at least, to
ensure clients have the chance to recover state on files before they are moved
or deleted.

Of course, local users could also do renames and removes during the lease
period, and there's not much we can do about that.  This at least will help
with remote users.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eb8c1337d9..ac4878ac22 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -536,6 +536,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem
 {
 	int status;
 
+	if (nfs4_in_grace())
+		return nfserr_grace;
 	status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
 	if (status == nfserr_symlink)
 		return nfserr_notdir;
@@ -554,6 +556,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
 
 	if (!save_fh->fh_dentry)
 		return status;
+	if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
+					& NFSEXP_NOSUBTREECHECK))
+		return nfserr_grace;
 	status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
 			     rename->rn_snamelen, current_fh,
 			     rename->rn_tname, rename->rn_tnamelen);
-- 
cgit v1.2.2


From e60d4398a7c20fbe9c4a6cc39d7188ef9f65d2f1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:01 -0700
Subject: [PATCH] nfsd4: slabify nfs4_files

The structures the server uses to keep track of various pieces of nfsv4 state
(open files, outstanding delegations, etc.) are likely to be allocated and
deallocated frequently and seem reasonable candidates for slab caches.

While we're at it, the slab code keeps statistics that help catch leaks and
such, so we may as well take this chance to eliminate some debugging counters
that we've been keeping ourselves.

Start with the struct nfs4_file.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 58 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8ac0c9abe9..260c1cbe25 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -70,8 +70,6 @@ u32 list_add_perfile = 0;
 u32 list_del_perfile = 0;
 u32 add_perclient = 0;
 u32 del_perclient = 0;
-u32 alloc_file = 0;
-u32 free_file = 0;
 u32 vfsopen = 0;
 u32 vfsclose = 0;
 u32 alloc_delegation= 0;
@@ -90,6 +88,9 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
  */
 static DECLARE_MUTEX(client_sema);
 
+kmem_cache_t *stateowner_slab = NULL;
+kmem_cache_t *file_slab = NULL;
+
 void
 nfs4_lock_state(void)
 {
@@ -961,14 +962,14 @@ alloc_init_file(struct inode *ino)
 	struct nfs4_file *fp;
 	unsigned int hashval = file_hashval(ino);
 
-	if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
+	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
+	if (fp) {
 		INIT_LIST_HEAD(&fp->fi_hash);
 		INIT_LIST_HEAD(&fp->fi_perfile);
 		INIT_LIST_HEAD(&fp->fi_del_perfile);
 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
-		alloc_file++;
 		return fp;
 	}
 	return NULL;
@@ -992,29 +993,41 @@ release_all_files(void)
 	}
 }
 
-kmem_cache_t *stateowner_slab = NULL;
-
-static int
-nfsd4_init_slabs(void)
+static void
+nfsd4_free_slab(kmem_cache_t **slab)
 {
-	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
-			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
-	if (stateowner_slab == NULL) {
-		dprintk("nfsd4: out of memory while initializing nfsv4\n");
-		return -ENOMEM;
-	}
-	return 0;
+	int status;
+
+	if (*slab == NULL)
+		return;
+	status = kmem_cache_destroy(*slab);
+	*slab = NULL;
+	WARN_ON(status);
 }
 
 static void
 nfsd4_free_slabs(void)
 {
-	int status = 0;
+	nfsd4_free_slab(&stateowner_slab);
+	nfsd4_free_slab(&file_slab);
+}
 
-	if (stateowner_slab)
-		status = kmem_cache_destroy(stateowner_slab);
-	stateowner_slab = NULL;
-	BUG_ON(status);
+static int
+nfsd4_init_slabs(void)
+{
+	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
+			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
+	if (stateowner_slab == NULL)
+		goto out_nomem;
+	file_slab = kmem_cache_create("nfsd4_files",
+			sizeof(struct nfs4_file), 0, 0, NULL, NULL);
+	if (file_slab == NULL)
+		goto out_nomem;
+	return 0;
+out_nomem:
+	nfsd4_free_slabs();
+	dprintk("nfsd4: out of memory while initializing nfsv4\n");
+	return -ENOMEM;
 }
 
 void
@@ -1167,10 +1180,9 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 static void
 release_file(struct nfs4_file *fp)
 {
-	free_file++;
 	list_del(&fp->fi_hash);
 	iput(fp->fi_inode);
-	kfree(fp);
+	kmem_cache_free(file_slab, fp);
 }	
 
 void
@@ -3286,8 +3298,6 @@ __nfs4_state_shutdown(void)
 			list_add_perfile, list_del_perfile);
 	dprintk("NFSD: add_perclient %d del_perclient %d\n",
 			add_perclient, del_perclient);
-	dprintk("NFSD: alloc_file %d free_file %d\n",
-			alloc_file, free_file);
 	dprintk("NFSD: vfsopen %d vfsclose %d\n",
 			vfsopen, vfsclose);
 	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
-- 
cgit v1.2.2


From 5ac049ac66416bbe84923f7c2384f23f6ee4aa88 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:03 -0700
Subject: [PATCH] nfsd4: slabify stateids

Allocate stateids from a slab cache.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 260c1cbe25..c5fce309d8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -90,6 +90,7 @@ static DECLARE_MUTEX(client_sema);
 
 kmem_cache_t *stateowner_slab = NULL;
 kmem_cache_t *file_slab = NULL;
+kmem_cache_t *stateid_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -1010,6 +1011,7 @@ nfsd4_free_slabs(void)
 {
 	nfsd4_free_slab(&stateowner_slab);
 	nfsd4_free_slab(&file_slab);
+	nfsd4_free_slab(&stateid_slab);
 }
 
 static int
@@ -1023,6 +1025,10 @@ nfsd4_init_slabs(void)
 			sizeof(struct nfs4_file), 0, 0, NULL, NULL);
 	if (file_slab == NULL)
 		goto out_nomem;
+	stateid_slab = kmem_cache_create("nfsd4_stateids",
+			sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
+	if (stateid_slab == NULL)
+		goto out_nomem;
 	return 0;
 out_nomem:
 	nfsd4_free_slabs();
@@ -1173,7 +1179,7 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 		vfsclose++;
 	} else if (flags & LOCK_STATE)
 		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
-	kfree(stp);
+	kmem_cache_free(stateid_slab, stp);
 	stp = NULL;
 }
 
@@ -1606,6 +1612,12 @@ out:
 	return status;
 }
 
+static inline struct nfs4_stateid *
+nfs4_alloc_stateid(void)
+{
+	return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
+}
+
 static int
 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 		struct nfs4_delegation *dp,
@@ -1613,7 +1625,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 {
 	struct nfs4_stateid *stp;
 
-	stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
+	stp = nfs4_alloc_stateid();
 	if (stp == NULL)
 		return nfserr_resource;
 
@@ -1627,7 +1639,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 		if (status) {
 			if (status == nfserr_dropit)
 				status = nfserr_jukebox;
-			kfree(stp);
+			kmem_cache_free(stateid_slab, stp);
 			return status;
 		}
 	}
@@ -2627,8 +2639,8 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	struct nfs4_stateid *stp;
 	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
 
-	if ((stp = kmalloc(sizeof(struct nfs4_stateid), 
-					GFP_KERNEL)) == NULL)
+	stp = nfs4_alloc_stateid();
+	if (stp == NULL)
 		goto out;
 	INIT_LIST_HEAD(&stp->st_hash);
 	INIT_LIST_HEAD(&stp->st_perfile);
-- 
cgit v1.2.2


From 5b2d21c1965859acc881dd862b6ebbfae67cdc14 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:04 -0700
Subject: [PATCH] nfsd4: slabify delegations

Allocate delegations from a slab cache.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c5fce309d8..927d2007d5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -72,8 +72,6 @@ u32 add_perclient = 0;
 u32 del_perclient = 0;
 u32 vfsopen = 0;
 u32 vfsclose = 0;
-u32 alloc_delegation= 0;
-u32 free_delegation= 0;
 
 /* forward declarations */
 struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
@@ -91,6 +89,7 @@ static DECLARE_MUTEX(client_sema);
 kmem_cache_t *stateowner_slab = NULL;
 kmem_cache_t *file_slab = NULL;
 kmem_cache_t *stateid_slab = NULL;
+kmem_cache_t *deleg_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -138,8 +137,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
 
 	dprintk("NFSD alloc_init_deleg\n");
-	if ((dp = kmalloc(sizeof(struct nfs4_delegation),
-		GFP_KERNEL)) == NULL)
+	dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
+	if (dp == NULL)
 		return dp;
 	INIT_LIST_HEAD(&dp->dl_del_perfile);
 	INIT_LIST_HEAD(&dp->dl_del_perclnt);
@@ -164,7 +163,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	atomic_set(&dp->dl_count, 1);
 	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
 	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
-	alloc_delegation++;
 	return dp;
 }
 
@@ -173,8 +171,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
 {
 	if (atomic_dec_and_test(&dp->dl_count)) {
 		dprintk("NFSD: freeing dp %p\n",dp);
-		kfree(dp);
-		free_delegation++;
+		kmem_cache_free(deleg_slab, dp);
 	}
 }
 
@@ -1012,6 +1009,7 @@ nfsd4_free_slabs(void)
 	nfsd4_free_slab(&stateowner_slab);
 	nfsd4_free_slab(&file_slab);
 	nfsd4_free_slab(&stateid_slab);
+	nfsd4_free_slab(&deleg_slab);
 }
 
 static int
@@ -1029,6 +1027,10 @@ nfsd4_init_slabs(void)
 			sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
 	if (stateid_slab == NULL)
 		goto out_nomem;
+	deleg_slab = kmem_cache_create("nfsd4_delegations",
+			sizeof(struct nfs4_delegation), 0, 0, NULL, NULL);
+	if (deleg_slab == NULL)
+		goto out_nomem;
 	return 0;
 out_nomem:
 	nfsd4_free_slabs();
@@ -3312,9 +3314,6 @@ __nfs4_state_shutdown(void)
 			add_perclient, del_perclient);
 	dprintk("NFSD: vfsopen %d vfsclose %d\n",
 			vfsopen, vfsclose);
-	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
-			alloc_delegation, free_delegation);
-
 }
 
 void
-- 
cgit v1.2.2


From 6fa305ded4cc859deb4727ad9b25df0bbc064e99 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:06 -0700
Subject: [PATCH] nfsd4: remove debugging counters

These remaining debugging counters haven't proved that useful.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 927d2007d5..f03a4180fa 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -65,14 +65,6 @@ static u32 nfs4_init;
 stateid_t zerostateid;             /* bits all 0 */
 stateid_t onestateid;              /* bits all 1 */
 
-/* debug counters */
-u32 list_add_perfile = 0; 
-u32 list_del_perfile = 0;
-u32 add_perclient = 0;
-u32 del_perclient = 0;
-u32 vfsopen = 0;
-u32 vfsclose = 0;
-
 /* forward declarations */
 struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
@@ -192,7 +184,6 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
 	if (dp->dl_flock)
 		setlease(filp, F_UNLCK, &dp->dl_flock);
 	nfsd_close(filp);
-	vfsclose++;
 }
 
 /* Called under the state lock. */
@@ -1083,7 +1074,6 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
 	list_add(&sop->so_perclient, &clp->cl_perclient);
-	add_perclient++;
 	sop->so_is_open_owner = 1;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -1117,10 +1107,8 @@ unhash_stateowner(struct nfs4_stateowner *sop)
 
 	list_del(&sop->so_idhash);
 	list_del(&sop->so_strhash);
-	if (sop->so_is_open_owner) {
+	if (sop->so_is_open_owner)
 		list_del(&sop->so_perclient);
-		del_perclient++;
-	}
 	list_del(&sop->so_perlockowner);
 	while (!list_empty(&sop->so_perfilestate)) {
 		stp = list_entry(sop->so_perfilestate.next, 
@@ -1151,7 +1139,6 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	INIT_LIST_HEAD(&stp->st_perfile);
 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
-	list_add_perfile++;
 	list_add(&stp->st_perfile, &fp->fi_perfile);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
@@ -1171,14 +1158,12 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 	struct file *filp = stp->st_vfs_file;
 
 	list_del(&stp->st_hash);
-	list_del_perfile++;
 	list_del(&stp->st_perfile);
 	list_del(&stp->st_perfilestate);
 	if (flags & OPEN_STATE) {
 		release_stateid_lockowners(stp);
 		stp->st_vfs_file = NULL;
 		nfsd_close(filp);
-		vfsclose++;
 	} else if (flags & LOCK_STATE)
 		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
 	kmem_cache_free(stateid_slab, stp);
@@ -1645,7 +1630,6 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
 			return status;
 		}
 	}
-	vfsopen++;
 	*stpp = stp;
 	return 0;
 }
@@ -2650,7 +2634,6 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
 	list_add(&stp->st_perfile, &fp->fi_perfile);
-	list_add_perfile++;
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
@@ -3308,12 +3291,6 @@ __nfs4_state_shutdown(void)
 	cancel_delayed_work(&laundromat_work);
 	flush_scheduled_work();
 	nfs4_init = 0;
-	dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
-			list_add_perfile, list_del_perfile);
-	dprintk("NFSD: add_perclient %d del_perclient %d\n",
-			add_perclient, del_perclient);
-	dprintk("NFSD: vfsopen %d vfsclose %d\n",
-			vfsopen, vfsclose);
 }
 
 void
-- 
cgit v1.2.2


From 8beefa249371f55432394ac96864c83b0b309c28 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:08 -0700
Subject: [PATCH] nfsd4: rename nfs4_file fields

Trivial renaming patch:

I can never remember, while looking at various lists relating the nfsd4 state
structures, which are the "heads" and which are items on other lists, or which
structures are actually on the various lists.  The following convention helps
me: given structures foo and bar, with foo containing the head of a list of
bars, use "bars" for the name of the head of the list contained in the struct
foo, and use "per_foo" for the entries in the struct bars.

Go ahead and do this for struct nfs4_file.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f03a4180fa..a84a80e8c0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -153,7 +153,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 		        current_fh->fh_handle.fh_size);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
+	list_add(&dp->dl_del_perfile, &fp->fi_delegations);
 	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
 	return dp;
 }
@@ -954,8 +954,8 @@ alloc_init_file(struct inode *ino)
 	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
 	if (fp) {
 		INIT_LIST_HEAD(&fp->fi_hash);
-		INIT_LIST_HEAD(&fp->fi_perfile);
-		INIT_LIST_HEAD(&fp->fi_del_perfile);
+		INIT_LIST_HEAD(&fp->fi_stateids);
+		INIT_LIST_HEAD(&fp->fi_delegations);
 		list_add(&fp->fi_hash, &file_hashtbl[hashval]);
 		fp->fi_inode = igrab(ino);
 		fp->fi_id = current_fileid++;
@@ -974,7 +974,7 @@ release_all_files(void)
 		while (!list_empty(&file_hashtbl[i])) {
 			fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
 			/* this should never be more than once... */
-			if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
+			if (!list_empty(&fp->fi_stateids) || !list_empty(&fp->fi_delegations)) {
 				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
 			}
 			release_file(fp);
@@ -1139,7 +1139,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	INIT_LIST_HEAD(&stp->st_perfile);
 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
-	list_add(&stp->st_perfile, &fp->fi_perfile);
+	list_add(&stp->st_perfile, &fp->fi_stateids);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
@@ -1204,7 +1204,7 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
 	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
 		move_to_close_lru(sop);
 	/* unused nfs4_file's are releseed. XXX slab cache? */
-	if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
+	if (list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations)) {
 		release_file(fp);
 	}
 }
@@ -1294,7 +1294,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	fp = find_file(ino);
 	if (fp) {
 	/* Search for conflicting share reservations */
-		list_for_each_entry(stp, &fp->fi_perfile, st_perfile) {
+		list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
 			if (test_bit(deny_type, &stp->st_deny_bmap) ||
 			    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
 				return nfserr_share_denied;
@@ -1545,7 +1545,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
 	struct nfs4_delegation *dp;
 
-	list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+	list_for_each_entry(dp, &fp->fi_delegations, dl_del_perfile) {
 		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
 			return dp;
 	}
@@ -1583,7 +1583,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_state
 	int status = nfserr_share_denied;
 	struct nfs4_stateowner *sop = open->op_stateowner;
 
-	list_for_each_entry(local, &fp->fi_perfile, st_perfile) {
+	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
 		/* ignore lock owners */
 		if (local->st_stateowner->so_is_open_owner == 0)
 			continue;
@@ -1830,7 +1830,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
 out:
 	/* take the opportunity to clean up unused state */
-	if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile))
+	if (fp && list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations))
 		release_file(fp);
 
 	/* CLAIM_PREVIOUS has different error returns */
@@ -2633,7 +2633,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	INIT_LIST_HEAD(&stp->st_perfilestate);
 	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
-	list_add(&stp->st_perfile, &fp->fi_perfile);
+	list_add(&stp->st_perfile, &fp->fi_stateids);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	stp->st_stateowner = sop;
 	stp->st_file = fp;
-- 
cgit v1.2.2


From 13cd21845d6a9729ca95e36ae6e8c669623fbfd4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:10 -0700
Subject: [PATCH] nfsd4: reference count struct nfs4_file

Add a struct kref to each nfs4_file and take a reference to it from each
stateid and delegation that refers to it.  The atomicity guarantees are
overkill given that all this stuff is done under the single nfsd4 state lock,
but a) we'd like finer-grained locking some day, and b) this simplifies the
cleanup of the structures a bit, something that has previously been a bit
complicated and bug-prone.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 100 +++++++++++++++++++++++++++-------------------------
 1 file changed, 51 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a84a80e8c0..6ba428afa4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -111,7 +111,6 @@ opaque_hashval(const void *ptr, int nbytes)
 /* forward declarations */
 static void release_stateowner(struct nfs4_stateowner *sop);
 static void release_stateid(struct nfs4_stateid *stp, int flags);
-static void release_file(struct nfs4_file *fp);
 
 /*
  * Delegation state
@@ -121,6 +120,27 @@ static void release_file(struct nfs4_file *fp);
 spinlock_t recall_lock;
 static struct list_head del_recall_lru;
 
+static void
+free_nfs4_file(struct kref *kref)
+{
+	struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref);
+	list_del(&fp->fi_hash);
+	iput(fp->fi_inode);
+	kmem_cache_free(file_slab, fp);
+}
+
+static inline void
+put_nfs4_file(struct nfs4_file *fi)
+{
+	kref_put(&fi->fi_ref, free_nfs4_file);
+}
+
+static inline void
+get_nfs4_file(struct nfs4_file *fi)
+{
+	kref_get(&fi->fi_ref);
+}
+
 static struct nfs4_delegation *
 alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
 {
@@ -136,6 +156,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	INIT_LIST_HEAD(&dp->dl_del_perclnt);
 	INIT_LIST_HEAD(&dp->dl_recall_lru);
 	dp->dl_client = clp;
+	get_nfs4_file(fp);
 	dp->dl_file = fp;
 	dp->dl_flock = NULL;
 	get_file(stp->st_vfs_file);
@@ -163,6 +184,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
 {
 	if (atomic_dec_and_test(&dp->dl_count)) {
 		dprintk("NFSD: freeing dp %p\n",dp);
+		put_nfs4_file(dp->dl_file);
 		kmem_cache_free(deleg_slab, dp);
 	}
 }
@@ -953,6 +975,7 @@ alloc_init_file(struct inode *ino)
 
 	fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
 	if (fp) {
+		kref_init(&fp->fi_ref);
 		INIT_LIST_HEAD(&fp->fi_hash);
 		INIT_LIST_HEAD(&fp->fi_stateids);
 		INIT_LIST_HEAD(&fp->fi_delegations);
@@ -964,24 +987,6 @@ alloc_init_file(struct inode *ino)
 	return NULL;
 }
 
-static void
-release_all_files(void)
-{
-	int i;
-	struct nfs4_file *fp;
-
-	for (i=0;i<FILE_HASH_SIZE;i++) {
-		while (!list_empty(&file_hashtbl[i])) {
-			fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
-			/* this should never be more than once... */
-			if (!list_empty(&fp->fi_stateids) || !list_empty(&fp->fi_delegations)) {
-				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
-			}
-			release_file(fp);
-		}
-	}
-}
-
 static void
 nfsd4_free_slab(kmem_cache_t **slab)
 {
@@ -1141,6 +1146,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
 	stp->st_stateowner = sop;
+	get_nfs4_file(fp);
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
@@ -1166,18 +1172,11 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 		nfsd_close(filp);
 	} else if (flags & LOCK_STATE)
 		locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
+	put_nfs4_file(stp->st_file);
 	kmem_cache_free(stateid_slab, stp);
 	stp = NULL;
 }
 
-static void
-release_file(struct nfs4_file *fp)
-{
-	list_del(&fp->fi_hash);
-	iput(fp->fi_inode);
-	kmem_cache_free(file_slab, fp);
-}	
-
 void
 move_to_close_lru(struct nfs4_stateowner *sop)
 {
@@ -1192,7 +1191,6 @@ void
 release_state_owner(struct nfs4_stateid *stp, int flag)
 {
 	struct nfs4_stateowner *sop = stp->st_stateowner;
-	struct nfs4_file *fp = stp->st_file;
 
 	dprintk("NFSD: release_state_owner\n");
 	release_stateid(stp, flag);
@@ -1203,10 +1201,6 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
 	 */
 	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
 		move_to_close_lru(sop);
-	/* unused nfs4_file's are releseed. XXX slab cache? */
-	if (list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations)) {
-		release_file(fp);
-	}
 }
 
 static int
@@ -1236,8 +1230,10 @@ find_file(struct inode *ino)
 	struct nfs4_file *fp;
 
 	list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
-		if (fp->fi_inode == ino)
+		if (fp->fi_inode == ino) {
+			get_nfs4_file(fp);
 			return fp;
+		}
 	}
 	return NULL;
 }
@@ -1288,19 +1284,24 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 	struct inode *ino = current_fh->fh_dentry->d_inode;
 	struct nfs4_file *fp;
 	struct nfs4_stateid *stp;
+	int ret;
 
 	dprintk("NFSD: nfs4_share_conflict\n");
 
 	fp = find_file(ino);
-	if (fp) {
+	if (!fp)
+		return nfs_ok;
+	ret = nfserr_share_denied;
 	/* Search for conflicting share reservations */
-		list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
-			if (test_bit(deny_type, &stp->st_deny_bmap) ||
-			    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
-				return nfserr_share_denied;
-		}
+	list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
+		if (test_bit(deny_type, &stp->st_deny_bmap) ||
+		    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
+			goto out;
 	}
-	return nfs_ok;
+	ret = nfs_ok;
+out:
+	put_nfs4_file(fp);
+	return ret;
 }
 
 static inline void
@@ -1829,10 +1830,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	            stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
 	            stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
 out:
-	/* take the opportunity to clean up unused state */
-	if (fp && list_empty(&fp->fi_stateids) && list_empty(&fp->fi_delegations))
-		release_file(fp);
-
+	if (fp)
+		put_nfs4_file(fp);
 	/* CLAIM_PREVIOUS has different error returns */
 	nfs4_set_claim_prev(open, &status);
 	/*
@@ -2480,16 +2479,19 @@ find_stateid(stateid_t *stid, int flags)
 static struct nfs4_delegation *
 find_delegation_stateid(struct inode *ino, stateid_t *stid)
 {
-	struct nfs4_file *fp = NULL;
+	struct nfs4_file *fp;
+	struct nfs4_delegation *dl;
 
 	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
                     stid->si_boot, stid->si_stateownerid,
                     stid->si_fileid, stid->si_generation);
 
 	fp = find_file(ino);
-	if (fp)
-		return find_delegation_file(fp, stid);
-	return NULL;
+	if (!fp)
+		return NULL;
+	dl = find_delegation_file(fp, stid);
+	put_nfs4_file(fp);
+	return dl;
 }
 
 /*
@@ -2636,6 +2638,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 	list_add(&stp->st_perfile, &fp->fi_stateids);
 	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
 	stp->st_stateowner = sop;
+	get_nfs4_file(fp);
 	stp->st_file = fp;
 	stp->st_stateid.si_boot = boot_time;
 	stp->st_stateid.si_stateownerid = sop->so_id;
@@ -3287,7 +3290,6 @@ __nfs4_state_shutdown(void)
 		unhash_delegation(dp);
 	}
 
-	release_all_files();
 	cancel_delayed_work(&laundromat_work);
 	flush_scheduled_work();
 	nfs4_init = 0;
-- 
cgit v1.2.2


From 46be925fa6f4796e732e16a020fa0ef9d48ea7c8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:11 -0700
Subject: [PATCH] knfsd: lockd: flush signals on shutdown

Silence another annoying "failed to contact portmap (errno -512)" on shutdown.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/lockd/svc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b82e470912..6e242556b9 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -191,7 +191,9 @@ lockd(struct svc_rqst *rqstp)
 		printk(KERN_DEBUG
 			"lockd: new process, skipping host shutdown\n");
 	wake_up(&lockd_exit);
-		
+
+	flush_signals(current);
+
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
-- 
cgit v1.2.2


From 7e06b7f9e9537cb826f3cff95816fc4384b67806 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:13 -0700
Subject: [PATCH] knfsd: nfs4: hold filp while reading or writing

We're trying to read and write from a struct file that we may not hold a
reference to any more (since a close could be processed as soon as we drop the
state lock).

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ac4878ac22..d71f14517b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -45,6 +45,7 @@
 #include <linux/param.h>
 #include <linux/major.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
@@ -477,26 +478,27 @@ static inline int
 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
 {
 	int status;
-	struct file *filp = NULL;
 
 	/* no need to check permission - this will be done in nfsd_read() */
 
+	read->rd_filp = NULL;
 	if (read->rd_offset >= OFFSET_MAX)
 		return nfserr_inval;
 
 	nfs4_lock_state();
 	/* check stateid */
 	if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
-					CHECK_FH | RD_STATE, &filp))) {
+				CHECK_FH | RD_STATE, &read->rd_filp))) {
 		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
 		goto out;
 	}
+	if (read->rd_filp)
+		get_file(read->rd_filp);
 	status = nfs_ok;
 out:
 	nfs4_unlock_state();
 	read->rd_rqstp = rqstp;
 	read->rd_fhp = current_fh;
-	read->rd_filp = filp;
 	return status;
 }
 
@@ -633,6 +635,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
 		goto out;
 	}
+	if (filp)
+		get_file(filp);
 	nfs4_unlock_state();
 
 	write->wr_bytes_written = write->wr_buflen;
@@ -644,6 +648,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
 	status =  nfsd_write(rqstp, current_fh, filp, write->wr_offset,
 			write->wr_vec, write->wr_vlen, write->wr_buflen,
 			&write->wr_how_written);
+	if (filp)
+		fput(filp);
 
 	if (status == nfserr_symlink)
 		status = nfserr_inval;
@@ -932,6 +938,9 @@ encode_op:
 			nfs4_put_stateowner(replay_owner);
 			replay_owner = NULL;
 		}
+		/* XXX Ugh, we need to get rid of this kind of special case: */
+		if (op->opnum == OP_READ && op->u.read.rd_filp)
+			fput(op->u.read.rd_filp);
 	}
 
 out:
-- 
cgit v1.2.2


From 5ba266d6323e957588712f6a7d31252cd6b797bb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:15 -0700
Subject: [PATCH] knfsd: nfsd4: fix probe_callback

rpc_create_client was modified recently to do its own (synchronous) NULL ping
of the server.  We'd rather do that on our own, asynchronously, so that we
don't have to block the nfsd thread doing the probe, and so that setclientid
handling (hence, client mounts) can proceed normally whether the callback is
successful or not.  (We can still function fine without the callback
channel--we just won't be able to give out delegations till it's verified to
work.)

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4callback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 634465e9cf..38c3e1c47d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -427,7 +427,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	 * XXX AUTH_UNIX only - need AUTH_GSS....
 	 */
 	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-	clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
+	clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
 	if (IS_ERR(clnt)) {
 		dprintk("NFSD: couldn't create callback client\n");
 		goto out_err;
-- 
cgit v1.2.2


From dfc8356570b6fcb4035c7d916ade5bbbe6c3b50a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:16 -0700
Subject: [PATCH] knfsd: nfsd4: nfs4_check_open_reclaim cleanup

Minor cleanup.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6ba428afa4..2a5f00b0ee 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3155,11 +3155,7 @@ nfs4_find_reclaim_client(clientid_t *clid)
 int
 nfs4_check_open_reclaim(clientid_t *clid)
 {
-	struct nfs4_client_reclaim *crp;
-
-	if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
-		return nfserr_reclaim_bad;
-	return nfs_ok;
+	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
 
-- 
cgit v1.2.2


From 58da282b733cff4caef805c6555c7a3b90772946 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:19 -0700
Subject: [PATCH] knfsd: nfsd4: create separate laundromat workqueue

We're running the laundromat work on the default kevent worker thread.  But
the laundromat takes the nfsv4 state semaphore, which is used for way too much
stuff, and the potential for deadlocks is high.  Better to have this on a
separate workqueue.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2a5f00b0ee..9bec088e76 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1844,6 +1844,7 @@ out:
 	return status;
 }
 
+static struct workqueue_struct *laundry_wq;
 static struct work_struct laundromat_work;
 static void laundromat_main(void *);
 static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
@@ -1951,7 +1952,7 @@ laundromat_main(void *not_used)
 
 	t = nfs4_laundromat();
 	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
-	schedule_delayed_work(&laundromat_work, t*HZ);
+	queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
 }
 
 /* search ownerid_hashtbl[] and close_lru for stateid owner
@@ -3211,7 +3212,8 @@ __nfs4_state_init(void)
 		printk("NFSD: starting %ld-second grace period\n", grace_time);
 	grace_end = boot_time + grace_time;
 	INIT_WORK(&laundromat_work,laundromat_main, NULL);
-	schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
+	laundry_wq = create_singlethread_workqueue("nfsd4");
+	queue_delayed_work(laundry_wq, &laundromat_work, NFSD_LEASE_TIME*HZ);
 }
 
 int
@@ -3287,7 +3289,8 @@ __nfs4_state_shutdown(void)
 	}
 
 	cancel_delayed_work(&laundromat_work);
-	flush_scheduled_work();
+	flush_workqueue(laundry_wq);
+	destroy_workqueue(laundry_wq);
 	nfs4_init = 0;
 }
 
-- 
cgit v1.2.2


From d99a05adf8490cc171b7709554936b8f3ac9e362 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:21 -0700
Subject: [PATCH] knfsd: nfsd4: simplify lease changing

The only way the protocol gives to change the lease time on the fly is to
simulate a reboot.  We don't have that completely right in the current code;
among other things, we should probably put lockd in grace too while we do
this.

For now, let's just keep this simple, and wait till the next time nfsd starts
to register any changes in lease time.  If the administrator really wants to
change the lease time *now*, they can go ahead and bring nfsd down and then
back up again after changing the lease time.

Also remove the "if (reclaim_str_hashtbl_size == 0)" case, a shortcut which
skips the grace period if we know of no clients in need of recovery.  This
isn't going to work well with nlm.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 63 ++++++++++-------------------------------------------
 1 file changed, 12 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9bec088e76..16c9a43218 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -53,7 +53,7 @@
 
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
-static time_t old_lease_time = 90; /* past incarnation lease time */
+static time_t user_lease_time = 90;
 static u32 nfs4_reclaim_init = 0;
 time_t boot_time;
 static time_t grace_end = 0;
@@ -3205,11 +3205,9 @@ __nfs4_state_init(void)
 	INIT_LIST_HEAD(&del_recall_lru);
 	spin_lock_init(&recall_lock);
 	boot_time = get_seconds();
-	grace_time = max(old_lease_time, lease_time);
-	if (reclaim_str_hashtbl_size == 0)
-		grace_time = 0;
-	if (grace_time)
-		printk("NFSD: starting %ld-second grace period\n", grace_time);
+	grace_time = max(user_lease_time, lease_time);
+	lease_time = user_lease_time;
+	printk("NFSD: starting %ld-second grace period\n", grace_time);
 	grace_end = boot_time + grace_time;
 	INIT_WORK(&laundromat_work,laundromat_main, NULL);
 	laundry_wq = create_singlethread_workqueue("nfsd4");
@@ -3307,53 +3305,16 @@ nfs4_state_shutdown(void)
 /*
  * Called when leasetime is changed.
  *
- * if nfsd is not started, simply set the global lease.
- *
- * if nfsd(s) are running, lease change requires nfsv4 state to be reset.
- * e.g: boot_time is reset, existing nfs4_client structs are
- * used to fill reclaim_str_hashtbl, then all state (except for the
- * reclaim_str_hashtbl) is re-initialized.
- *
- * if the old lease time is greater than the new lease time, the grace
- * period needs to be set to the old lease time to allow clients to reclaim
- * their state. XXX - we may want to set the grace period == lease time
- * after an initial grace period == old lease time
- *
- * if an error occurs in this process, the new lease is set, but the server
- * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
- * which means OPEN/LOCK/READ/WRITE will fail during grace period.
- *
- * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
- * OPEN and LOCK reclaims.
+ * The only way the protocol gives us to handle on-the-fly lease changes is to
+ * simulate a reboot.  Instead of doing that, we just wait till the next time
+ * we start to register any changes in lease time.  If the administrator
+ * really wants to change the lease time *now*, they can go ahead and bring
+ * nfsd down and then back up again after changing the lease time.
  */
 void
 nfs4_reset_lease(time_t leasetime)
 {
-	struct nfs4_client *clp;
-	int i;
-
-	printk("NFSD: New leasetime %ld\n",leasetime);
-	if (!nfs4_init)
-		return;
-	nfs4_lock_state();
-	old_lease_time = lease_time;
-	lease_time = leasetime;
-
-	nfs4_release_reclaim();
-
-	/* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
-	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-		list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
-			if (!nfs4_client_to_reclaim(clp->cl_name.data,
-						clp->cl_name.len)) {
-				nfs4_release_reclaim();
-				goto init_state;
-			}
-		}
-	}
-init_state:
-	__nfs4_state_shutdown();
-	__nfs4_state_init();
-	nfs4_unlock_state();
+	lock_kernel();
+	user_lease_time = leasetime;
+	unlock_kernel();
 }
-
-- 
cgit v1.2.2


From 7b190fecfa33d72bcf74c9473134c2ad14ae9545 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:23 -0700
Subject: [PATCH] knfsd: nfsd4: delegation recovery

Allow recovery of delegations after reboot.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 36 ++++++++++++++++++++++++++++--------
 fs/nfsd/nfs4xdr.c   |  2 +-
 2 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 16c9a43218..0f6119714c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1709,14 +1709,30 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	int status, flag = 0;
 
 	flag = NFS4_OPEN_DELEGATE_NONE;
-	if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
-	     || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
-		goto out;
-
-	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
-		flag = NFS4_OPEN_DELEGATE_WRITE;
-	else
-		flag = NFS4_OPEN_DELEGATE_READ;
+	open->op_recall = 0;
+	switch (open->op_claim_type) {
+		case NFS4_OPEN_CLAIM_PREVIOUS:
+			if (!atomic_read(&cb->cb_set))
+				open->op_recall = 1;
+			flag = open->op_delegate_type;
+			if (flag == NFS4_OPEN_DELEGATE_NONE)
+				goto out;
+			break;
+		case NFS4_OPEN_CLAIM_NULL:
+			/* Let's not give out any delegations till everyone's
+			 * had the chance to reclaim theirs.... */
+			if (nfs4_in_grace())
+				goto out;
+			if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
+				goto out;
+			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+				flag = NFS4_OPEN_DELEGATE_WRITE;
+			else
+				flag = NFS4_OPEN_DELEGATE_READ;
+			break;
+		default:
+			goto out;
+	}
 
 	dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
 	if (dp == NULL) {
@@ -1750,6 +1766,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	             dp->dl_stateid.si_fileid,
 	             dp->dl_stateid.si_generation);
 out:
+	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
+			&& flag == NFS4_OPEN_DELEGATE_NONE
+			&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+		printk("NFSD: WARNING: refusing delegation reclaim\n");
 	open->op_delegate_type = flag;
 }
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0ae1467c3b..cfe978a72c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1972,7 +1972,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open
 	case NFS4_OPEN_DELEGATE_READ:
 		RESERVE_SPACE(20 + sizeof(stateid_t));
 		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
-		WRITE32(0);
+		WRITE32(open->op_recall);
 
 		/*
 		 * TODO: ACE's in delegations
-- 
cgit v1.2.2


From 76a3550ec50ed86885a10a767ebaebb7c9104721 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:26 -0700
Subject: [PATCH] knfsd: nfsd4: rename nfs4_state_init

Somewhat gratuitous rename to simplify the following patch.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 6 +++---
 fs/nfsd/nfssvc.c    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f6119714c..e00b347285 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3185,7 +3185,7 @@ nfs4_check_open_reclaim(clientid_t *clid)
  */
 
 static void
-__nfs4_state_init(void)
+__nfs4_state_start(void)
 {
 	int i;
 	time_t grace_time;
@@ -3235,7 +3235,7 @@ __nfs4_state_init(void)
 }
 
 int
-nfs4_state_init(void)
+nfs4_state_start(void)
 {
 	int status;
 
@@ -3244,7 +3244,7 @@ nfs4_state_init(void)
 	status = nfsd4_init_slabs();
 	if (status)
 		return status;
-	__nfs4_state_init();
+	__nfs4_state_start();
 	nfs4_init = 1;
 	return 0;
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 904df604e8..07b9a065e9 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -95,7 +95,7 @@ nfsd_svc(unsigned short port, int nrservs)
 	error =	nfsd_racache_init(2*nrservs);
 	if (error<0)
 		goto out;
-	error = nfs4_state_init();
+	error = nfs4_state_start();
 	if (error<0)
 		goto out;
 	if (!nfsd_serv) {
-- 
cgit v1.2.2


From ac4d8ff2a57179de3ef7834c6ab3fac430b0a05d Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:30 -0700
Subject: [PATCH] knfsd: nfsd4: clean up state initialization

Separate the state that must be initialized each time the nfsd service starts
from the state that only needs to be initialized once, either at module load
time or statically.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 35 +++++++++++++++++------------------
 fs/nfsd/nfsctl.c    |  1 +
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e00b347285..1f68ce36e7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -117,7 +117,7 @@ static void release_stateid(struct nfs4_stateid *stp, int flags);
  */
 
 /* recall_lock protects the del_recall_lru */
-spinlock_t recall_lock;
+spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
 static struct list_head del_recall_lru;
 
 static void
@@ -3179,23 +3179,13 @@ nfs4_check_open_reclaim(clientid_t *clid)
 	return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
+/* initialization to perform at module load time: */
 
-/* 
- * Start and stop routines
- */
-
-static void
-__nfs4_state_start(void)
+void
+nfs4_state_init(void)
 {
 	int i;
-	time_t grace_time;
 
-	if (!nfs4_reclaim_init) {
-		for (i = 0; i < CLIENT_HASH_SIZE; i++)
-			INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
-		reclaim_str_hashtbl_size = 0;
-		nfs4_reclaim_init = 1;
-	}
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		INIT_LIST_HEAD(&conf_id_hashtbl[i]);
 		INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -3217,19 +3207,28 @@ __nfs4_state_start(void)
 		INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
 		INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
 	}
-	memset(&zerostateid, 0, sizeof(stateid_t));
 	memset(&onestateid, ~0, sizeof(stateid_t));
-
 	INIT_LIST_HEAD(&close_lru);
 	INIT_LIST_HEAD(&client_lru);
 	INIT_LIST_HEAD(&del_recall_lru);
-	spin_lock_init(&recall_lock);
+	for (i = 0; i < CLIENT_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
+	reclaim_str_hashtbl_size = 0;
+	nfs4_reclaim_init = 1;
+}
+
+/* initialization to perform when the nfsd service is started: */
+
+static void
+__nfs4_state_start(void)
+{
+	time_t grace_time;
+
 	boot_time = get_seconds();
 	grace_time = max(user_lease_time, lease_time);
 	lease_time = user_lease_time;
 	printk("NFSD: starting %ld-second grace period\n", grace_time);
 	grace_end = boot_time + grace_time;
-	INIT_WORK(&laundromat_work,laundromat_main, NULL);
 	laundry_wq = create_singlethread_workqueue("nfsd4");
 	queue_delayed_work(laundry_wq, &laundromat_work, NFSD_LEASE_TIME*HZ);
 }
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 161afdcb8f..3d56531a7a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -397,6 +397,7 @@ static int __init init_nfsd(void)
 	nfsd_cache_init();	/* RPC reply cache */
 	nfsd_export_init();	/* Exports table */
 	nfsd_lockd_init();	/* lockd->nfsd callbacks */
+	nfs4_state_init();	/* NFSv4 locking state */
 #ifdef CONFIG_NFSD_V4
 	nfsd_idmap_init();      /* Name to ID mapping */
 #endif /* CONFIG_NFSD_V4 */
-- 
cgit v1.2.2


From 707d4ab7b3aa6d1f7a7d2cd123fb83ba9a528205 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:32 -0700
Subject: [PATCH] knfsd: nfsd4: remove nfs4_reclaim_init

nfs4_reclaim_init is no longer performing any useful function.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1f68ce36e7..fb9b4eb430 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,7 +54,6 @@
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
 static time_t user_lease_time = 90;
-static u32 nfs4_reclaim_init = 0;
 time_t boot_time;
 static time_t grace_end = 0;
 static u32 current_clientid = 1;
@@ -3128,7 +3127,6 @@ nfs4_release_reclaim(void)
 	struct nfs4_client_reclaim *crp = NULL;
 	int i;
 
-	BUG_ON(!nfs4_reclaim_init);
 	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
 		while (!list_empty(&reclaim_str_hashtbl[i])) {
 			crp = list_entry(reclaim_str_hashtbl[i].next,
@@ -3214,7 +3212,6 @@ nfs4_state_init(void)
 	for (i = 0; i < CLIENT_HASH_SIZE; i++)
 		INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
 	reclaim_str_hashtbl_size = 0;
-	nfs4_reclaim_init = 1;
 }
 
 /* initialization to perform when the nfsd service is started: */
-- 
cgit v1.2.2


From bd0b1e954e3ba3e5d2cab941458cf98206471bd2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:35 -0700
Subject: [PATCH] knfsd: nfsd4: idmap initialization

Adopt standard kernel style by defining a no-op function instead of putting
ifdef's in the code where the function is called.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfsctl.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3d56531a7a..3da43a3ed3 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -398,9 +398,7 @@ static int __init init_nfsd(void)
 	nfsd_export_init();	/* Exports table */
 	nfsd_lockd_init();	/* lockd->nfsd callbacks */
 	nfs4_state_init();	/* NFSv4 locking state */
-#ifdef CONFIG_NFSD_V4
 	nfsd_idmap_init();      /* Name to ID mapping */
-#endif /* CONFIG_NFSD_V4 */
 	if (proc_mkdir("fs/nfs", NULL)) {
 		struct proc_dir_entry *entry;
 		entry = create_proc_entry("fs/nfs/exports", 0, NULL);
@@ -427,9 +425,7 @@ static void __exit exit_nfsd(void)
 	remove_proc_entry("fs/nfs", NULL);
 	nfsd_stat_shutdown();
 	nfsd_lockd_shutdown();
-#ifdef CONFIG_NFSD_V4
 	nfsd_idmap_shutdown();
-#endif /* CONFIG_NFSD_V4 */
 	unregister_filesystem(&nfsd_fs_type);
 }
 
-- 
cgit v1.2.2


From 7dea9d280c96f90382ec5d5709433e66a0993ec9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:42 -0700
Subject: [PATCH] knfsd: nfsd4: setclientid simplification

We can be a little more concise here.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fb9b4eb430..2a59d176e6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -674,23 +674,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * or different ip_address
 		 */
 		status = nfserr_clid_inuse;
-		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) {
+		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)
+				|| clp->cl_addr != ip_addr) {
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
 			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 			goto out;
 		}
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-
-		/* 
-	 	 * cl_name match from a previous SETCLIENTID operation
-	 	 * XXX check for additional matches?
-		 */
 		conf = clp;
 		break;
 	}
-- 
cgit v1.2.2


From a55370a3c0106106a975c5a09cee800611d0cf50 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:52 -0700
Subject: [PATCH] knfsd: nfsd4: reboot hash

For the purposes of reboot recovery we keep a directory with subdirectories
each having a name that is the ascii hex representation of the md5 sum of a
client identifier for an active client.

This adds the code to calculate that name.  We also use it for the purposes of
comparing clients, so if someone ever manages to find two client names that
are md5 collisions, then we'll return clid_inuse to the second.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig            |  2 ++
 fs/nfsd/Makefile      |  2 +-
 fs/nfsd/nfs4recover.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c   | 80 ++++++++++++++++++++----------------------
 4 files changed, 138 insertions(+), 43 deletions(-)
 create mode 100644 fs/nfsd/nfs4recover.c

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index a7c0cc3203..5c704d0562 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1413,6 +1413,8 @@ config NFSD_V4
 	bool "Provide NFSv4 server support (EXPERIMENTAL)"
 	depends on NFSD_V3 && EXPERIMENTAL
 	select NFSD_TCP
+	select CRYPTO_MD5
+	select CRYPTO
 	help
 	  If you would like to include the NFSv4 server as well as the NFSv2
 	  and NFSv3 servers, say Y here.  This feature is experimental, and
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9f043f44c9..ce341dc76d 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,5 +10,5 @@ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
-			   nfs4acl.o nfs4callback.o
+			   nfs4acl.o nfs4callback.o nfs4recover.o
 nfsd-objs		:= $(nfsd-y)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 0000000000..841a305d79
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,97 @@
+/*
+*  linux/fs/nfsd/nfs4recover.c
+*
+*  Copyright (c) 2004 The Regents of the University of Michigan.
+*  All rights reserved.
+*
+*  Andy Adamson <andros@citi.umich.edu>
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions
+*  are met:
+*
+*  1. Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*  2. Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in the
+*     documentation and/or other materials provided with the distribution.
+*  3. Neither the name of the University nor the names of its
+*     contributors may be used to endorse or promote products derived
+*     from this software without specific prior written permission.
+*
+*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <asm/uaccess.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+
+
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+
+static void
+md5_to_hex(char *out, char *md5)
+{
+	int i;
+
+	for (i=0; i<16; i++) {
+		unsigned char c = md5[i];
+
+		*out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
+		*out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
+	}
+	*out = '\0';
+}
+
+int
+nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
+{
+	struct xdr_netobj cksum;
+	struct crypto_tfm *tfm;
+	struct scatterlist sg[1];
+	int status = nfserr_resource;
+
+	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
+			clname->len, clname->data);
+	tfm = crypto_alloc_tfm("md5", 0);
+	if (tfm == NULL)
+		goto out;
+	cksum.len = crypto_tfm_alg_digestsize(tfm);
+	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+	if (cksum.data == NULL)
+ 		goto out;
+	crypto_digest_init(tfm);
+
+	sg[0].page = virt_to_page(clname->data);
+	sg[0].offset = offset_in_page(clname->data);
+	sg[0].length = clname->len;
+
+	crypto_digest_update(tfm, sg, 1);
+	crypto_digest_final(tfm, cksum.data);
+
+	md5_to_hex(dname, cksum.data);
+
+	kfree(cksum.data);
+	status = nfs_ok;
+out:
+	if (tfm)
+		crypto_free_tfm(tfm);
+	return status;
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2a59d176e6..0be0b37c84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -231,8 +231,8 @@ unhash_delegation(struct nfs4_delegation *dp)
 
 #define clientid_hashval(id) \
 	((id) & CLIENT_HASH_MASK)
-#define clientstr_hashval(name, namelen) \
-	(opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK)
+#define clientstr_hashval(name) \
+	(opaque_hashval((name), 8) & CLIENT_HASH_MASK)
 /*
  * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
  * used in reboot/reset lease grace period processing
@@ -366,11 +366,12 @@ expire_client(struct nfs4_client *clp)
 }
 
 static struct nfs4_client *
-create_client(struct xdr_netobj name) {
+create_client(struct xdr_netobj name, char *recdir) {
 	struct nfs4_client *clp;
 
 	if (!(clp = alloc_client(name)))
 		goto out;
+	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_count, 1);
 	atomic_set(&clp->cl_callback.cb_set, 0);
 	clp->cl_callback.cb_parsed = 0;
@@ -403,11 +404,9 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
 	get_group_info(target->cr_group_info);
 }
 
-static int
-cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) {
-	if (!n1 || !n2)
-		return 0;
-	return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
+static inline int
+same_name(const char *n1, const char *n2) {
+	return 0 == memcmp(n1, n2, HEXDIR_LEN);
 }
 
 static int
@@ -479,8 +478,7 @@ move_to_confirmed(struct nfs4_client *clp)
 	list_del_init(&clp->cl_strhash);
 	list_del_init(&clp->cl_idhash);
 	list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
-	strhashval = clientstr_hashval(clp->cl_name.data, 
-			clp->cl_name.len);
+	strhashval = clientstr_hashval(clp->cl_recdir);
 	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
 	renew_client(clp);
 }
@@ -651,22 +649,27 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	unsigned int 		strhashval;
 	struct nfs4_client *	conf, * unconf, * new, * clp;
 	int 			status;
+	char                    dname[HEXDIR_LEN];
 	
 	status = nfserr_inval;
 	if (!check_name(clname))
 		goto out;
 
+	status = nfs4_make_rec_clidname(dname, &clname);
+	if (status)
+		goto out;
+
 	/* 
 	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
 	 * We get here on a DRC miss.
 	 */
 
-	strhashval = clientstr_hashval(clname.data, clname.len);
+	strhashval = clientstr_hashval(dname);
 
 	conf = NULL;
 	nfs4_lock_state();
 	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
-		if (!cmp_name(&clp->cl_name, &clname))
+		if (!same_name(clp->cl_recdir, dname))
 			continue;
 		/* 
 		 * CASE 0:
@@ -686,7 +689,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	}
 	unconf = NULL;
 	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
-		if (!cmp_name(&clp->cl_name, &clname))
+		if (!same_name(clp->cl_recdir, dname))
 			continue;
 		/* cl_name match from a previous SETCLIENTID operation */
 		unconf = clp;
@@ -700,7 +703,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 */
 		if (unconf)
 			expire_client(unconf);
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new, &clverifier);
 		new->cl_addr = ip_addr;
@@ -728,7 +732,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		     cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
 				expire_client(unconf);
 		}
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&conf->cl_verifier);
 		new->cl_addr = ip_addr;
@@ -746,7 +751,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * using input clverifier, clname, and callback info
 		 * and generate a new cl_clientid and cl_confirm.
 		 */
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&clverifier);
 		new->cl_addr = ip_addr;
@@ -772,7 +778,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * new cl_verifier and a new cl_confirm
 		 */
 		expire_client(unconf);
-		if (!(new = create_client(clname)))
+		new = create_client(clname, dname);
+		if (new == NULL)
 			goto out;
 		copy_verf(new,&clverifier);
 		new->cl_addr = ip_addr;
@@ -856,7 +863,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	if ((conf && unconf) && 
 	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
-	    (cmp_name(&conf->cl_name,&unconf->cl_name))  &&
+	    (same_name(conf->cl_recdir,unconf->cl_recdir))  &&
 	    (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
 		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 			status = nfserr_clid_inuse;
@@ -876,7 +883,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	if ((conf && !unconf) || 
 	    ((conf && unconf) && 
 	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
-	      !cmp_name(&conf->cl_name, &unconf->cl_name)))) {
+	      !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
 		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
@@ -3074,39 +3081,28 @@ out:
 }
 
 static inline struct nfs4_client_reclaim *
-alloc_reclaim(int namelen)
+alloc_reclaim(void)
 {
-	struct nfs4_client_reclaim *crp = NULL;
-
-	crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
-	if (!crp)
-		return NULL;
-	crp->cr_name.data = kmalloc(namelen, GFP_KERNEL);
-	if (!crp->cr_name.data) {
-		kfree(crp);
-		return NULL;
-	}
-	return crp;
+	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
 }
 
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
 static int
-nfs4_client_to_reclaim(char *name, int namlen)
+nfs4_client_to_reclaim(char *name)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
 
-	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
-	crp = alloc_reclaim(namlen);
+	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
+	crp = alloc_reclaim();
 	if (!crp)
 		return 0;
-	strhashval = clientstr_hashval(name, namlen);
+	strhashval = clientstr_hashval(name);
 	INIT_LIST_HEAD(&crp->cr_strhash);
 	list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
-	memcpy(crp->cr_name.data, name, namlen);
-	crp->cr_name.len = namlen;
+	memcpy(crp->cr_recdir, name, HEXDIR_LEN);
 	reclaim_str_hashtbl_size++;
 	return 1;
 }
@@ -3122,7 +3118,6 @@ nfs4_release_reclaim(void)
 			crp = list_entry(reclaim_str_hashtbl[i].next,
 			                struct nfs4_client_reclaim, cr_strhash);
 			list_del(&crp->cr_strhash);
-			kfree(crp->cr_name.data);
 			kfree(crp);
 			reclaim_str_hashtbl_size--;
 		}
@@ -3145,13 +3140,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
 	if (clp == NULL)
 		return NULL;
 
-	dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
-		            clp->cl_name.len, clp->cl_name.data);
+	dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
+		            clp->cl_name.len, clp->cl_name.data,
+			    clp->cl_recdir);
 
 	/* find clp->cl_name in reclaim_str_hashtbl */
-	strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+	strhashval = clientstr_hashval(clp->cl_recdir);
 	list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
-		if (cmp_name(&crp->cr_name, &clp->cl_name)) {
+		if (same_name(crp->cr_recdir, clp->cl_recdir)) {
 			return crp;
 		}
 	}
-- 
cgit v1.2.2


From 28ce6054f1de89834e5e9522f9e451cd88a35f85 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:03:56 -0700
Subject: [PATCH] knfsd: nfsd4: add find_{un}conf_by_str functions to simplify
 setclientid

Minor setclientid cleanup

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 49 +++++++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0be0b37c84..72f2b6fdae 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -509,6 +509,30 @@ find_unconfirmed_client(clientid_t *clid)
 	return NULL;
 }
 
+static struct nfs4_client *
+find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+
+static struct nfs4_client *
+find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
+		if (same_name(clp->cl_recdir, dname))
+			return clp;
+	}
+	return NULL;
+}
+
 /* a helper function for parse_callback */
 static int
 parse_octet(unsigned int *lenp, char **addrp)
@@ -647,7 +671,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 	};
 	nfs4_verifier		clverifier = setclid->se_verf;
 	unsigned int 		strhashval;
-	struct nfs4_client *	conf, * unconf, * new, * clp;
+	struct nfs4_client	*conf, *unconf, *new;
 	int 			status;
 	char                    dname[HEXDIR_LEN];
 	
@@ -666,35 +690,24 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 
 	strhashval = clientstr_hashval(dname);
 
-	conf = NULL;
 	nfs4_lock_state();
-	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
-		if (!same_name(clp->cl_recdir, dname))
-			continue;
+	conf = find_confirmed_client_by_str(dname, strhashval);
+	if (conf) {
 		/* 
 		 * CASE 0:
 		 * clname match, confirmed, different principal
 		 * or different ip_address
 		 */
 		status = nfserr_clid_inuse;
-		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)
-				|| clp->cl_addr != ip_addr) {
+		if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
+				|| conf->cl_addr != ip_addr) {
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
 			goto out;
 		}
-		conf = clp;
-		break;
-	}
-	unconf = NULL;
-	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
-		if (!same_name(clp->cl_recdir, dname))
-			continue;
-		/* cl_name match from a previous SETCLIENTID operation */
-		unconf = clp;
-		break;
 	}
+	unconf = find_unconfirmed_client_by_str(dname, strhashval);
 	status = nfserr_resource;
 	if (!conf) {
 		/* 
-- 
cgit v1.2.2


From a76b4319ca85b5e3a8098470c623a272d40271cd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:01 -0700
Subject: [PATCH] knfsd: nfsd4: grace period end

For the purposes of reboot recovery, we want to do some work during the
transition period at the end of the grace period.  Some of that work must be
guaranteed to have a certain relationship with the end of the grace period, so
we want to control the transition there.

Our approach is to modify the in_grace() checks to consult a global variable
instead of checking the time directly, to schedule the first run of the
laundromat thread at the end of the grace period, and to set the global
end-of-grace-period there.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 72f2b6fdae..1b2f67f5ee 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -55,7 +55,7 @@
 static time_t lease_time = 90;     /* default lease time */
 static time_t user_lease_time = 90;
 time_t boot_time;
-static time_t grace_end = 0;
+static int in_grace = 1;
 static u32 current_clientid = 1;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
@@ -1908,6 +1908,13 @@ out:
 	return status;
 }
 
+static void
+end_grace(void)
+{
+	dprintk("NFSD: end of grace period\n");
+	in_grace = 0;
+}
+
 time_t
 nfs4_laundromat(void)
 {
@@ -1922,6 +1929,8 @@ nfs4_laundromat(void)
 	nfs4_lock_state();
 
 	dprintk("NFSD: laundromat service - starting\n");
+	if (in_grace)
+		end_grace();
 	list_for_each_safe(pos, next, &client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
 		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -3223,10 +3232,10 @@ __nfs4_state_start(void)
 	boot_time = get_seconds();
 	grace_time = max(user_lease_time, lease_time);
 	lease_time = user_lease_time;
+	in_grace = 1;
 	printk("NFSD: starting %ld-second grace period\n", grace_time);
-	grace_end = boot_time + grace_time;
 	laundry_wq = create_singlethread_workqueue("nfsd4");
-	queue_delayed_work(laundry_wq, &laundromat_work, NFSD_LEASE_TIME*HZ);
+	queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ);
 }
 
 int
@@ -3247,14 +3256,7 @@ nfs4_state_start(void)
 int
 nfs4_in_grace(void)
 {
-	return get_seconds() < grace_end;
-}
-
-void
-set_no_grace(void)
-{
-	printk("NFSD: ERROR in reboot recovery.  State reclaims will fail.\n");
-	grace_end = get_seconds();
+	return in_grace;
 }
 
 time_t
-- 
cgit v1.2.2


From fd39ca9a808c6026989bc2188868a0574eb37108 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:03 -0700
Subject: [PATCH] knfsd: nfsd4: make needlessly global code static

This patch contains the following possible cleanups:

- make needlessly global code static

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4acl.c      |  4 ++--
 fs/nfsd/nfs4callback.c |  7 +++----
 fs/nfsd/nfs4idmap.c    | 12 +++++------
 fs/nfsd/nfs4state.c    | 57 ++++++++++++++++++++++++++------------------------
 fs/nfsd/nfs4xdr.c      |  4 ++--
 5 files changed, 43 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 11ebf6c4aa..4a2105552a 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -125,7 +125,7 @@ static short ace2type(struct nfs4_ace *);
 static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
 static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
 int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
-int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
+static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
 
 struct nfs4_acl *
 nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -775,7 +775,7 @@ out_err:
 	return pacl;
 }
 
-int
+static int
 nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
 {
 	struct list_head *h, *n;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 38c3e1c47d..68bb245491 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -54,7 +54,6 @@
 
 /* declarations */
 static void nfs4_cb_null(struct rpc_task *task);
-extern spinlock_t recall_lock;
 
 /* Index of predefined Linux callback client operations */
 
@@ -329,12 +328,12 @@ out:
         .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
 }
 
-struct rpc_procinfo     nfs4_cb_procedures[] = {
+static struct rpc_procinfo     nfs4_cb_procedures[] = {
     PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
     PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
 };
 
-struct rpc_version              nfs_cb_version4 = {
+static struct rpc_version       nfs_cb_version4 = {
         .number                 = 1,
         .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
         .procs                  = nfs4_cb_procedures
@@ -348,7 +347,7 @@ static struct rpc_version *	nfs_cb_version[] = {
 /*
  * Use the SETCLIENTID credential
  */
-struct rpc_cred *
+static struct rpc_cred *
 nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
 {
         struct auth_cred acred;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4ba540841c..5605a26efc 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -104,7 +104,7 @@ ent_update(struct ent *new, struct ent *itm)
 	ent_init(new, itm);
 }
 
-void
+static void
 ent_put(struct cache_head *ch, struct cache_detail *cd)
 {
 	if (cache_put(ch, cd)) {
@@ -186,7 +186,7 @@ warn_no_idmapd(struct cache_detail *detail)
 static int         idtoname_parse(struct cache_detail *, char *, int);
 static struct ent *idtoname_lookup(struct ent *, int);
 
-struct cache_detail idtoname_cache = {
+static struct cache_detail idtoname_cache = {
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= idtoname_table,
 	.name		= "nfs4.idtoname",
@@ -277,7 +277,7 @@ nametoid_hash(struct ent *ent)
 	return hash_str(ent->name, ENT_HASHBITS);
 }
 
-void
+static void
 nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
     int *blen)
 {
@@ -317,9 +317,9 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
 }
 
 static struct ent *nametoid_lookup(struct ent *, int);
-int                nametoid_parse(struct cache_detail *, char *, int);
+static int         nametoid_parse(struct cache_detail *, char *, int);
 
-struct cache_detail nametoid_cache = {
+static struct cache_detail nametoid_cache = {
 	.hash_size	= ENT_HASHMAX,
 	.hash_table	= nametoid_table,
 	.name		= "nfs4.nametoid",
@@ -330,7 +330,7 @@ struct cache_detail nametoid_cache = {
 	.warn_no_listener = warn_no_idmapd,
 };
 
-int
+static int
 nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 {
 	struct ent ent, *res;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1b2f67f5ee..8a5f777b1e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -54,18 +54,21 @@
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
 static time_t user_lease_time = 90;
-time_t boot_time;
+static time_t boot_time;
 static int in_grace = 1;
 static u32 current_clientid = 1;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
 static u32 current_delegid = 1;
 static u32 nfs4_init;
-stateid_t zerostateid;             /* bits all 0 */
-stateid_t onestateid;              /* bits all 1 */
+static stateid_t zerostateid;             /* bits all 0 */
+static stateid_t onestateid;              /* bits all 1 */
+
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
 
 /* forward declarations */
-struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
 
@@ -77,10 +80,10 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
  */
 static DECLARE_MUTEX(client_sema);
 
-kmem_cache_t *stateowner_slab = NULL;
-kmem_cache_t *file_slab = NULL;
-kmem_cache_t *stateid_slab = NULL;
-kmem_cache_t *deleg_slab = NULL;
+static kmem_cache_t *stateowner_slab = NULL;
+static kmem_cache_t *file_slab = NULL;
+static kmem_cache_t *stateid_slab = NULL;
+static kmem_cache_t *deleg_slab = NULL;
 
 void
 nfs4_lock_state(void)
@@ -116,7 +119,7 @@ static void release_stateid(struct nfs4_stateid *stp, int flags);
  */
 
 /* recall_lock protects the del_recall_lru */
-spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
 static struct list_head del_recall_lru;
 
 static void
@@ -456,7 +459,7 @@ check_name(struct xdr_netobj name) {
 	return 1;
 }
 
-void
+static void
 add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 {
 	unsigned int idhashval;
@@ -468,7 +471,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 	clp->cl_time = get_seconds();
 }
 
-void
+static void
 move_to_confirmed(struct nfs4_client *clp)
 {
 	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
@@ -567,7 +570,7 @@ parse_octet(unsigned int *lenp, char **addrp)
 }
 
 /* parse and set the setclientid ipv4 callback address */
-int
+static int
 parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
 {
 	int temp = 0;
@@ -603,7 +606,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
 	return 1;
 }
 
-void
+static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
 	struct nfs4_callback *cb = &clp->cl_callback;
@@ -1186,7 +1189,7 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 	stp = NULL;
 }
 
-void
+static void
 move_to_close_lru(struct nfs4_stateowner *sop)
 {
 	dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
@@ -1196,7 +1199,7 @@ move_to_close_lru(struct nfs4_stateowner *sop)
 	sop->so_time = get_seconds();
 }
 
-void
+static void
 release_state_owner(struct nfs4_stateid *stp, int flag)
 {
 	struct nfs4_stateowner *sop = stp->st_stateowner;
@@ -1250,7 +1253,7 @@ find_file(struct inode *ino)
 #define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0)
 #define TEST_DENY(x) ((x >= 0 || x < 5)?1:0)
 
-void
+static void
 set_access(unsigned int *access, unsigned long bmap) {
 	int i;
 
@@ -1261,7 +1264,7 @@ set_access(unsigned int *access, unsigned long bmap) {
 	}
 }
 
-void
+static void
 set_deny(unsigned int *deny, unsigned long bmap) {
 	int i;
 
@@ -1287,7 +1290,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
  * Called to check deny when READ with all zero stateid or
  * WRITE with all zero or all one stateid
  */
-int
+static int
 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 {
 	struct inode *ino = current_fh->fh_dentry->d_inode;
@@ -1442,7 +1445,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
 		return -EAGAIN;
 }
 
-struct lock_manager_operations nfsd_lease_mng_ops = {
+static struct lock_manager_operations nfsd_lease_mng_ops = {
 	.fl_break = nfsd_break_deleg_cb,
 	.fl_release_private = nfsd_release_deleg_cb,
 	.fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -1915,7 +1918,7 @@ end_grace(void)
 	in_grace = 0;
 }
 
-time_t
+static time_t
 nfs4_laundromat(void)
 {
 	struct nfs4_client *clp;
@@ -1996,7 +1999,7 @@ laundromat_main(void *not_used)
 /* search ownerid_hashtbl[] and close_lru for stateid owner
  * (stateid->si_stateownerid)
  */
-struct nfs4_stateowner *
+static struct nfs4_stateowner *
 find_openstateowner_id(u32 st_id, int flags) {
 	struct nfs4_stateowner *local = NULL;
 
@@ -2170,7 +2173,7 @@ out:
 /* 
  * Checks for sequence id mutating operations. 
  */
-int
+static int
 nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
 {
 	int status;
@@ -2486,7 +2489,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
 static struct list_head	lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
 static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
 
-struct nfs4_stateid *
+static struct nfs4_stateid *
 find_stateid(stateid_t *stid, int flags)
 {
 	struct nfs4_stateid *local = NULL;
@@ -2550,7 +2553,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
 		lock->fl_end = OFFSET_MAX;
 }
 
-int
+static int
 nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
 {
 	struct nfs4_stateowner *local = NULL;
@@ -2660,7 +2663,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	return sop;
 }
 
-struct nfs4_stateid *
+static struct nfs4_stateid *
 alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
 {
 	struct nfs4_stateid *stp;
@@ -2691,7 +2694,7 @@ out:
 	return stp;
 }
 
-int
+static int
 check_lock_length(u64 offset, u64 length)
 {
 	return ((length == 0)  || ((length != ~(u64)0) &&
@@ -3149,7 +3152,7 @@ nfs4_release_reclaim(void)
 
 /*
  * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
-struct nfs4_client_reclaim *
+static struct nfs4_client_reclaim *
 nfs4_find_reclaim_client(clientid_t *clid)
 {
 	unsigned int strhashval;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index cfe978a72c..91fb171d2a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -136,7 +136,7 @@ xdr_error:					\
 	}					\
 } while (0)
 
-u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
 {
 	/* We want more bytes than seem to be available.
 	 * Maybe we need a new page, maybe we have just run out
@@ -190,7 +190,7 @@ defer_free(struct nfsd4_compoundargs *argp,
 	return 0;
 }
 
-char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
+static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
 {
 	void *new = NULL;
 	if (p == argp->tmp) {
-- 
cgit v1.2.2


From 31f4a6c127f619886bf97f643e546f7788248f3f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:06 -0700
Subject: [PATCH] knfsd: nfsd4: fix unconfirmed list

The setclientid code assumes there is only one match in the unconfirmed list.
Make sure that assumption holds.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8a5f777b1e..67a038dc0d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -743,10 +743,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
 		 * nfs4_client,  but with the new callback info and a 
 		 * new cl_confirm
 		 */
-		if ((unconf) && 
-		    cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) &&
-		     cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
-				expire_client(unconf);
+		if (unconf) {
+			/* Note this is removing unconfirmed {*x***},
+			 * which is stronger than RFC recommended {vxc**}.
+			 * This has the advantage that there is at most
+			 * one {*x***} in either list at any time.
+			 */
+			expire_client(unconf);
 		}
 		new = create_client(clname, dname);
 		if (new == NULL)
-- 
cgit v1.2.2


From 1a69c179a28a9bb9f4d086927b192d5cffe88e50 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:08 -0700
Subject: [PATCH] knfsd: nfsd4: fix setclientid_confirm cases

Setclientid_confirm code confused states 1 and 3 (numbering from the
IMPLEMENTATION section of rfc3530, section 14.2.33).  Fix this.

State 1 allows the client to change the callback channel on the fly.  We don't
implement this currently, so just turn off the callback channel in this case.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 67a038dc0d..997343c230 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -887,10 +887,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 			status = nfserr_clid_inuse;
 		else {
-			expire_client(conf);
-			clp = unconf;
-			move_to_confirmed(unconf);
+			/* XXX: We just turn off callbacks until we can handle
+			  * change request correctly. */
+			clp = conf;
+			clp->cl_callback.cb_parsed = 0;
+			gen_confirm(clp);
+			expire_client(unconf);
 			status = nfs_ok;
+
 		}
 		goto out;
 	} 
@@ -920,9 +924,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
-			status = nfs_ok;
+			unsigned int hash =
+				clientstr_hashval(unconf->cl_recdir);
+			conf = find_confirmed_client_by_str(unconf->cl_recdir,
+									hash);
+			if (conf) {
+				expire_client(conf);
+			}
 			clp = unconf;
 			move_to_confirmed(unconf);
+			status = nfs_ok;
 		}
 		goto out;
 	}
-- 
cgit v1.2.2


From 22de4d837439071a0bec897485d3911383b6ffad Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:09 -0700
Subject: [PATCH] knfsd: nfsd4: fix setclientid_confirm error return

NFS4ERR_INVAL is not a valid error for setclientid_confirm, and
NFS4ERR_CLID_INUSE is the more logical error here anyway.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 997343c230..2dc6da74cc 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -849,12 +849,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	nfs4_lock_state();
 	clp = find_confirmed_client(clid);
 	if (clp) {
-		status = nfserr_inval;
-		/* 
-		 * Found a record for this clientid. If the IP addresses
-		 * don't match, return ERR_INVAL just as if the record had
-		 * not been found.
-		 */
+		status = nfserr_clid_inuse;
 		if (clp->cl_addr != ip_addr) { 
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
@@ -865,7 +860,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	}
 	clp = find_unconfirmed_client(clid);
 	if (clp) {
-		status = nfserr_inval;
+		status = nfserr_clid_inuse;
 		if (clp->cl_addr != ip_addr) { 
 			printk("NFSD: setclientid: string in use by client"
 			"(clientid %08x/%08x)\n",
@@ -949,7 +944,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		goto out;
 	}
 	/* check that we have hit one of the cases...*/
-	status = nfserr_inval;
+	status = nfserr_clid_inuse;
 	goto out;
 out:
 	if (!status)
-- 
cgit v1.2.2


From 08e8987c37cd04d2df211c1e019d8f165d44266e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:11 -0700
Subject: [PATCH] knfsd: nfsd4: setclientid_confirm gotoectomy

Change from "goto" to "else if" format in setclientid_confirm.

From: Fred Isaman
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2dc6da74cc..9014dc2a63 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -891,14 +891,13 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			status = nfs_ok;
 
 		}
-		goto out;
 	} 
 	/* CASE 2:
 	 * conf record that matches input clientid.
 	 * if unconf record that matches input clientid, then unconf->cl_name
 	 * or unconf->cl_verifier don't match the conf record.
 	 */
-	if ((conf && !unconf) || 
+	else if ((conf && !unconf) ||
 	    ((conf && unconf) && 
 	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
 	      !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
@@ -908,14 +907,13 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			clp = conf;
 			status = nfs_ok;
 		}
-		goto out;
 	}
 	/* CASE 3:
 	 * conf record not found.
 	 * unconf record found. 
 	 * unconf->cl_confirm matches input confirm
 	 */ 
-	if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
+	else if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
 		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
@@ -930,7 +928,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			move_to_confirmed(unconf);
 			status = nfs_ok;
 		}
-		goto out;
 	}
 	/* CASE 4:
 	 * conf record not found, or if conf, then conf->cl_confirm does not
@@ -938,14 +935,14 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	 * unconf record not found, or if unconf, then unconf->cl_confirm 
 	 * does not match input confirm.
 	 */
-	if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
+	else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
 	    (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
 		status = nfserr_stale_clientid;
-		goto out;
 	}
-	/* check that we have hit one of the cases...*/
-	status = nfserr_clid_inuse;
-	goto out;
+	else {
+		/* check that we have hit one of the cases...*/
+		status = nfserr_clid_inuse;
+	}
 out:
 	if (!status)
 		nfsd4_probe_callback(clp);
-- 
cgit v1.2.2


From 7c79f7377cd4f2a50d51475f4c7966a3e60596a7 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:13 -0700
Subject: [PATCH] knfsd: nfsd4: setclientid_confirm comments

Trivial whitespace and comment fixes.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 59 ++++++++++++++++++++++++++---------------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9014dc2a63..f100eeab54 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -869,16 +869,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		}
 		unconf = clp;
 	}
-	/* CASE 1: 
-	* unconf record that matches input clientid and input confirm.
-	* conf record that matches input clientid.
-	* conf  and unconf records match names, verifiers 
-	*/
 	if ((conf && unconf) && 
 	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
 	    (same_name(conf->cl_recdir,unconf->cl_recdir))  &&
 	    (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
+		/* CASE 1:
+		* unconf record that matches input clientid and input confirm.
+		* conf record that matches input clientid.
+		* conf and unconf records match names, verifiers
+		*/
 		if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 			status = nfserr_clid_inuse;
 		else {
@@ -891,29 +891,29 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			status = nfs_ok;
 
 		}
-	} 
-	/* CASE 2:
-	 * conf record that matches input clientid.
-	 * if unconf record that matches input clientid, then unconf->cl_name
-	 * or unconf->cl_verifier don't match the conf record.
-	 */
-	else if ((conf && !unconf) ||
+	} else if ((conf && !unconf) ||
 	    ((conf && unconf) && 
 	     (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
 	      !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
+		/* CASE 2:
+		 * conf record that matches input clientid.
+		 * if unconf record matches input clientid, then
+		 * unconf->cl_name or unconf->cl_verifier don't match the
+		 * conf record.
+		 */
 		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
 			clp = conf;
 			status = nfs_ok;
 		}
-	}
-	/* CASE 3:
-	 * conf record not found.
-	 * unconf record found. 
-	 * unconf->cl_confirm matches input confirm
-	 */ 
-	else if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
+	} else if (!conf && unconf
+			&& cmp_verf(&unconf->cl_confirm, &confirm)) {
+		/* CASE 3:
+		 * conf record not found.
+		 * unconf record found.
+		 * unconf->cl_confirm matches input confirm
+		 */
 		if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 			status = nfserr_clid_inuse;
 		} else {
@@ -928,18 +928,17 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			move_to_confirmed(unconf);
 			status = nfs_ok;
 		}
-	}
-	/* CASE 4:
-	 * conf record not found, or if conf, then conf->cl_confirm does not
-	 * match input confirm.
-	 * unconf record not found, or if unconf, then unconf->cl_confirm 
-	 * does not match input confirm.
-	 */
-	else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
-	    (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
+	} else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
+	    && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm,
+				    				&confirm)))) {
+		/* CASE 4:
+		 * conf record not found, or if conf, conf->cl_confirm does not
+		 * match input confirm.
+		 * unconf record not found, or if unconf, unconf->cl_confirm
+		 * does not match input confirm.
+		 */
 		status = nfserr_stale_clientid;
-	}
-	else {
+	} else {
 		/* check that we have hit one of the cases...*/
 		status = nfserr_clid_inuse;
 	}
-- 
cgit v1.2.2


From 21ab45a480ec7705d177e959ebf452d62340c004 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:14 -0700
Subject: [PATCH] knfsd: nfsd4: miscellaneous setclientid_confirm cleanup

Minor cleanup, remove some unnecessary printk's.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 49 +++++++++++++++++--------------------------------
 1 file changed, 17 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f100eeab54..8841151981 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -834,7 +834,7 @@ int
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
 	u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
-	struct nfs4_client *clp, *conf = NULL, *unconf = NULL;
+	struct nfs4_client *conf, *unconf;
 	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
 	clientid_t * clid = &setclientid_confirm->sc_clientid;
 	int status;
@@ -847,28 +847,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	 */
 
 	nfs4_lock_state();
-	clp = find_confirmed_client(clid);
-	if (clp) {
-		status = nfserr_clid_inuse;
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-		conf = clp;
-	}
-	clp = find_unconfirmed_client(clid);
-	if (clp) {
-		status = nfserr_clid_inuse;
-		if (clp->cl_addr != ip_addr) { 
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-			goto out;
-		}
-		unconf = clp;
-	}
+
+	conf = find_confirmed_client(clid);
+	unconf = find_unconfirmed_client(clid);
+
+	status = nfserr_clid_inuse;
+	if (conf && conf->cl_addr != ip_addr)
+		goto out;
+	if (unconf && unconf->cl_addr != ip_addr)
+		goto out;
+
 	if ((conf && unconf) && 
 	    (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 	    (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
@@ -884,9 +872,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		else {
 			/* XXX: We just turn off callbacks until we can handle
 			  * change request correctly. */
-			clp = conf;
-			clp->cl_callback.cb_parsed = 0;
-			gen_confirm(clp);
+			conf->cl_callback.cb_parsed = 0;
+			gen_confirm(conf);
 			expire_client(unconf);
 			status = nfs_ok;
 
@@ -901,12 +888,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		 * unconf->cl_name or unconf->cl_verifier don't match the
 		 * conf record.
 		 */
-		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
+		if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
 			status = nfserr_clid_inuse;
-		} else {
-			clp = conf;
+		else
 			status = nfs_ok;
-		}
 	} else if (!conf && unconf
 			&& cmp_verf(&unconf->cl_confirm, &confirm)) {
 		/* CASE 3:
@@ -924,8 +909,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			if (conf) {
 				expire_client(conf);
 			}
-			clp = unconf;
 			move_to_confirmed(unconf);
+			conf = unconf;
 			status = nfs_ok;
 		}
 	} else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
@@ -944,7 +929,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 	}
 out:
 	if (!status)
-		nfsd4_probe_callback(clp);
+		nfsd4_probe_callback(conf);
 	nfs4_unlock_state();
 	return status;
 }
-- 
cgit v1.2.2


From ea1da636e956ad1591a74904f23d98bbc26a644b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:17 -0700
Subject: [PATCH] knfsd: nfsd4: rename state list fields

Trivial renaming patch:

I can never remember, while looking at various lists relating the nfsd4 state
structures, which are the "heads" and which are items on other lists, or which
structures are actually on the various lists.  The following convention helps
me: given structures foo and bar, with foo containing the head of a list of
bars, use "bars" for the name of the head of the list contained in the struct
foo, and use "per_foo" for the list entry embedded in each struct bar.

Already done for struct nfs4_file; go ahead and do it for the other nfsd4
state structures.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 78 ++++++++++++++++++++++++++---------------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8841151981..22e76e3f06 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -154,8 +154,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 	dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
 	if (dp == NULL)
 		return dp;
-	INIT_LIST_HEAD(&dp->dl_del_perfile);
-	INIT_LIST_HEAD(&dp->dl_del_perclnt);
+	INIT_LIST_HEAD(&dp->dl_perfile);
+	INIT_LIST_HEAD(&dp->dl_perclnt);
 	INIT_LIST_HEAD(&dp->dl_recall_lru);
 	dp->dl_client = clp;
 	get_nfs4_file(fp);
@@ -176,8 +176,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
 		        current_fh->fh_handle.fh_size);
 	dp->dl_time = 0;
 	atomic_set(&dp->dl_count, 1);
-	list_add(&dp->dl_del_perfile, &fp->fi_delegations);
-	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
+	list_add(&dp->dl_perfile, &fp->fi_delegations);
+	list_add(&dp->dl_perclnt, &clp->cl_delegations);
 	return dp;
 }
 
@@ -214,8 +214,8 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
-	list_del_init(&dp->dl_del_perfile);
-	list_del_init(&dp->dl_del_perclnt);
+	list_del_init(&dp->dl_perfile);
+	list_del_init(&dp->dl_perclnt);
 	spin_lock(&recall_lock);
 	list_del_init(&dp->dl_recall_lru);
 	spin_unlock(&recall_lock);
@@ -345,11 +345,11 @@ expire_client(struct nfs4_client *clp)
 
 	INIT_LIST_HEAD(&reaplist);
 	spin_lock(&recall_lock);
-	while (!list_empty(&clp->cl_del_perclnt)) {
-		dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
+	while (!list_empty(&clp->cl_delegations)) {
+		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
 		dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
 				dp->dl_flock);
-		list_del_init(&dp->dl_del_perclnt);
+		list_del_init(&dp->dl_perclnt);
 		list_move(&dp->dl_recall_lru, &reaplist);
 	}
 	spin_unlock(&recall_lock);
@@ -361,8 +361,8 @@ expire_client(struct nfs4_client *clp)
 	list_del(&clp->cl_idhash);
 	list_del(&clp->cl_strhash);
 	list_del(&clp->cl_lru);
-	while (!list_empty(&clp->cl_perclient)) {
-		sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
+	while (!list_empty(&clp->cl_openowners)) {
+		sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
 		release_stateowner(sop);
 	}
 	put_nfs4_client(clp);
@@ -380,8 +380,8 @@ create_client(struct xdr_netobj name, char *recdir) {
 	clp->cl_callback.cb_parsed = 0;
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
-	INIT_LIST_HEAD(&clp->cl_perclient);
-	INIT_LIST_HEAD(&clp->cl_del_perclnt);
+	INIT_LIST_HEAD(&clp->cl_openowners);
+	INIT_LIST_HEAD(&clp->cl_delegations);
 	INIT_LIST_HEAD(&clp->cl_lru);
 out:
 	return clp;
@@ -1074,13 +1074,13 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_idhash);
 	INIT_LIST_HEAD(&sop->so_strhash);
 	INIT_LIST_HEAD(&sop->so_perclient);
-	INIT_LIST_HEAD(&sop->so_perfilestate);
-	INIT_LIST_HEAD(&sop->so_perlockowner);  /* not used */
+	INIT_LIST_HEAD(&sop->so_stateids);
+	INIT_LIST_HEAD(&sop->so_perstateid);  /* not used */
 	INIT_LIST_HEAD(&sop->so_close_lru);
 	sop->so_time = 0;
 	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
-	list_add(&sop->so_perclient, &clp->cl_perclient);
+	list_add(&sop->so_perclient, &clp->cl_openowners);
 	sop->so_is_open_owner = 1;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -1098,10 +1098,10 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp)
 {
 	struct nfs4_stateowner *lock_sop;
 
-	while (!list_empty(&open_stp->st_perlockowner)) {
-		lock_sop = list_entry(open_stp->st_perlockowner.next,
-				struct nfs4_stateowner, so_perlockowner);
-		/* list_del(&open_stp->st_perlockowner);  */
+	while (!list_empty(&open_stp->st_lockowners)) {
+		lock_sop = list_entry(open_stp->st_lockowners.next,
+				struct nfs4_stateowner, so_perstateid);
+		/* list_del(&open_stp->st_lockowners);  */
 		BUG_ON(lock_sop->so_is_open_owner);
 		release_stateowner(lock_sop);
 	}
@@ -1116,10 +1116,10 @@ unhash_stateowner(struct nfs4_stateowner *sop)
 	list_del(&sop->so_strhash);
 	if (sop->so_is_open_owner)
 		list_del(&sop->so_perclient);
-	list_del(&sop->so_perlockowner);
-	while (!list_empty(&sop->so_perfilestate)) {
-		stp = list_entry(sop->so_perfilestate.next, 
-			struct nfs4_stateid, st_perfilestate);
+	list_del(&sop->so_perstateid);
+	while (!list_empty(&sop->so_stateids)) {
+		stp = list_entry(sop->so_stateids.next,
+			struct nfs4_stateid, st_perstateowner);
 		if (sop->so_is_open_owner)
 			release_stateid(stp, OPEN_STATE);
 		else
@@ -1141,11 +1141,11 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
 	unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
 
 	INIT_LIST_HEAD(&stp->st_hash);
-	INIT_LIST_HEAD(&stp->st_perfilestate);
-	INIT_LIST_HEAD(&stp->st_perlockowner);
+	INIT_LIST_HEAD(&stp->st_perstateowner);
+	INIT_LIST_HEAD(&stp->st_lockowners);
 	INIT_LIST_HEAD(&stp->st_perfile);
 	list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
-	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+	list_add(&stp->st_perstateowner, &sop->so_stateids);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
@@ -1167,7 +1167,7 @@ release_stateid(struct nfs4_stateid *stp, int flags)
 
 	list_del(&stp->st_hash);
 	list_del(&stp->st_perfile);
-	list_del(&stp->st_perfilestate);
+	list_del(&stp->st_perstateowner);
 	if (flags & OPEN_STATE) {
 		release_stateid_lockowners(stp);
 		stp->st_vfs_file = NULL;
@@ -1201,7 +1201,7 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
 	 * released by the laundromat service after the lease period
 	 * to enable us to handle CLOSE replay
 	 */
-	if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
+	if (sop->so_confirmed && list_empty(&sop->so_stateids))
 		move_to_close_lru(sop);
 }
 
@@ -1548,7 +1548,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
 {
 	struct nfs4_delegation *dp;
 
-	list_for_each_entry(dp, &fp->fi_delegations, dl_del_perfile) {
+	list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
 		if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
 			return dp;
 	}
@@ -1892,7 +1892,7 @@ nfsd4_renew(clientid_t *clid)
 	}
 	renew_client(clp);
 	status = nfserr_cb_path_down;
-	if (!list_empty(&clp->cl_del_perclnt)
+	if (!list_empty(&clp->cl_delegations)
 			&& !atomic_read(&clp->cl_callback.cb_set))
 		goto out;
 	status = nfs_ok;
@@ -2634,13 +2634,13 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 	INIT_LIST_HEAD(&sop->so_idhash);
 	INIT_LIST_HEAD(&sop->so_strhash);
 	INIT_LIST_HEAD(&sop->so_perclient);
-	INIT_LIST_HEAD(&sop->so_perfilestate);
-	INIT_LIST_HEAD(&sop->so_perlockowner);
+	INIT_LIST_HEAD(&sop->so_stateids);
+	INIT_LIST_HEAD(&sop->so_perstateid);
 	INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
 	sop->so_time = 0;
 	list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
 	list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
-	list_add(&sop->so_perlockowner, &open_stp->st_perlockowner);
+	list_add(&sop->so_perstateid, &open_stp->st_lockowners);
 	sop->so_is_open_owner = 0;
 	sop->so_id = current_ownerid++;
 	sop->so_client = clp;
@@ -2664,11 +2664,11 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
 		goto out;
 	INIT_LIST_HEAD(&stp->st_hash);
 	INIT_LIST_HEAD(&stp->st_perfile);
-	INIT_LIST_HEAD(&stp->st_perfilestate);
-	INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
+	INIT_LIST_HEAD(&stp->st_perstateowner);
+	INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
 	list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
 	list_add(&stp->st_perfile, &fp->fi_stateids);
-	list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+	list_add(&stp->st_perstateowner, &sop->so_stateids);
 	stp->st_stateowner = sop;
 	get_nfs4_file(fp);
 	stp->st_file = fp;
@@ -3081,8 +3081,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 		/* check for any locks held by any stateid
 		 * associated with the (lock) stateowner */
 		status = nfserr_locks_held;
-		list_for_each_entry(stp, &local->so_perfilestate,
-				st_perfilestate) {
+		list_for_each_entry(stp, &local->so_stateids,
+				st_perstateowner) {
 			if (check_for_locks(stp->st_vfs_file, local))
 				goto out;
 		}
-- 
cgit v1.2.2


From 3e9e3dbe0fe36c824ce2c5d7b05997c87a64bbdc Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:20 -0700
Subject: [PATCH] knfsd: nfsd4: allow multiple lockowners

>From the language of rfc3530 section 8.1.3 (e.g., the suggestion that a
"process id" might be a reasonable lockowner value) it's conceivable that a
client might want to use the same lockowner string on multiple files, so we may
as well allow that.  We expect each use of open_to_lockowner to create a
distinct seqid stream, though.

For now we're also allowing multiple uses of open_to_lockowner with the same
open, though it seems unlikely clients would actually do that.

Also add a comment reminding myself of some very non-scalable data structures.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4state.c | 76 +++++++++++++++++++++++------------------------------
 1 file changed, 33 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 22e76e3f06..26d00465c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2583,22 +2583,6 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
 		deny->ld_type = NFS4_WRITE_LT;
 }
 
-static struct nfs4_stateowner *
-find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
-{
-	struct nfs4_stateowner *local = NULL;
-	int i;
-
-	for (i = 0; i < LOCK_HASH_SIZE; i++) {
-		list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) {
-			if (!cmp_owner_str(local, owner, clid))
-				continue;
-			return local;
-		}
-	}
-	return NULL;
-}
-
 static struct nfs4_stateowner *
 find_lockstateowner_str(struct inode *inode, clientid_t *clid,
 		struct xdr_netobj *owner)
@@ -2697,7 +2681,7 @@ check_lock_length(u64 offset, u64 length)
 int
 nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
 {
-	struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL;
+	struct nfs4_stateowner *open_sop = NULL;
 	struct nfs4_stateid *lock_stp;
 	struct file *filp;
 	struct file_lock file_lock;
@@ -2756,16 +2740,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
 		strhashval = lock_ownerstr_hashval(fp->fi_inode, 
 				open_sop->so_client->cl_clientid.cl_id, 
 				&lock->v.new.owner);
-		/* 
-		 * If we already have this lock owner, the client is in 
-		 * error (or our bookeeping is wrong!) 
-		 * for asking for a 'new lock'.
-		 */
-		status = nfserr_bad_stateid;
-		lock_sop = find_lockstateowner(&lock->v.new.owner,
-						&lock->v.new.clientid);
-		if (lock_sop)
-			goto out;
+		/* XXX: Do we need to check for duplicate stateowners on
+		 * the same file, or should they just be allowed (and
+		 * create new stateids)? */
 		status = nfserr_resource;
 		if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
 			goto out;
@@ -3056,8 +3033,11 @@ int
 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
 {
 	clientid_t *clid = &rlockowner->rl_clientid;
-	struct nfs4_stateowner *local = NULL;
+	struct nfs4_stateowner *sop;
+	struct nfs4_stateid *stp;
 	struct xdr_netobj *owner = &rlockowner->rl_owner;
+	struct list_head matches;
+	int i;
 	int status;
 
 	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -3073,22 +3053,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
 
 	nfs4_lock_state();
 
-	status = nfs_ok;
-	local = find_lockstateowner(owner, clid);
-	if (local) {
-		struct nfs4_stateid *stp;
-
-		/* check for any locks held by any stateid
-		 * associated with the (lock) stateowner */
-		status = nfserr_locks_held;
-		list_for_each_entry(stp, &local->so_stateids,
-				st_perstateowner) {
-			if (check_for_locks(stp->st_vfs_file, local))
-				goto out;
+	status = nfserr_locks_held;
+	/* XXX: we're doing a linear search through all the lockowners.
+	 * Yipes!  For now we'll just hope clients aren't really using
+	 * release_lockowner much, but eventually we have to fix these
+	 * data structures. */
+	INIT_LIST_HEAD(&matches);
+	for (i = 0; i < LOCK_HASH_SIZE; i++) {
+		list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
+			if (!cmp_owner_str(sop, owner, clid))
+				continue;
+			list_for_each_entry(stp, &sop->so_stateids,
+					st_perstateowner) {
+				if (check_for_locks(stp->st_vfs_file, sop))
+					goto out;
+				/* Note: so_perclient unused for lockowners,
+				 * so it's OK to fool with here. */
+				list_add(&sop->so_perclient, &matches);
+			}
 		}
-		/* no locks held by (lock) stateowner */
-		status = nfs_ok;
-		release_stateowner(local);
+	}
+	/* Clients probably won't expect us to return with some (but not all)
+	 * of the lockowner state released; so don't release any until all
+	 * have been checked. */
+	status = nfs_ok;
+	list_for_each_entry(sop, &matches, so_perclient) {
+		release_stateowner(sop);
 	}
 out:
 	nfs4_unlock_state();
-- 
cgit v1.2.2


From cb36d6345752fa24827044c68e15f6708a40d9f6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:23 -0700
Subject: [PATCH] knfsd: nfsd4: remove cb_parsed

The cb_parsed field is only used by probe_callback, to determine whether the
callback information has been filled in by setclientid.  But there is no way
that probe_callback() can be called without that having already happened, so
that check is superfluous, as is cb_parsed.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4callback.c | 4 +---
 fs/nfsd/nfs4state.c    | 5 +----
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 68bb245491..583c0710e4 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -386,9 +386,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	char                    hostname[32];
 	int status;
 
-	dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
-			cb->cb_parsed, atomic_read(&cb->cb_set));
-	if (!cb->cb_parsed || atomic_read(&cb->cb_set))
+	if (atomic_read(&cb->cb_set))
 		return;
 
 	/* Initialize address */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 26d00465c2..0b47a97e95 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -377,7 +377,6 @@ create_client(struct xdr_netobj name, char *recdir) {
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_count, 1);
 	atomic_set(&clp->cl_callback.cb_set, 0);
-	clp->cl_callback.cb_parsed = 0;
 	INIT_LIST_HEAD(&clp->cl_idhash);
 	INIT_LIST_HEAD(&clp->cl_strhash);
 	INIT_LIST_HEAD(&clp->cl_openowners);
@@ -620,14 +619,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 		goto out_err;
 	cb->cb_prog = se->se_callback_prog;
 	cb->cb_ident = se->se_callback_ident;
-	cb->cb_parsed = 1;
 	return;
 out_err:
 	printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
 		"will not receive delegations\n",
 		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 
-	cb->cb_parsed = 0;
 	return;
 }
 
@@ -872,7 +869,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 		else {
 			/* XXX: We just turn off callbacks until we can handle
 			  * change request correctly. */
-			conf->cl_callback.cb_parsed = 0;
+			atomic_set(&conf->cl_callback.cb_set, 0);
 			gen_confirm(conf);
 			expire_client(unconf);
 			status = nfs_ok;
-- 
cgit v1.2.2


From 190e4fbf96037e5e526ba3210f2bcc2a3b6fe964 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:25 -0700
Subject: [PATCH] knfsd: nfsd4: initialize recovery directory

NFSv4 clients are required to know what state they have on the server so that
they can reclaim it on server reboot.  However, it is possible for
pathological combinations of server reboots and network partitions to leave a
client in a state where it cannot know whether it has lost its state on the
server.

For this reason, rfc3530 requires that we store some information about clients
to stable storage.

So we maintain a directory /var/lib/nfs/v4recovery with a subdirectory for
each client with active state.  We leave open the possibility of including
files underneath each such subdirectory with information about the client, but
for now the subdirectories are empty.

We create a client subdirectory whenever a client makes its first non-reclaim
open_confirm.

We remove a client subdirectory whenever either
        a) its lease expires, or
        b) the grace period ends without it reclaiming anything.
When handling reclaims, we allow the reclaim if and only if the client doing
the reclaim has a subdirectory.

This patch adds just the code to scan the recovery directory on nfsd startup.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c   |  18 +++++-
 2 files changed, 182 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 841a305d79..2dc9851a1d 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -39,6 +39,9 @@
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
+#include <linux/param.h>
+#include <linux/file.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/scatterlist.h>
 #include <linux/crypto.h>
@@ -46,6 +49,27 @@
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
+/* Globals */
+char recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static struct nameidata rec_dir;
+static int rec_dir_init = 0;
+
+static void
+nfs4_save_user(uid_t *saveuid, gid_t *savegid)
+{
+	*saveuid = current->fsuid;
+	*savegid = current->fsgid;
+	current->fsuid = 0;
+	current->fsgid = 0;
+}
+
+static void
+nfs4_reset_user(uid_t saveuid, gid_t savegid)
+{
+	current->fsuid = saveuid;
+	current->fsgid = savegid;
+}
+
 static void
 md5_to_hex(char *out, char *md5)
 {
@@ -95,3 +119,145 @@ out:
 		crypto_free_tfm(tfm);
 	return status;
 }
+
+typedef int (recdir_func)(struct dentry *, struct dentry *);
+
+struct dentry_list {
+	struct dentry *dentry;
+	struct list_head list;
+};
+
+struct dentry_list_arg {
+	struct list_head dentries;
+	struct dentry *parent;
+};
+
+static int
+nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
+		loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct dentry_list_arg *dla = arg;
+	struct list_head *dentries = &dla->dentries;
+	struct dentry *parent = dla->parent;
+	struct dentry *dentry;
+	struct dentry_list *child;
+
+	if (name && isdotent(name, namlen))
+		return nfs_ok;
+	dentry = lookup_one_len(name, parent, namlen);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+	child = kmalloc(sizeof(*child), GFP_KERNEL);
+	if (child == NULL)
+		return -ENOMEM;
+	child->dentry = dentry;
+	list_add(&child->list, dentries);
+	return 0;
+}
+
+static int
+nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
+{
+	struct file *filp;
+	struct dentry_list_arg dla = {
+		.parent = dir,
+	};
+	struct list_head *dentries = &dla.dentries;
+	struct dentry_list *child;
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	if (!rec_dir_init)
+		return 0;
+
+	nfs4_save_user(&uid, &gid);
+
+	filp = dentry_open(dget(dir), mntget(rec_dir.mnt),
+			O_RDWR);
+	status = PTR_ERR(filp);
+	if (IS_ERR(filp))
+		goto out;
+	INIT_LIST_HEAD(dentries);
+	status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
+	fput(filp);
+	while (!list_empty(dentries)) {
+		child = list_entry(dentries->next, struct dentry_list, list);
+		status = f(dir, child->dentry);
+		if (status)
+			goto out;
+		list_del(&child->list);
+		dput(child->dentry);
+		kfree(child);
+	}
+out:
+	while (!list_empty(dentries)) {
+		child = list_entry(dentries->next, struct dentry_list, list);
+		list_del(&child->list);
+		dput(child->dentry);
+		kfree(child);
+	}
+	nfs4_reset_user(uid, gid);
+	return status;
+}
+
+static int
+load_recdir(struct dentry *parent, struct dentry *child)
+{
+	if (child->d_name.len != HEXDIR_LEN - 1) {
+		printk("nfsd4: illegal name %s in recovery directory\n",
+				child->d_name.name);
+		/* Keep trying; maybe the others are OK: */
+		return nfs_ok;
+	}
+	nfs4_client_to_reclaim(child->d_name.name);
+	return nfs_ok;
+}
+
+int
+nfsd4_recdir_load(void) {
+	int status;
+
+	status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
+	if (status)
+		printk("nfsd4: failed loading clients from recovery"
+			" directory %s\n", rec_dir.dentry->d_name.name);
+	return status;
+}
+
+/*
+ * Hold reference to the recovery directory.
+ */
+
+void
+nfsd4_init_recdir(char *rec_dirname)
+{
+	uid_t			uid = 0;
+	gid_t			gid = 0;
+	int 			status;
+
+	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+			rec_dirname);
+
+	BUG_ON(rec_dir_init);
+
+	nfs4_save_user(&uid, &gid);
+
+	status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir);
+	if (status == -ENOENT)
+		printk("NFSD: recovery directory %s doesn't exist\n",
+				rec_dirname);
+
+	if (!status)
+		rec_dir_init = 1;
+	nfs4_reset_user(uid, gid);
+}
+
+void
+nfsd4_shutdown_recdir(void)
+{
+	if (!rec_dir_init)
+		return;
+	rec_dir_init = 0;
+	path_release(&rec_dir);
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0b47a97e95..6b9d23c39a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -71,6 +71,7 @@ static stateid_t onestateid;              /* bits all 1 */
 static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
+extern char recovery_dirname[];
 
 /* Locking:
  *
@@ -3091,8 +3092,8 @@ alloc_reclaim(void)
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-static int
-nfs4_client_to_reclaim(char *name)
+int
+nfs4_client_to_reclaim(const char *name)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp = NULL;
@@ -3202,6 +3203,17 @@ nfs4_state_init(void)
 	reclaim_str_hashtbl_size = 0;
 }
 
+static void
+nfsd4_load_reboot_recovery_data(void)
+{
+	int status;
+
+	nfsd4_init_recdir(recovery_dirname);
+	status = nfsd4_recdir_load();
+	if (status)
+		printk("NFSD: Failure reading reboot recovery data\n");
+}
+
 /* initialization to perform when the nfsd service is started: */
 
 static void
@@ -3228,6 +3240,7 @@ nfs4_state_start(void)
 	status = nfsd4_init_slabs();
 	if (status)
 		return status;
+	nfsd4_load_reboot_recovery_data();
 	__nfs4_state_start();
 	nfs4_init = 1;
 	return 0;
@@ -3286,6 +3299,7 @@ __nfs4_state_shutdown(void)
 	cancel_delayed_work(&laundromat_work);
 	flush_workqueue(laundry_wq);
 	destroy_workqueue(laundry_wq);
+	nfsd4_shutdown_recdir();
 	nfs4_init = 0;
 }
 
-- 
cgit v1.2.2


From c7b9a45927e74c81d6562153f7fde9d32da00159 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:30 -0700
Subject: [PATCH] knfsd: nfsd4: reboot recovery

This patch adds the code to create and remove client subdirectories from the
recovery directory, as described in the previous patch comment.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4state.c   |  16 +++++
 2 files changed, 185 insertions(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2dc9851a1d..2805c5245e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -120,6 +120,70 @@ out:
 	return status;
 }
 
+static int
+nfsd4_rec_fsync(struct dentry *dentry)
+{
+	struct file *filp;
+	int status = nfs_ok;
+
+	dprintk("NFSD: nfs4_fsync_rec_dir\n");
+	filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR);
+	if (IS_ERR(filp)) {
+		status = PTR_ERR(filp);
+		goto out;
+	}
+	if (filp->f_op && filp->f_op->fsync)
+		status = filp->f_op->fsync(filp, filp->f_dentry, 0);
+	fput(filp);
+out:
+	if (status)
+		printk("nfsd4: unable to sync recovery directory\n");
+	return status;
+}
+
+int
+nfsd4_create_clid_dir(struct nfs4_client *clp)
+{
+	char *dname = clp->cl_recdir;
+	struct dentry *dentry;
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
+
+	if (!rec_dir_init || clp->cl_firststate)
+		return 0;
+
+	nfs4_save_user(&uid, &gid);
+
+	/* lock the parent */
+	down(&rec_dir.dentry->d_inode->i_sem);
+
+	dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
+	if (IS_ERR(dentry)) {
+		status = PTR_ERR(dentry);
+		goto out_unlock;
+	}
+	status = -EEXIST;
+	if (dentry->d_inode) {
+		dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
+		goto out_put;
+	}
+	status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
+out_put:
+	dput(dentry);
+out_unlock:
+	up(&rec_dir.dentry->d_inode->i_sem);
+	if (status == 0) {
+		clp->cl_firststate = 1;
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	}
+	nfs4_reset_user(uid, gid);
+	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
+	return status;
+}
+
 typedef int (recdir_func)(struct dentry *, struct dentry *);
 
 struct dentry_list {
@@ -201,6 +265,111 @@ out:
 	return status;
 }
 
+static int
+nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
+{
+	int status;
+
+	if (!S_ISREG(dir->d_inode->i_mode)) {
+		printk("nfsd4: non-file found in client recovery directory\n");
+		return -EINVAL;
+	}
+	down(&dir->d_inode->i_sem);
+	status = vfs_unlink(dir->d_inode, dentry);
+	up(&dir->d_inode->i_sem);
+	return status;
+}
+
+static int
+nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
+{
+	int status;
+
+	/* For now this directory should already be empty, but we empty it of
+	 * any regular files anyway, just in case the directory was created by
+	 * a kernel from the future.... */
+	nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
+	down(&dir->d_inode->i_sem);
+	status = vfs_rmdir(dir->d_inode, dentry);
+	up(&dir->d_inode->i_sem);
+	return status;
+}
+
+static int
+nfsd4_unlink_clid_dir(char *name, int namlen)
+{
+	struct dentry *dentry;
+	int status;
+
+	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+
+	dentry = lookup_one_len(name, rec_dir.dentry, namlen);
+	if (IS_ERR(dentry)) {
+		status = PTR_ERR(dentry);
+		return status;
+	}
+	status = -ENOENT;
+	if (!dentry->d_inode)
+		goto out;
+
+	status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
+out:
+	dput(dentry);
+	return status;
+}
+
+void
+nfsd4_remove_clid_dir(struct nfs4_client *clp)
+{
+	uid_t uid;
+	gid_t gid;
+	int status;
+
+	if (!rec_dir_init || !clp->cl_firststate)
+		return;
+
+	nfs4_save_user(&uid, &gid);
+	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
+	nfs4_reset_user(uid, gid);
+	if (status == 0)
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	if (status)
+		printk("NFSD: Failed to remove expired client state directory"
+				" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
+	return;
+}
+
+static int
+purge_old(struct dentry *parent, struct dentry *child)
+{
+	int status;
+
+	if (nfs4_has_reclaimed_state(child->d_name.name))
+		return nfs_ok;
+
+	status = nfsd4_clear_clid_dir(parent, child);
+	if (status)
+		printk("failed to remove client recovery directory %s\n",
+				child->d_name.name);
+	/* Keep trying, success or failure: */
+	return nfs_ok;
+}
+
+void
+nfsd4_recdir_purge_old(void) {
+	int status;
+
+	if (!rec_dir_init)
+		return;
+	status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
+	if (status == 0)
+		status = nfsd4_rec_fsync(rec_dir.dentry);
+	if (status)
+		printk("nfsd4: failed to purge old clients from recovery"
+			" directory %s\n", rec_dir.dentry->d_name.name);
+	return;
+}
+
 static int
 load_recdir(struct dentry *parent, struct dentry *child)
 {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b9d23c39a..6cca358cd6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -905,6 +905,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
 			conf = find_confirmed_client_by_str(unconf->cl_recdir,
 									hash);
 			if (conf) {
+				nfsd4_remove_clid_dir(conf);
 				expire_client(conf);
 			}
 			move_to_confirmed(unconf);
@@ -1691,6 +1692,7 @@ nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
 			*status = nfserr_reclaim_bad;
 		else {
 			open->op_stateowner->so_confirmed = 1;
+			open->op_stateowner->so_client->cl_firststate = 1;
 			open->op_stateowner->so_seqid--;
 		}
 	}
@@ -1903,6 +1905,7 @@ static void
 end_grace(void)
 {
 	dprintk("NFSD: end of grace period\n");
+	nfsd4_recdir_purge_old();
 	in_grace = 0;
 }
 
@@ -1932,6 +1935,7 @@ nfs4_laundromat(void)
 		}
 		dprintk("NFSD: purging unused client (clientid %08x)\n",
 			clp->cl_clientid.cl_id);
+		nfsd4_remove_clid_dir(clp);
 		expire_client(clp);
 	}
 	INIT_LIST_HEAD(&reaplist);
@@ -2320,6 +2324,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 		         stp->st_stateid.si_stateownerid,
 		         stp->st_stateid.si_fileid,
 		         stp->st_stateid.si_generation);
+
+	nfsd4_create_clid_dir(sop->so_client);
 out:
 	if (oc->oc_stateowner)
 		nfs4_get_stateowner(oc->oc_stateowner);
@@ -3089,6 +3095,16 @@ alloc_reclaim(void)
 	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
 }
 
+int
+nfs4_has_reclaimed_state(const char *name)
+{
+	unsigned int strhashval = clientstr_hashval(name);
+	struct nfs4_client *clp;
+
+	clp = find_confirmed_client_by_str(name, strhashval);
+	return clp ? 1 : 0;
+}
+
 /*
  * failure => all reset bets are off, nfserr_no_grace...
  */
-- 
cgit v1.2.2


From 0964a3d3f1aa96468091924f6b0c391a46dc6d0b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@cse.unsw.edu.au>
Date: Thu, 23 Jun 2005 22:04:32 -0700
Subject: [PATCH] knfsd: nfsd4 reboot dirname fix

Set the recovery directory via /proc/fs/nfsd/nfsv4recoverydir.

It may be changed at any time, but is used only on startup.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4recover.c |  1 -
 fs/nfsd/nfs4state.c   | 37 +++++++++++++++++++++++++++++++++++--
 fs/nfsd/nfsctl.c      | 23 +++++++++++++++++++++++
 3 files changed, 58 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2805c5245e..095f1740f3 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -50,7 +50,6 @@
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
 /* Globals */
-char recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 static struct nameidata rec_dir;
 static int rec_dir_init = 0;
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6cca358cd6..89e36526d7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,6 +48,7 @@
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
+#include <linux/namei.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -71,7 +72,8 @@ static stateid_t onestateid;              /* bits all 1 */
 static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
-extern char recovery_dirname[];
+static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static void nfs4_set_recdir(char *recdir);
 
 /* Locking:
  *
@@ -3224,8 +3226,10 @@ nfsd4_load_reboot_recovery_data(void)
 {
 	int status;
 
-	nfsd4_init_recdir(recovery_dirname);
+	nfs4_lock_state();
+	nfsd4_init_recdir(user_recovery_dirname);
 	status = nfsd4_recdir_load();
+	nfs4_unlock_state();
 	if (status)
 		printk("NFSD: Failure reading reboot recovery data\n");
 }
@@ -3329,6 +3333,35 @@ nfs4_state_shutdown(void)
 	nfs4_unlock_state();
 }
 
+static void
+nfs4_set_recdir(char *recdir)
+{
+	nfs4_lock_state();
+	strcpy(user_recovery_dirname, recdir);
+	nfs4_unlock_state();
+}
+
+/*
+ * Change the NFSv4 recovery directory to recdir.
+ */
+int
+nfs4_reset_recoverydir(char *recdir)
+{
+	int status;
+	struct nameidata nd;
+
+	status = path_lookup(recdir, LOOKUP_FOLLOW, &nd);
+	if (status)
+		return status;
+	status = -ENOTDIR;
+	if (S_ISDIR(nd.dentry->d_inode->i_mode)) {
+		nfs4_set_recdir(recdir);
+		status = 0;
+	}
+	path_release(&nd);
+	return status;
+}
+
 /*
  * Called when leasetime is changed.
  *
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3da43a3ed3..841c562991 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -51,6 +51,7 @@ enum {
 	NFSD_Fh,
 	NFSD_Threads,
 	NFSD_Leasetime,
+	NFSD_RecoveryDir,
 };
 
 /*
@@ -66,6 +67,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size);
 static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
 static ssize_t write_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
 
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Svc] = write_svc,
@@ -78,6 +80,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Fh] = write_filehandle,
 	[NFSD_Threads] = write_threads,
 	[NFSD_Leasetime] = write_leasetime,
+	[NFSD_RecoveryDir] = write_recoverydir,
 };
 
 static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
@@ -349,6 +352,25 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 	return strlen(buf);
 }
 
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+	char *mesg = buf;
+	char *recdir;
+	int len, status;
+
+	if (size > PATH_MAX || buf[size-1] != '\n')
+		return -EINVAL;
+	buf[size-1] = 0;
+
+	recdir = mesg;
+	len = qword_get(&mesg, recdir, size);
+	if (len <= 0)
+		return -EINVAL;
+
+	status = nfs4_reset_recoverydir(recdir);
+	return strlen(buf);
+}
+
 /*----------------------------------------------------------------------------*/
 /*
  *	populating the filesystem.
@@ -369,6 +391,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 #ifdef CONFIG_NFSD_V4
 		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
 #endif
 		/* last one */ {""}
 	};
-- 
cgit v1.2.2


From 3d41088fa327782b14b5659dbcfff62ec704c23c Mon Sep 17 00:00:00 2001
From: Martin Waitz <tali@admingilde.org>
Date: Thu, 23 Jun 2005 22:05:21 -0700
Subject: [PATCH] DocBook: update comments

This patch updates some comments to match code changes.

Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysfs/file.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e9cfa39f40..d72c1ce485 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,7 +14,7 @@
 #define to_subsys(k) container_of(k,struct subsystem,kset.kobj)
 #define to_sattr(a) container_of(a,struct subsys_attribute,attr)
 
-/**
+/*
  * Subsystem file operations.
  * These operations allow subsystems to have files that can be 
  * read/written. 
@@ -192,8 +192,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 
 /**
  *	flush_write_buffer - push buffer to kobject.
- *	@file:		file pointer.
+ *	@dentry:	dentry to the attribute
  *	@buffer:	data buffer for file.
+ *	@count:		number of bytes
  *
  *	Get the correct pointers for the kobject and the attribute we're
  *	dealing with, then call the store() method for the attribute, 
-- 
cgit v1.2.2


From ceffc078528befc008c6f2c2c4decda79eabd534 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:25 -0700
Subject: [PATCH] xip: fs/mm: execute in place

- generic_file* file operations no longer have a xip/non-xip split
- filemap_xip.c implements a new set of fops that require the get_xip_page
  aop to work properly. All new fops are exported GPL-only (I would not
  like to see any code other than GPL modules use them)
- __xip_unmap now uses page_check_address, which is no longer static
  in rmap.c, and defined in linux/rmap.h
- mm/filemap.h is now much cleaner, containing just Linus'
  inline funcs moved here from filemap.c
- fix includes in filemap_xip to make it build cleanly on i386

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/open.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/open.c b/fs/open.c
index 8ec63f7359..3f4a4286fd 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -808,7 +808,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 
 	/* NB: we're sure to have correct a_ops only after f_op->open */
 	if (f->f_flags & O_DIRECT) {
-		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
+		if (!f->f_mapping->a_ops ||
+		    ((!f->f_mapping->a_ops->direct_IO) &&
+		    (!f->f_mapping->a_ops->get_xip_page))) {
 			fput(f);
 			f = ERR_PTR(-EINVAL);
 		}
-- 
cgit v1.2.2


From 6d79125bba55ee82701f1c7d4ebbc1aa20ecbe4e Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:26 -0700
Subject: [PATCH] xip: ext2: execute in place

These are the ext2 related parts.  Ext2 now uses the xip_* file operations
along with the get_xip_page aop when mounted with -o xip.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig       | 17 ++++++++++++
 fs/ext2/Makefile |  1 +
 fs/ext2/ext2.h   |  2 ++
 fs/ext2/file.c   | 18 +++++++++++++
 fs/ext2/inode.c  | 31 +++++++++++++++++++---
 fs/ext2/namei.c  | 12 ++++++---
 fs/ext2/super.c  | 27 ++++++++++++++++++-
 fs/ext2/xip.c    | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext2/xip.h    | 25 ++++++++++++++++++
 9 files changed, 205 insertions(+), 8 deletions(-)
 create mode 100644 fs/ext2/xip.c
 create mode 100644 fs/ext2/xip.h

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 5c704d0562..8157f2e2d5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -50,6 +50,23 @@ config EXT2_FS_SECURITY
 	  If you are not using a security module that requires using
 	  extended attributes for file security labels, say N.
 
+config EXT2_FS_XIP
+	bool "Ext2 execute in place support"
+	depends on EXT2_FS
+	help
+	  Execute in place can be used on memory-backed block devices. If you
+	  enable this option, you can select to mount block devices which are
+	  capable of this feature without using the page cache.
+
+	  If you do not use a block device that is capable of using this,
+	  or if unsure, say N.
+
+config FS_XIP
+# execute in place
+	bool
+	depends on EXT2_FS_XIP
+	default y
+
 config EXT3_FS
 	tristate "Ext3 journalling file system support"
 	help
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index ee240a14e7..c5d02da73b 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -10,3 +10,4 @@ ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 ext2-$(CONFIG_EXT2_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
 ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
 ext2-$(CONFIG_EXT2_FS_SECURITY)	 += xattr_security.o
+ext2-$(CONFIG_EXT2_FS_XIP)	 += xip.o
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8f0fd726c3..eed521d22c 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -147,9 +147,11 @@ extern struct file_operations ext2_dir_operations;
 /* file.c */
 extern struct inode_operations ext2_file_inode_operations;
 extern struct file_operations ext2_file_operations;
+extern struct file_operations ext2_xip_file_operations;
 
 /* inode.c */
 extern struct address_space_operations ext2_aops;
+extern struct address_space_operations ext2_aops_xip;
 extern struct address_space_operations ext2_nobh_aops;
 
 /* namei.c */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f5e86141ec..2b3d572365 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,6 +55,24 @@ struct file_operations ext2_file_operations = {
 	.sendfile	= generic_file_sendfile,
 };
 
+#ifdef CONFIG_EXT2_FS_XIP
+struct file_operations ext2_xip_file_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.write		= do_sync_write,
+	.aio_read	= xip_file_aio_read,
+	.aio_write	= xip_file_aio_write,
+	.ioctl		= ext2_ioctl,
+	.mmap		= xip_file_mmap,
+	.open		= generic_file_open,
+	.release	= ext2_release_file,
+	.fsync		= ext2_sync_file,
+	.readv		= xip_file_readv,
+	.writev		= xip_file_writev,
+	.sendfile	= xip_file_sendfile,
+};
+#endif
+
 struct inode_operations ext2_file_inode_operations = {
 	.truncate	= ext2_truncate,
 #ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a50d9db4b6..53dceb0c65 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -33,6 +33,7 @@
 #include <linux/mpage.h>
 #include "ext2.h"
 #include "acl.h"
+#include "xip.h"
 
 MODULE_AUTHOR("Remy Card and others");
 MODULE_DESCRIPTION("Second Extended Filesystem");
@@ -594,6 +595,16 @@ out:
 	if (err)
 		goto cleanup;
 
+	if (ext2_use_xip(inode->i_sb)) {
+		/*
+		 * we need to clear the block
+		 */
+		err = ext2_clear_xip_target (inode,
+			le32_to_cpu(chain[depth-1].key));
+		if (err)
+			goto cleanup;
+	}
+
 	if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
 		goto changed;
 
@@ -691,6 +702,11 @@ struct address_space_operations ext2_aops = {
 	.writepages		= ext2_writepages,
 };
 
+struct address_space_operations ext2_aops_xip = {
+	.bmap			= ext2_bmap,
+	.get_xip_page		= ext2_get_xip_page,
+};
+
 struct address_space_operations ext2_nobh_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
@@ -910,7 +926,9 @@ void ext2_truncate (struct inode * inode)
 	iblock = (inode->i_size + blocksize-1)
 					>> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
 
-	if (test_opt(inode->i_sb, NOBH))
+	if (mapping_is_xip(inode->i_mapping))
+		xip_truncate_page(inode->i_mapping, inode->i_size);
+	else if (test_opt(inode->i_sb, NOBH))
 		nobh_truncate_page(inode->i_mapping, inode->i_size);
 	else
 		block_truncate_page(inode->i_mapping,
@@ -1110,11 +1128,16 @@ void ext2_read_inode (struct inode * inode)
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		inode->i_fop = &ext2_file_operations;
-		if (test_opt(inode->i_sb, NOBH))
+		if (ext2_use_xip(inode->i_sb)) {
+			inode->i_mapping->a_ops = &ext2_aops_xip;
+			inode->i_fop = &ext2_xip_file_operations;
+		} else if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
-		else
+			inode->i_fop = &ext2_file_operations;
+		} else {
 			inode->i_mapping->a_ops = &ext2_aops;
+			inode->i_fop = &ext2_file_operations;
+		}
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext2_dir_inode_operations;
 		inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3176b3d3ff..c5513953c8 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -34,6 +34,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 
 /*
  * Couple of helper functions - make the code slightly cleaner.
@@ -127,11 +128,16 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
 	int err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		inode->i_fop = &ext2_file_operations;
-		if (test_opt(inode->i_sb, NOBH))
+		if (ext2_use_xip(inode->i_sb)) {
+			inode->i_mapping->a_ops = &ext2_aops_xip;
+			inode->i_fop = &ext2_xip_file_operations;
+		} else if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
-		else
+			inode->i_fop = &ext2_file_operations;
+		} else {
 			inode->i_mapping->a_ops = &ext2_aops;
+			inode->i_fop = &ext2_file_operations;
+		}
 		mark_inode_dirty(inode);
 		err = ext2_add_nondir(dentry, inode);
 	}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 661c3d98d9..876e391f28 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 
 static void ext2_sync_super(struct super_block *sb,
 			    struct ext2_super_block *es);
@@ -257,7 +258,7 @@ enum {
 	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
-	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip,
 	Opt_ignore, Opt_err,
 };
 
@@ -286,6 +287,7 @@ static match_table_t tokens = {
 	{Opt_nouser_xattr, "nouser_xattr"},
 	{Opt_acl, "acl"},
 	{Opt_noacl, "noacl"},
+	{Opt_xip, "xip"},
 	{Opt_ignore, "grpquota"},
 	{Opt_ignore, "noquota"},
 	{Opt_ignore, "quota"},
@@ -397,6 +399,13 @@ static int parse_options (char * options,
 			printk("EXT2 (no)acl options not supported\n");
 			break;
 #endif
+		case Opt_xip:
+#ifdef CONFIG_EXT2_FS_XIP
+			set_opt (sbi->s_mount_opt, XIP);
+#else
+			printk("EXT2 xip option not supported\n");
+#endif
+			break;
 		case Opt_ignore:
 			break;
 		default:
@@ -640,6 +649,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
 		 MS_POSIXACL : 0);
 
+	ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
+				    EXT2_MOUNT_XIP if not */
+
 	if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
 	    (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
 	     EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
@@ -668,6 +680,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
+	if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) ||
+				  (sb->s_blocksize != blocksize))) {
+		if (!silent)
+			printk("XIP: Unsupported blocksize\n");
+		goto failed_mount;
+	}
+
 	/* If the blocksize doesn't match, re-read the thing.. */
 	if (sb->s_blocksize != blocksize) {
 		brelse(bh);
@@ -916,6 +935,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 {
 	struct ext2_sb_info * sbi = EXT2_SB(sb);
 	struct ext2_super_block * es;
+	unsigned long old_mount_opt = sbi->s_mount_opt;
 
 	/*
 	 * Allow the "check" option to be passed as a remount option.
@@ -927,6 +947,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
 
 	es = sbi->s_es;
+	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
+	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
+	    invalidate_inodes(sb))
+		ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
+			     "xip remain in cache (no functional problem)");
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
 		return 0;
 	if (*flags & MS_RDONLY) {
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
new file mode 100644
index 0000000000..d44431d1a3
--- /dev/null
+++ b/fs/ext2/xip.c
@@ -0,0 +1,80 @@
+/*
+ *  linux/fs/ext2/xip.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/buffer_head.h>
+#include <linux/ext2_fs_sb.h>
+#include <linux/ext2_fs.h>
+#include "ext2.h"
+#include "xip.h"
+
+static inline int
+__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) {
+	BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
+	return inode->i_sb->s_bdev->bd_disk->fops
+		->direct_access(inode->i_sb->s_bdev,sector,data);
+}
+
+int
+ext2_clear_xip_target(struct inode *inode, int block) {
+	sector_t sector = block*(PAGE_SIZE/512);
+	unsigned long data;
+	int rc;
+
+	rc = __inode_direct_access(inode, sector, &data);
+	if (rc)
+		return rc;
+	clear_page((void*)data);
+	return 0;
+}
+
+void ext2_xip_verify_sb(struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+
+	if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) {
+		if ((sb->s_bdev == NULL) ||
+			sb->s_bdev->bd_disk == NULL ||
+			sb->s_bdev->bd_disk->fops == NULL ||
+			sb->s_bdev->bd_disk->fops->direct_access == NULL) {
+			sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
+			ext2_warning(sb, __FUNCTION__,
+				"ignoring xip option - not supported by bdev");
+		}
+	}
+}
+
+struct page*
+ext2_get_xip_page(struct address_space *mapping, sector_t blockno,
+		   int create)
+{
+	int rc;
+	unsigned long data;
+	struct buffer_head tmp;
+
+	tmp.b_state = 0;
+	tmp.b_blocknr = 0;
+	rc = ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp,
+				create);
+	if (rc)
+		return ERR_PTR(rc);
+	if (tmp.b_blocknr == 0) {
+		/* SPARSE block */
+		BUG_ON(create);
+		return ERR_PTR(-ENODATA);
+	}
+
+	rc = __inode_direct_access
+		(mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data);
+	if (rc)
+		return ERR_PTR(rc);
+
+	SetPageUptodate(virt_to_page(data));
+	return virt_to_page(data);
+}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
new file mode 100644
index 0000000000..aa85331d6c
--- /dev/null
+++ b/fs/ext2/xip.h
@@ -0,0 +1,25 @@
+/*
+ *  linux/fs/ext2/xip.h
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+
+#ifdef CONFIG_EXT2_FS_XIP
+extern void ext2_xip_verify_sb (struct super_block *);
+extern int ext2_clear_xip_target (struct inode *, int);
+
+static inline int ext2_use_xip (struct super_block *sb)
+{
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
+}
+struct page* ext2_get_xip_page (struct address_space *, sector_t, int);
+#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page)
+#else
+#define mapping_is_xip(map)			0
+#define ext2_xip_verify_sb(sb)			do { } while (0)
+#define ext2_use_xip(sb)			0
+#define ext2_clear_xip_target(inode, chain)	0
+#define ext2_get_xip_page			NULL
+#endif
-- 
cgit v1.2.2


From eb6fe0c388e43b02e261f0fdee60e42f6298d7f7 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Thu, 23 Jun 2005 22:05:28 -0700
Subject: [PATCH] xip: reduce code duplication

This patch reworks filemap_xip.c with the goal of reducing code duplication
from mm/filemap.c.  It applies against 2.6.12-rc6-mm1.  Instead of
implementing the aio functions, this one implements the synchronous
read/write functions only.  For readv and writev, the generic fallback is
used.  For aio, we rely on the application doing the fallback.  Since our
"synchronous" function does memcpy immediately anyway, there is no
performance difference between using the fallbacks or implementing each
operation.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext2/file.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2b3d572365..a484412fc7 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -58,17 +58,13 @@ struct file_operations ext2_file_operations = {
 #ifdef CONFIG_EXT2_FS_XIP
 struct file_operations ext2_xip_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= xip_file_aio_read,
-	.aio_write	= xip_file_aio_write,
+	.read		= xip_file_read,
+	.write		= xip_file_write,
 	.ioctl		= ext2_ioctl,
 	.mmap		= xip_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext2_release_file,
 	.fsync		= ext2_sync_file,
-	.readv		= xip_file_readv,
-	.writev		= xip_file_writev,
 	.sendfile	= xip_file_sendfile,
 };
 #endif
-- 
cgit v1.2.2


From 52c1da39534fb382c061de58b65f678ad74b59f5 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 23 Jun 2005 22:05:33 -0700
Subject: [PATCH] make various thing static

Another rollup of patches which give various symbols static scope

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c      | 2 +-
 fs/reiserfs/stree.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 3b93e5d750..208c079e9f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -337,7 +337,7 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-void umount_tree(struct vfsmount *mnt)
+static void umount_tree(struct vfsmount *mnt)
 {
 	struct vfsmount *p;
 	LIST_HEAD(kill);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index c47f8fd31a..63158491e1 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -223,7 +223,7 @@ extern struct tree_balance * cur_tb;
 const struct reiserfs_key  MIN_KEY = {0, 0, {{0, 0},}};
 
 /* Maximal possible key. It is never in the tree. */
-const struct reiserfs_key  MAX_KEY = {
+static const struct reiserfs_key  MAX_KEY = {
 	__constant_cpu_to_le32(0xffffffff),
 	__constant_cpu_to_le32(0xffffffff),
 	{{__constant_cpu_to_le32(0xffffffff),
-- 
cgit v1.2.2


From 75043cb5b386e5a01fd03b88f647dd992de02f97 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@mail.ru>
Date: Fri, 24 Jun 2005 20:52:52 +0000
Subject: [PATCH] fs/qnx4/*: fix sparse warnings

This patch fixes sparse warnings in the qnx4fs (and might even make
qnx4fs work on big-endian boxes)

Signed-off-by: Alexey Dobriyan <adobriyan@mail.ru>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Anders Larsen <al@alarsen.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/qnx4/dir.c   | 2 +-
 fs/qnx4/inode.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index cd66147cca..7a8f5595c2 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -61,7 +61,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
 						ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
 					else {
 						le  = (struct qnx4_link_info*)de;
-						ino = ( le->dl_inode_blk - 1 ) *
+						ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
 							QNX4_INODES_PER_BLOCK +
 							le->dl_inode_ndx;
 					}
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index aa92d6b76a..b79162a354 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -236,7 +236,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
 	struct buffer_head *bh = NULL;
 	struct qnx4_xblk *xblk = NULL;
 	struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
-	qnx4_nxtnt_t nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
+	u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
 
 	if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) {
 		// iblock is in the first extent. This is easy.
@@ -372,7 +372,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
 		printk("qnx4: unable to read the superblock\n");
 		goto outnobh;
 	}
-	if ( le32_to_cpu( *(__u32*)bh->b_data ) != QNX4_SUPER_MAGIC ) {
+	if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
 		if (!silent)
 			printk("qnx4: wrong fsid in superblock.\n");
 		goto out;
-- 
cgit v1.2.2