From 48ce8b056c88920c8ac187781048f5dae33c81b9 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Mon, 5 Jun 2006 08:21:03 -0500
Subject: JFS: commit_mutex cleanups

I look at code, and see that
1)locks wasn't release in the opposite order in which they were taken
2)in jfs_rename we lock new_ip, and in "error path" we didn't unlock it
3)I see strange expression: "! !"

May be this worth to fix?

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
---
 fs/jfs/jfs_txnmgr.c |  2 +-
 fs/jfs/namei.c      | 33 ++++++++++++++++-----------------
 2 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index ac3d66948e..49618dd94f 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2944,7 +2944,7 @@ int jfs_sync(void *arg)
 				 * Inode is being freed
 				 */
 				list_del_init(&jfs_ip->anon_inode_list);
-			} else if (! !mutex_trylock(&jfs_ip->commit_mutex)) {
+			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
 				/*
 				 * inode will be removed from anonymous list
 				 * when it is committed
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 09ea03f622..295268ad23 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 
       out3:
 	txEnd(tid);
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 
       out3:
 	txEnd(tid);
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 		if (rc == -EIO)
 			txAbort(tid, 1);
 		txEnd(tid);
-		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
+		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 
 		goto out2;
 	}
@@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 
 	txEnd(tid);
 
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 
 	/*
 	 * Truncating the directory index table is not guaranteed.  It
@@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		if (rc == -EIO)
 			txAbort(tid, 1);	/* Marks FS Dirty */
 		txEnd(tid);
-		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
+		mutex_unlock(&JFS_IP(dip)->commit_mutex);
 		IWRITE_UNLOCK(ip);
 		goto out1;
 	}
@@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 		if ((new_size = commitZeroLink(tid, ip)) < 0) {
 			txAbort(tid, 1);	/* Marks FS Dirty */
 			txEnd(tid);
-			mutex_unlock(&JFS_IP(dip)->commit_mutex);
 			mutex_unlock(&JFS_IP(ip)->commit_mutex);
+			mutex_unlock(&JFS_IP(dip)->commit_mutex);
 			IWRITE_UNLOCK(ip);
 			rc = new_size;
 			goto out1;
@@ -556,9 +556,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 
 	txEnd(tid);
 
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
-
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 
 	while (new_size && (rc == 0)) {
 		tid = txBegin(dip->i_sb, 0);
@@ -847,8 +846,8 @@ static int jfs_link(struct dentry *old_dentry,
       out:
 	txEnd(tid);
 
-	mutex_unlock(&JFS_IP(dir)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dir)->commit_mutex);
 
 	jfs_info("jfs_link: rc:%d", rc);
 	return rc;
@@ -1037,8 +1036,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 
       out3:
 	txEnd(tid);
-	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(dip)->commit_mutex);
 	if (rc) {
 		free_ea_wmap(ip);
 		ip->i_nlink = 0;
@@ -1160,10 +1159,11 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (S_ISDIR(new_ip->i_mode)) {
 			new_ip->i_nlink--;
 			if (new_ip->i_nlink) {
-				mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
-				mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+				mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
 				if (old_dir != new_dir)
 					mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
+				mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+				mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
 				if (!S_ISDIR(old_ip->i_mode) && new_ip)
 					IWRITE_UNLOCK(new_ip);
 				jfs_error(new_ip->i_sb,
@@ -1281,13 +1281,12 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
       out4:
 	txEnd(tid);
-
-	mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
-	mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
-	if (old_dir != new_dir)
-		mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
 	if (new_ip)
 		mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
+	if (old_dir != new_dir)
+		mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
+	mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
+	mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
 
 	while (new_size && (rc == 0)) {
 		tid = txBegin(new_ip->i_sb, 0);
-- 
cgit v1.2.2


From 8ba10ab128e88bfbe58f7164543827ef3c3a2c88 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Sat, 8 Jul 2006 02:17:40 +0000
Subject: [CIFS] CIFS_DEBUG2 depends on CIFS

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 00aa3d5c5a..7db05742a9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1791,6 +1791,7 @@ config CIFS_POSIX
 
 config CIFS_DEBUG2
 	bool "Enable additional CIFS debugging routines"
+	depends on CIFS
 	help
 	   Enabling this option adds a few more debugging routines
 	   to the cifs code which slightly increases the size of
-- 
cgit v1.2.2


From aadd06e5c56b9ff5117ec77e59eada43dc46e2fc Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 10 Jul 2006 11:00:01 +0200
Subject: [PATCH] splice: fix problems with sys_tee()

Several issues noticed/fixed:

- We cannot reliably block in link_pipe() while holding both input and output
  mutexes. So do preparatory checks before locking down both mutexes and doing
  the link.

- The ipipe->nrbufs vs i check was bad, because we could have dropped the
  ipipe lock in-between. This causes us to potentially look at unknown
  buffers if we were racing with someone else reading this pipe.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 fs/splice.c | 238 +++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 133 insertions(+), 105 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 05fd2787be..684bca3d3a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1306,6 +1306,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
 	return error;
 }
 
+/*
+ * Make sure there's data to read. Wait for input if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+	int ret;
+
+	/*
+	 * Check ->nrbufs without the inode lock first. This function
+	 * is speculative anyways, so missing one is ok.
+	 */
+	if (pipe->nrbufs)
+		return 0;
+
+	ret = 0;
+	mutex_lock(&pipe->inode->i_mutex);
+
+	while (!pipe->nrbufs) {
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		if (!pipe->writers)
+			break;
+		if (!pipe->waiting_writers) {
+			if (flags & SPLICE_F_NONBLOCK) {
+				ret = -EAGAIN;
+				break;
+			}
+		}
+		pipe_wait(pipe);
+	}
+
+	mutex_unlock(&pipe->inode->i_mutex);
+	return ret;
+}
+
+/*
+ * Make sure there's writeable room. Wait for room if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+	int ret;
+
+	/*
+	 * Check ->nrbufs without the inode lock first. This function
+	 * is speculative anyways, so missing one is ok.
+	 */
+	if (pipe->nrbufs < PIPE_BUFFERS)
+		return 0;
+
+	ret = 0;
+	mutex_lock(&pipe->inode->i_mutex);
+
+	while (pipe->nrbufs >= PIPE_BUFFERS) {
+		if (!pipe->readers) {
+			send_sig(SIGPIPE, current, 0);
+			ret = -EPIPE;
+			break;
+		}
+		if (flags & SPLICE_F_NONBLOCK) {
+			ret = -EAGAIN;
+			break;
+		}
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		pipe->waiting_writers++;
+		pipe_wait(pipe);
+		pipe->waiting_writers--;
+	}
+
+	mutex_unlock(&pipe->inode->i_mutex);
+	return ret;
+}
+
 /*
  * Link contents of ipipe to opipe.
  */
@@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		     size_t len, unsigned int flags)
 {
 	struct pipe_buffer *ibuf, *obuf;
-	int ret, do_wakeup, i, ipipe_first;
-
-	ret = do_wakeup = ipipe_first = 0;
+	int ret = 0, i = 0, nbuf;
 
 	/*
 	 * Potential ABBA deadlock, work around it by ordering lock
@@ -1324,126 +1401,62 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
 	if (ipipe->inode < opipe->inode) {
-		ipipe_first = 1;
-		mutex_lock(&ipipe->inode->i_mutex);
-		mutex_lock(&opipe->inode->i_mutex);
+		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
 	} else {
-		mutex_lock(&opipe->inode->i_mutex);
-		mutex_lock(&ipipe->inode->i_mutex);
+		mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
 	}
 
-	for (i = 0;; i++) {
+	do {
 		if (!opipe->readers) {
 			send_sig(SIGPIPE, current, 0);
 			if (!ret)
 				ret = -EPIPE;
 			break;
 		}
-		if (ipipe->nrbufs - i) {
-			ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
 
-			/*
-			 * If we have room, fill this buffer
-			 */
-			if (opipe->nrbufs < PIPE_BUFFERS) {
-				int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
-
-				/*
-				 * Get a reference to this pipe buffer,
-				 * so we can copy the contents over.
-				 */
-				ibuf->ops->get(ipipe, ibuf);
-
-				obuf = opipe->bufs + nbuf;
-				*obuf = *ibuf;
-
-				/*
-				 * Don't inherit the gift flag, we need to
-				 * prevent multiple steals of this page.
-				 */
-				obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
-
-				if (obuf->len > len)
-					obuf->len = len;
-
-				opipe->nrbufs++;
-				do_wakeup = 1;
-				ret += obuf->len;
-				len -= obuf->len;
-
-				if (!len)
-					break;
-				if (opipe->nrbufs < PIPE_BUFFERS)
-					continue;
-			}
-
-			/*
-			 * We have input available, but no output room.
-			 * If we already copied data, return that. If we
-			 * need to drop the opipe lock, it must be ordered
-			 * last to avoid deadlocks.
-			 */
-			if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
-				if (!ret)
-					ret = -EAGAIN;
-				break;
-			}
-			if (signal_pending(current)) {
-				if (!ret)
-					ret = -ERESTARTSYS;
-				break;
-			}
-			if (do_wakeup) {
-				smp_mb();
-				if (waitqueue_active(&opipe->wait))
-					wake_up_interruptible(&opipe->wait);
-				kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-				do_wakeup = 0;
-			}
+		/*
+		 * If we have iterated all input buffers or ran out of
+		 * output room, break.
+		 */
+		if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
+			break;
 
-			opipe->waiting_writers++;
-			pipe_wait(opipe);
-			opipe->waiting_writers--;
-			continue;
-		}
+		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
+		nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
 
 		/*
-		 * No input buffers, do the usual checks for available
-		 * writers and blocking and wait if necessary
+		 * Get a reference to this pipe buffer,
+		 * so we can copy the contents over.
 		 */
-		if (!ipipe->writers)
-			break;
-		if (!ipipe->waiting_writers) {
-			if (ret)
-				break;
-		}
+		ibuf->ops->get(ipipe, ibuf);
+
+		obuf = opipe->bufs + nbuf;
+		*obuf = *ibuf;
+
 		/*
-		 * pipe_wait() drops the ipipe mutex. To avoid deadlocks
-		 * with another process, we can only safely do that if
-		 * the ipipe lock is ordered last.
+		 * Don't inherit the gift flag, we need to
+		 * prevent multiple steals of this page.
 		 */
-		if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
-			if (!ret)
-				ret = -EAGAIN;
-			break;
-		}
-		if (signal_pending(current)) {
-			if (!ret)
-				ret = -ERESTARTSYS;
-			break;
-		}
+		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 
-		if (waitqueue_active(&ipipe->wait))
-			wake_up_interruptible_sync(&ipipe->wait);
-		kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
+		if (obuf->len > len)
+			obuf->len = len;
 
-		pipe_wait(ipipe);
-	}
+		opipe->nrbufs++;
+		ret += obuf->len;
+		len -= obuf->len;
+		i++;
+	} while (len);
 
 	mutex_unlock(&ipipe->inode->i_mutex);
 	mutex_unlock(&opipe->inode->i_mutex);
 
-	if (do_wakeup) {
+	/*
+	 * If we put data in the output pipe, wakeup any potential readers.
+	 */
+	if (ret > 0) {
 		smp_mb();
 		if (waitqueue_active(&opipe->wait))
 			wake_up_interruptible(&opipe->wait);
@@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len,
 {
 	struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
 	struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+	int ret = -EINVAL;
 
 	/*
-	 * Link ipipe to the two output pipes, consuming as we go along.
+	 * Duplicate the contents of ipipe to opipe without actually
+	 * copying the data.
 	 */
-	if (ipipe && opipe)
-		return link_pipe(ipipe, opipe, len, flags);
+	if (ipipe && opipe && ipipe != opipe) {
+		/*
+		 * Keep going, unless we encounter an error. The ipipe/opipe
+		 * ordering doesn't really matter.
+		 */
+		ret = link_ipipe_prep(ipipe, flags);
+		if (!ret) {
+			ret = link_opipe_prep(opipe, flags);
+			if (!ret) {
+				ret = link_pipe(ipipe, opipe, len, flags);
+				if (!ret && (flags & SPLICE_F_NONBLOCK))
+					ret = -EAGAIN;
+			}
+		}
+	}
 
-	return -EINVAL;
+	return ret;
 }
 
 asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
-- 
cgit v1.2.2


From 73ce5934e2d855db436566297f12966eb507a435 Mon Sep 17 00:00:00 2001
From: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Date: Mon, 10 Jul 2006 04:43:56 -0700
Subject: [PATCH] reiserfs: fix journaling issue regarding fsync()

When write() extends a file(i_size is increased) and fsync() is called,
change of inode must be written to journaling area through fsync().
But,currently the i_trans_id is not correctly updated when i_size is
increased.  So fsync() does not kick the journal writer.

Reiserfs_file_write() already updates the transaction when blocks are
allocated, but the case when i_size increases and new blocks are not added
is not correctly treated.

Following patch fix this bug.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <mason@suse.com>
Cc: Hans Reiser <reiser@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 752cea12e3..f318b58510 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -860,8 +860,12 @@ static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_han
 			// this sets the proper flags for O_SYNC to trigger a commit
 			mark_inode_dirty(inode);
 			reiserfs_write_unlock(inode->i_sb);
-		} else
+		} else {
+			reiserfs_write_lock(inode->i_sb);
+			reiserfs_update_inode_transaction(inode);
 			mark_inode_dirty(inode);
+			reiserfs_write_unlock(inode->i_sb);
+		}
 
 		sd_update = 1;
 	}
-- 
cgit v1.2.2


From 25e206b54b9a20e63b6f5194aeebfa13d37e015c Mon Sep 17 00:00:00 2001
From: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Date: Mon, 10 Jul 2006 04:44:00 -0700
Subject: [PATCH] partitions: let partitions inherit policy from disk

Change the partition code in fs/partitions/check.c to initialize a newly
detected partition's policy field with that of the containing block device
(see patch below).

My reasoning is that function set_disk_ro() in block/genhd.c modifies the
policy field (read-only indicator) of a disk and all contained partitions.
When a partition is detected after the call to set_disk_ro(), the policy
field of this partition will currently not inherit the disk's policy field.
 This behavior poses a problem in cases where a block device can be
'logically de- and reactivated' like e.g.  the s390 DASD driver because
partition detection may run after the policy field has been modified.

Signed-off-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Acked-by: Al Viro <viro@ftp.linux.org.uk>
Makes-sense-to: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/check.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 839634026e..51c6a748df 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -339,6 +339,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
 	p->start_sect = start;
 	p->nr_sects = len;
 	p->partno = part;
+	p->policy = disk->policy;
 
 	if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1]))
 		snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part);
-- 
cgit v1.2.2


From 69c3a5b8fd8cfa67be22f6d7ae5c681c6777d817 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 10 Jul 2006 04:44:23 -0700
Subject: [PATCH] fs/read_write.c: EXPORT_UNUSED_SYMBOL

This patch marks an unused export as EXPORT_UNUSED_SYMBOL.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/read_write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/read_write.c b/fs/read_write.c
index 5bc0e9234f..d4cb3183c9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -436,7 +436,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 	return seg;
 }
 
-EXPORT_SYMBOL(iov_shorten);
+EXPORT_UNUSED_SYMBOL(iov_shorten);  /*  June 2006  */
 
 /* A write operation does a read from user space and vice versa */
 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
-- 
cgit v1.2.2


From b6174df5eec9cdfd598c03d6d0807e344e109213 Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin.zhang@intel.com>
Date: Mon, 10 Jul 2006 04:44:49 -0700
Subject: [PATCH] mmap zero-length hugetlb file with PROT_NONE to protect a
 hugetlb virtual area

Sometimes, applications need below call to be successful although
"/mnt/hugepages/file1" doesn't exist.

fd = open("/mnt/hugepages/file1", O_CREAT|O_RDWR, 0755);
*addr = mmap(NULL, 0x1024*1024*256, PROT_NONE, 0, fd, 0);

As for regular pages (or files), above call does work, but as for huge
pages, above call would fail because hugetlbfs_file_mmap would fail if
(!(vma->vm_flags & VM_WRITE) && len > inode->i_size).

This capability on huge page is useful on ia64 when the process wants to
protect one area on region 4, so other threads couldn't read/write this
area.  A famous JVM (Java Virtual Machine) implementation on IA64 needs the
capability.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Hugh Dickins <hugh@veritas.com>
[ Expand-on-mmap semantics again... this time matching normal fs's. wli ]
Acked-by: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6449cb6979..c3920c96da 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -83,8 +83,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 	ret = -ENOMEM;
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-	if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
-		goto out;
 
 	if (vma->vm_flags & VM_MAYSHARE &&
 	    hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
@@ -93,7 +91,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 	ret = 0;
 	hugetlb_prefault_arch_hook(vma->vm_mm);
-	if (inode->i_size < len)
+	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
 		inode->i_size = len;
 out:
 	mutex_unlock(&inode->i_mutex);
-- 
cgit v1.2.2


From 1aeb21d626327ee909fef03f72aea6e8a60e6c0c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 10 Jul 2006 04:44:50 -0700
Subject: [PATCH] FDPIC: Fix FDPIC compile errors

Fix FDPIC compile errors.

(akpm: we suspect it fixes a warning)

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf_fdpic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index eba4e23b9c..07624b95ae 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -459,6 +459,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	 */
 	hwcap = ELF_HWCAP;
 	k_platform = ELF_PLATFORM;
+	u_platform = NULL;
 
 	if (k_platform) {
 		platform_len = strlen(k_platform) + 1;
-- 
cgit v1.2.2


From 21ff821630c0e64f5d2fab96ced72000d77fa90b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 10 Jul 2006 04:44:52 -0700
Subject: [PATCH] NOMMU: Fix execution off of ramfs with mmap()

Fix execution through the FDPIC binfmt of programs stored on ramfs by
preventing the ramfs mmap() returning successfully on a private mapping of
a ramfs file.  This causes NOMMU mmap to make a copy of the mapped portion
of the file and map that instead.

This could be improved by granting direct mapping access to read-only
private mappings for which the data is stored on a contiguous run of pages.
 However, this is only likely to be the case if the file was extended with
truncate before being written.

ramfs is left to map the file directly for shared mappings so that SYSV IPC
and POSIX shared memory both still work.

Signed-off-by: David Howells <dhowells@redhat.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ramfs/file-nommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 99fffc9e1b..677139b48e 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -283,9 +283,9 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
 
 /*****************************************************************************/
 /*
- * set up a mapping
+ * set up a mapping for shared memory segments
  */
 int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	return 0;
+	return vma->vm_flags & VM_SHARED ? 0 : -ENOSYS;
 }
-- 
cgit v1.2.2


From 8a2ab7f5df76b920d62b908919d987d3b8a82856 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 10 Jul 2006 04:44:53 -0700
Subject: [PATCH] FDPIC: Adjust the ELF-FDPIC driver to conform more to the
 CodingStyle

Adjust the ELF-FDPIC binfmt driver to conform much more to the CodingStyle,
silly though it may be.

Further changes:

 (*) Drop the casts to long for addresses in kdebug() statements (they're
     unsigned long already).

 (*) Use extra variables to avoid expressions longer than 80 chars by splitting
     the statement into multiple statements and letting the compiler optimise
     them back together.

 (*) Eliminate duplicate call of ksize() when working out how much space was
     actually allocated for the stack.

 (*) Discard the commented-out load_shlib prototype and op pointer as this will
     not be supported in ELF-FDPIC for the foreseeable future.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf_fdpic.c | 305 +++++++++++++++++++++++++++-----------------------
 1 file changed, 168 insertions(+), 137 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 07624b95ae..a4ff873898 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1,6 +1,6 @@
 /* binfmt_elf_fdpic.c: FDPIC ELF binary format
  *
- * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  * Derived from binfmt_elf.c
  *
@@ -50,43 +50,45 @@ typedef char *elf_caddr_t;
 
 MODULE_LICENSE("GPL");
 
-static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs);
-//static int load_elf_fdpic_library(struct file *);
-static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file);
-static int elf_fdpic_map_file(struct elf_fdpic_params *params,
-			      struct file *file,
-			      struct mm_struct *mm,
-			      const char *what);
+static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *);
+static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *);
+static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *,
+			      struct mm_struct *, const char *);
 
-static int create_elf_fdpic_tables(struct linux_binprm *bprm,
-				   struct mm_struct *mm,
-				   struct elf_fdpic_params *exec_params,
-				   struct elf_fdpic_params *interp_params);
+static int create_elf_fdpic_tables(struct linux_binprm *, struct mm_struct *,
+				   struct elf_fdpic_params *,
+				   struct elf_fdpic_params *);
 
 #ifndef CONFIG_MMU
-static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp);
-static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params,
-						   struct file *file,
-						   struct mm_struct *mm);
+static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *,
+					    unsigned long *);
+static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
+						   struct file *,
+						   struct mm_struct *);
 #endif
 
-static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
-					     struct file *file,
-					     struct mm_struct *mm);
+static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
+					     struct file *, struct mm_struct *);
 
 static struct linux_binfmt elf_fdpic_format = {
 	.module		= THIS_MODULE,
 	.load_binary	= load_elf_fdpic_binary,
-//	.load_shlib	= load_elf_fdpic_library,
 //	.core_dump	= elf_fdpic_core_dump,
 	.min_coredump	= ELF_EXEC_PAGESIZE,
 };
 
-static int __init init_elf_fdpic_binfmt(void)  { return register_binfmt(&elf_fdpic_format); }
-static void __exit exit_elf_fdpic_binfmt(void) { unregister_binfmt(&elf_fdpic_format); }
+static int __init init_elf_fdpic_binfmt(void)
+{
+	return register_binfmt(&elf_fdpic_format);
+}
 
-module_init(init_elf_fdpic_binfmt)
-module_exit(exit_elf_fdpic_binfmt)
+static void __exit exit_elf_fdpic_binfmt(void)
+{
+	unregister_binfmt(&elf_fdpic_format);
+}
+
+module_init(init_elf_fdpic_binfmt);
+module_exit(exit_elf_fdpic_binfmt);
 
 static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
 {
@@ -105,7 +107,8 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
 /*
  * read the program headers table into memory
  */
-static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file)
+static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
+				 struct file *file)
 {
 	struct elf32_phdr *phdr;
 	unsigned long size;
@@ -121,7 +124,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f
 	if (!params->phdrs)
 		return -ENOMEM;
 
-	retval = kernel_read(file, params->hdr.e_phoff, (char *) params->phdrs, size);
+	retval = kernel_read(file, params->hdr.e_phoff,
+			     (char *) params->phdrs, size);
 	if (retval < 0)
 		return retval;
 
@@ -141,17 +145,24 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f
 	}
 
 	return 0;
-} /* end elf_fdpic_fetch_phdrs() */
+}
 
 /*****************************************************************************/
 /*
  * load an fdpic binary into various bits of memory
  */
-static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_elf_fdpic_binary(struct linux_binprm *bprm,
+				 struct pt_regs *regs)
 {
 	struct elf_fdpic_params exec_params, interp_params;
 	struct elf_phdr *phdr;
-	unsigned long stack_size;
+	unsigned long stack_size, entryaddr;
+#ifndef CONFIG_MMU
+	unsigned long fullsize;
+#endif
+#ifdef ELF_FDPIC_PLAT_INIT
+	unsigned long dynaddr;
+#endif
 	struct file *interpreter = NULL; /* to shut gcc up */
 	char *interpreter_name = NULL;
 	int executable_stack;
@@ -212,7 +223,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 				goto error;
 			}
 
-			retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
+			retval = kernel_read(interpreter, 0, bprm->buf,
+					     BINPRM_BUF_SIZE);
 			if (retval < 0)
 				goto error;
 
@@ -295,7 +307,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 				  &current->mm->start_stack,
 				  &current->mm->start_brk);
 
-	retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack);
+	retval = setup_arg_pages(bprm, current->mm->start_stack,
+				 executable_stack);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);
 		goto error_kill;
@@ -303,7 +316,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 #endif
 
 	/* load the executable and interpreter into memory */
-	retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, "executable");
+	retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm,
+				    "executable");
 	if (retval < 0)
 		goto error_kill;
 
@@ -324,7 +338,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 	if (!current->mm->start_brk)
 		current->mm->start_brk = current->mm->end_data;
 
-	current->mm->brk = current->mm->start_brk = PAGE_ALIGN(current->mm->start_brk);
+	current->mm->brk = current->mm->start_brk =
+		PAGE_ALIGN(current->mm->start_brk);
 
 #else
 	/* create a stack and brk area big enough for everyone
@@ -336,47 +351,45 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 		stack_size = PAGE_SIZE * 2;
 
 	down_write(&current->mm->mmap_sem);
-	current->mm->start_brk = do_mmap(NULL,
-					 0,
-					 stack_size,
+	current->mm->start_brk = do_mmap(NULL, 0, stack_size,
 					 PROT_READ | PROT_WRITE | PROT_EXEC,
 					 MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN,
 					 0);
 
-	if (IS_ERR((void *) current->mm->start_brk)) {
+	if (IS_ERR_VALUE(current->mm->start_brk)) {
 		up_write(&current->mm->mmap_sem);
 		retval = current->mm->start_brk;
 		current->mm->start_brk = 0;
 		goto error_kill;
 	}
 
-	if (do_mremap(current->mm->start_brk,
-		      stack_size,
-		      ksize((char *) current->mm->start_brk),
-		      0, 0
-		      ) == current->mm->start_brk
-	    )
-		stack_size = ksize((char *) current->mm->start_brk);
+	/* expand the stack mapping to use up the entire allocation granule */
+	fullsize = ksize((char *) current->mm->start_brk);
+	if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size,
+				    fullsize, 0, 0)))
+		stack_size = fullsize;
 	up_write(&current->mm->mmap_sem);
 
 	current->mm->brk = current->mm->start_brk;
 	current->mm->context.end_brk = current->mm->start_brk;
-	current->mm->context.end_brk += (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0;
+	current->mm->context.end_brk +=
+		(stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0;
 	current->mm->start_stack = current->mm->start_brk + stack_size;
 #endif
 
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
-	if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0)
+	if (create_elf_fdpic_tables(bprm, current->mm,
+				    &exec_params, &interp_params) < 0)
 		goto error_kill;
 
-	kdebug("- start_code  %lx",	(long) current->mm->start_code);
-	kdebug("- end_code    %lx",	(long) current->mm->end_code);
-	kdebug("- start_data  %lx",	(long) current->mm->start_data);
-	kdebug("- end_data    %lx",	(long) current->mm->end_data);
-	kdebug("- start_brk   %lx",	(long) current->mm->start_brk);
-	kdebug("- brk         %lx",	(long) current->mm->brk);
-	kdebug("- start_stack %lx",	(long) current->mm->start_stack);
+	kdebug("- start_code  %lx", current->mm->start_code);
+	kdebug("- end_code    %lx", current->mm->end_code);
+	kdebug("- start_data  %lx", current->mm->start_data);
+	kdebug("- end_data    %lx", current->mm->end_data);
+	kdebug("- start_brk   %lx", current->mm->start_brk);
+	kdebug("- brk         %lx", current->mm->brk);
+	kdebug("- start_stack %lx", current->mm->start_stack);
 
 #ifdef ELF_FDPIC_PLAT_INIT
 	/*
@@ -385,21 +398,18 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 	 * example.  This macro performs whatever initialization to
 	 * the regs structure is required.
 	 */
-	ELF_FDPIC_PLAT_INIT(regs,
-			    exec_params.map_addr,
-			    interp_params.map_addr,
-			    interp_params.dynamic_addr ?: exec_params.dynamic_addr
-			    );
+	dynaddr = interp_params.dynamic_addr ?: exec_params.dynamic_addr;
+	ELF_FDPIC_PLAT_INIT(regs, exec_params.map_addr, interp_params.map_addr,
+			    dynaddr);
 #endif
 
 	/* everything is now ready... get the userspace context ready to roll */
-	start_thread(regs,
-		     interp_params.entry_addr ?: exec_params.entry_addr,
-		     current->mm->start_stack);
+	entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
+	start_thread(regs, entryaddr, current->mm->start_stack);
 
 	if (unlikely(current->ptrace & PT_PTRACED)) {
 		if (current->ptrace & PT_TRACE_EXEC)
-			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
+			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
 		else
 			send_sig(SIGTRAP, current, 0);
 	}
@@ -419,11 +429,11 @@ error:
 	return retval;
 
 	/* unrecoverable error - kill the process */
- error_kill:
+error_kill:
 	send_sig(SIGSEGV, current, 0);
 	goto error;
 
-} /* end load_elf_fdpic_binary() */
+}
 
 /*****************************************************************************/
 /*
@@ -471,11 +481,11 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 #if defined(__i386__) && defined(CONFIG_SMP)
 	/* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions
-	 * by the processes running on the same package. One thing we can do
-	 * is to shuffle the initial stack for them.
+	 * by the processes running on the same package. One thing we can do is
+	 * to shuffle the initial stack for them.
 	 *
-	 * the conditionals here are unneeded, but kept in to make the
-	 * code behaviour the same as pre change unless we have hyperthreaded
+	 * the conditionals here are unneeded, but kept in to make the code
+	 * behaviour the same as pre change unless we have hyperthreaded
 	 * processors. This keeps Mr Marcelo Person happier but should be
 	 * removed for 2.5
 	 */
@@ -498,11 +508,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 	if (interp_params->loadmap) {
 		len = sizeof(struct elf32_fdpic_loadmap);
-		len += sizeof(struct elf32_fdpic_loadseg) * interp_params->loadmap->nsegs;
+		len += sizeof(struct elf32_fdpic_loadseg) *
+			interp_params->loadmap->nsegs;
 		sp = (sp - len) & ~7UL;
 		interp_params->map_addr = sp;
 
-		if (copy_to_user((void __user *) sp, interp_params->loadmap, len) != 0)
+		if (copy_to_user((void __user *) sp, interp_params->loadmap,
+				 len) != 0)
 			return -EFAULT;
 
 		current->mm->context.interp_fdpic_loadmap = (unsigned long) sp;
@@ -526,34 +538,37 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	sp -= sp & 15UL;
 
 	/* put the ELF interpreter info on the stack */
-#define NEW_AUX_ENT(nr, id, val)						\
-	do {									\
-		struct { unsigned long _id, _val; } __user *ent = (void __user *) csp;	\
-		__put_user((id), &ent[nr]._id);					\
-		__put_user((val), &ent[nr]._val);				\
+#define NEW_AUX_ENT(nr, id, val)					\
+	do {								\
+		struct { unsigned long _id, _val; } __user *ent;	\
+									\
+		ent = (void __user *) csp;				\
+		__put_user((id), &ent[nr]._id);				\
+		__put_user((val), &ent[nr]._val);			\
 	} while (0)
 
 	csp -= 2 * sizeof(unsigned long);
 	NEW_AUX_ENT(0, AT_NULL, 0);
 	if (k_platform) {
 		csp -= 2 * sizeof(unsigned long);
-		NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform);
+		NEW_AUX_ENT(0, AT_PLATFORM,
+			    (elf_addr_t) (unsigned long) u_platform);
 	}
 
 	csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
-	NEW_AUX_ENT( 0, AT_HWCAP,		hwcap);
-	NEW_AUX_ENT( 1, AT_PAGESZ,		PAGE_SIZE);
-	NEW_AUX_ENT( 2, AT_CLKTCK,		CLOCKS_PER_SEC);
-	NEW_AUX_ENT( 3, AT_PHDR,		exec_params->ph_addr);
-	NEW_AUX_ENT( 4, AT_PHENT,		sizeof(struct elf_phdr));
-	NEW_AUX_ENT( 5, AT_PHNUM,		exec_params->hdr.e_phnum);
-	NEW_AUX_ENT( 6,	AT_BASE,		interp_params->elfhdr_addr);
-	NEW_AUX_ENT( 7, AT_FLAGS,		0);
-	NEW_AUX_ENT( 8, AT_ENTRY,		exec_params->entry_addr);
-	NEW_AUX_ENT( 9, AT_UID,			(elf_addr_t) current->uid);
-	NEW_AUX_ENT(10, AT_EUID,		(elf_addr_t) current->euid);
-	NEW_AUX_ENT(11, AT_GID,			(elf_addr_t) current->gid);
-	NEW_AUX_ENT(12, AT_EGID,		(elf_addr_t) current->egid);
+	NEW_AUX_ENT( 0, AT_HWCAP,	hwcap);
+	NEW_AUX_ENT( 1, AT_PAGESZ,	PAGE_SIZE);
+	NEW_AUX_ENT( 2, AT_CLKTCK,	CLOCKS_PER_SEC);
+	NEW_AUX_ENT( 3, AT_PHDR,	exec_params->ph_addr);
+	NEW_AUX_ENT( 4, AT_PHENT,	sizeof(struct elf_phdr));
+	NEW_AUX_ENT( 5, AT_PHNUM,	exec_params->hdr.e_phnum);
+	NEW_AUX_ENT( 6,	AT_BASE,	interp_params->elfhdr_addr);
+	NEW_AUX_ENT( 7, AT_FLAGS,	0);
+	NEW_AUX_ENT( 8, AT_ENTRY,	exec_params->entry_addr);
+	NEW_AUX_ENT( 9, AT_UID,		(elf_addr_t) current->uid);
+	NEW_AUX_ENT(10, AT_EUID,	(elf_addr_t) current->euid);
+	NEW_AUX_ENT(11, AT_GID,		(elf_addr_t) current->gid);
+	NEW_AUX_ENT(12, AT_EGID,	(elf_addr_t) current->egid);
 
 #ifdef ARCH_DLINFO
 	/* ARCH_DLINFO must come last so platform specific code can enforce
@@ -579,7 +594,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 #ifdef CONFIG_MMU
 	current->mm->arg_start = bprm->p;
 #else
-	current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
+	current->mm->arg_start = current->mm->start_stack -
+		(MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
 #endif
 
 	p = (char __user *) current->mm->arg_start;
@@ -607,7 +623,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 	mm->start_stack = (unsigned long) sp;
 	return 0;
-} /* end create_elf_fdpic_tables() */
+}
 
 /*****************************************************************************/
 /*
@@ -615,7 +631,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
  * the stack
  */
 #ifndef CONFIG_MMU
-static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp)
+static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm,
+					    unsigned long *_sp)
 {
 	unsigned long index, stop, sp;
 	char *src;
@@ -636,9 +653,9 @@ static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned
 
 	*_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15;
 
- out:
+out:
 	return ret;
-} /* end elf_fdpic_transfer_args_to_stack() */
+}
 #endif
 
 /*****************************************************************************/
@@ -713,17 +730,18 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		seg = loadmap->segs;
 		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
 			if (params->hdr.e_entry >= seg->p_vaddr &&
-			    params->hdr.e_entry < seg->p_vaddr + seg->p_memsz
-			    ) {
+			    params->hdr.e_entry < seg->p_vaddr + seg->p_memsz) {
 				params->entry_addr =
-					(params->hdr.e_entry - seg->p_vaddr) + seg->addr;
+					(params->hdr.e_entry - seg->p_vaddr) +
+					seg->addr;
 				break;
 			}
 		}
 	}
 
 	/* determine where the program header table has wound up if mapped */
-	stop = params->hdr.e_phoff + params->hdr.e_phnum * sizeof (struct elf_phdr);
+	stop = params->hdr.e_phoff;
+	stop += params->hdr.e_phnum * sizeof (struct elf_phdr);
 	phdr = params->phdrs;
 
 	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
@@ -737,9 +755,11 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		seg = loadmap->segs;
 		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
 			if (phdr->p_vaddr >= seg->p_vaddr &&
-			    phdr->p_vaddr + phdr->p_filesz <= seg->p_vaddr + seg->p_memsz
-			    ) {
-				params->ph_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr +
+			    phdr->p_vaddr + phdr->p_filesz <=
+			    seg->p_vaddr + seg->p_memsz) {
+				params->ph_addr =
+					(phdr->p_vaddr - seg->p_vaddr) +
+					seg->addr +
 					params->hdr.e_phoff - phdr->p_offset;
 				break;
 			}
@@ -756,18 +776,22 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		seg = loadmap->segs;
 		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
 			if (phdr->p_vaddr >= seg->p_vaddr &&
-			    phdr->p_vaddr + phdr->p_memsz <= seg->p_vaddr + seg->p_memsz
-			    ) {
-				params->dynamic_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr;
-
-				/* check the dynamic section contains at least one item, and that
-				 * the last item is a NULL entry */
+			    phdr->p_vaddr + phdr->p_memsz <=
+			    seg->p_vaddr + seg->p_memsz) {
+				params->dynamic_addr =
+					(phdr->p_vaddr - seg->p_vaddr) +
+					seg->addr;
+
+				/* check the dynamic section contains at least
+				 * one item, and that the last item is a NULL
+				 * entry */
 				if (phdr->p_memsz == 0 ||
 				    phdr->p_memsz % sizeof(Elf32_Dyn) != 0)
 					goto dynamic_error;
 
 				tmp = phdr->p_memsz / sizeof(Elf32_Dyn);
-				if (((Elf32_Dyn *) params->dynamic_addr)[tmp - 1].d_tag != 0)
+				if (((Elf32_Dyn *)
+				     params->dynamic_addr)[tmp - 1].d_tag != 0)
 					goto dynamic_error;
 				break;
 			}
@@ -776,8 +800,8 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 	}
 
 	/* now elide adjacent segments in the load map on MMU linux
-	 * - on uClinux the holes between may actually be filled with system stuff or stuff from
-	 *   other processes
+	 * - on uClinux the holes between may actually be filled with system
+	 *   stuff or stuff from other processes
 	 */
 #ifdef CONFIG_MMU
 	nloads = loadmap->nsegs;
@@ -788,7 +812,9 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		if (seg->p_vaddr - mseg->p_vaddr == seg->addr - mseg->addr) {
 			load_addr = PAGE_ALIGN(mseg->addr + mseg->p_memsz);
 			if (load_addr == (seg->addr & PAGE_MASK)) {
-				mseg->p_memsz += load_addr - (mseg->addr + mseg->p_memsz);
+				mseg->p_memsz +=
+					load_addr -
+					(mseg->addr + mseg->p_memsz);
 				mseg->p_memsz += seg->addr & ~PAGE_MASK;
 				mseg->p_memsz += seg->p_memsz;
 				loadmap->nsegs--;
@@ -816,20 +842,21 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 
 	return 0;
 
- dynamic_error:
+dynamic_error:
 	printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
 	       what, file->f_dentry->d_inode->i_ino);
 	return -ELIBBAD;
-} /* end elf_fdpic_map_file() */
+}
 
 /*****************************************************************************/
 /*
  * map a file with constant displacement under uClinux
  */
 #ifndef CONFIG_MMU
-static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params,
-						   struct file *file,
-						   struct mm_struct *mm)
+static int elf_fdpic_map_file_constdisp_on_uclinux(
+	struct elf_fdpic_params *params,
+	struct file *file,
+	struct mm_struct *mm)
 {
 	struct elf32_fdpic_loadseg *seg;
 	struct elf32_phdr *phdr;
@@ -840,7 +867,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 	load_addr = params->load_addr;
 	seg = params->loadmap->segs;
 
-	/* determine the bounds of the contiguous overall allocation we must make */
+	/* determine the bounds of the contiguous overall allocation we must
+	 * make */
 	phdr = params->phdrs;
 	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
 		if (params->phdrs[loop].p_type != PT_LOAD)
@@ -861,7 +889,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 	maddr = do_mmap(NULL, load_addr, top - base,
 			PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
 	up_write(&mm->mmap_sem);
-	if (IS_ERR((void *) maddr))
+	if (IS_ERR_VALUE(maddr))
 		return (int) maddr;
 
 	if (load_addr != 0)
@@ -879,7 +907,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 		seg->p_vaddr = phdr->p_vaddr;
 		seg->p_memsz = phdr->p_memsz;
 
-		ret = file->f_op->read(file, (void *) seg->addr, phdr->p_filesz, &fpos);
+		ret = file->f_op->read(file, (void *) seg->addr,
+				       phdr->p_filesz, &fpos);
 		if (ret < 0)
 			return ret;
 
@@ -896,8 +925,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 			if (phdr->p_flags & PF_X) {
 				mm->start_code = seg->addr;
 				mm->end_code = seg->addr + phdr->p_memsz;
-			}
-			else if (!mm->start_data) {
+			} else if (!mm->start_data) {
 				mm->start_data = seg->addr;
 #ifndef CONFIG_MMU
 				mm->end_data = seg->addr + phdr->p_memsz;
@@ -914,7 +942,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 	}
 
 	return 0;
-} /* end elf_fdpic_map_file_constdisp_on_uclinux() */
+}
 #endif
 
 /*****************************************************************************/
@@ -975,14 +1003,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 
 		case ELF_FDPIC_FLAG_CONSTDISP:
 			/* constant displacement
-			 * - can be mapped anywhere, but must be mapped as a unit
+			 * - can be mapped anywhere, but must be mapped as a
+			 *   unit
 			 */
 			if (!dvset) {
 				maddr = load_addr;
 				delta_vaddr = phdr->p_vaddr;
 				dvset = 1;
-			}
-			else {
+			} else {
 				maddr = load_addr + phdr->p_vaddr - delta_vaddr;
 				flags |= MAP_FIXED;
 			}
@@ -1006,13 +1034,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		up_write(&mm->mmap_sem);
 
 		kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
-		       loop, phdr->p_memsz + disp, prot, flags, phdr->p_offset - disp,
-		       maddr);
+		       loop, phdr->p_memsz + disp, prot, flags,
+		       phdr->p_offset - disp, maddr);
 
-		if (IS_ERR((void *) maddr))
+		if (IS_ERR_VALUE(maddr))
 			return (int) maddr;
 
-		if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == ELF_FDPIC_FLAG_CONTIGUOUS)
+		if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) ==
+		    ELF_FDPIC_FLAG_CONTIGUOUS)
 			load_addr += PAGE_ALIGN(phdr->p_memsz + disp);
 
 		seg->addr = maddr + disp;
@@ -1023,7 +1052,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (phdr->p_offset == 0)
 			params->elfhdr_addr = seg->addr;
 
-		/* clear the bit between beginning of mapping and beginning of PT_LOAD */
+		/* clear the bit between beginning of mapping and beginning of
+		 * PT_LOAD */
 		if (prot & PROT_WRITE && disp > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
 			clear_user((void __user *) maddr, disp);
@@ -1039,19 +1069,20 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		excess1 = PAGE_SIZE - ((maddr + phdr->p_filesz) & ~PAGE_MASK);
 
 #ifdef CONFIG_MMU
-
 		if (excess > excess1) {
 			unsigned long xaddr = maddr + phdr->p_filesz + excess1;
 			unsigned long xmaddr;
 
 			flags |= MAP_FIXED | MAP_ANONYMOUS;
 			down_write(&mm->mmap_sem);
-			xmaddr = do_mmap(NULL, xaddr, excess - excess1, prot, flags, 0);
+			xmaddr = do_mmap(NULL, xaddr, excess - excess1,
+					 prot, flags, 0);
 			up_write(&mm->mmap_sem);
 
 			kdebug("mmap[%d] <anon>"
 			       " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
-			       loop, xaddr, excess - excess1, prot, flags, xmaddr);
+			       loop, xaddr, excess - excess1, prot, flags,
+			       xmaddr);
 
 			if (xmaddr != xaddr)
 				return -ENOMEM;
@@ -1060,7 +1091,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (prot & PROT_WRITE && excess1 > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx",
 			       loop, maddr + phdr->p_filesz, excess1);
-			clear_user((void __user *) maddr + phdr->p_filesz, excess1);
+			clear_user((void __user *) maddr + phdr->p_filesz,
+				   excess1);
 		}
 
 #else
@@ -1075,8 +1107,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 			if (phdr->p_flags & PF_X) {
 				mm->start_code = maddr;
 				mm->end_code = maddr + phdr->p_memsz;
-			}
-			else if (!mm->start_data) {
+			} else if (!mm->start_data) {
 				mm->start_data = maddr;
 				mm->end_data = maddr + phdr->p_memsz;
 			}
@@ -1086,4 +1117,4 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 	}
 
 	return 0;
-} /* end elf_fdpic_map_file_by_direct_mmap() */
+}
-- 
cgit v1.2.2


From b4cac1a0227a6f84be0381cd350a3c8730a4a671 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 10 Jul 2006 04:44:54 -0700
Subject: [PATCH] FDPIC: Move roundup() into linux/kernel.h

Move the roundup() macro from binfmt_elf.c into linux/kernel.h as it's
generally useful.

[akpm@osdl.org: nuke all the other implementations]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c              | 2 --
 fs/proc/kcore.c              | 2 --
 fs/xfs/linux-2.6/xfs_linux.h | 1 -
 3 files changed, 5 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f42e64210e..672a3b90bc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1185,8 +1185,6 @@ static int maydump(struct vm_area_struct *vma)
 	return 1;
 }
 
-#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
-
 /* An ELF note in memory */
 struct memelfnote
 {
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 036d14d836..8d6d85d740 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -42,8 +42,6 @@ const struct file_operations proc_kcore_operations = {
 #define	kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
 #endif
 
-#define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
-
 /* An ELF note in memory */
 struct memelfnote
 {
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8c021dc57d..a13f75c1a9 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -215,7 +215,6 @@ BUFFER_FNS(PrivateStart, unwritten);
 #define MIN(a,b)	(min(a,b))
 #define MAX(a,b)	(max(a,b))
 #define howmany(x, y)	(((x)+((y)-1))/(y))
-#define roundup(x, y)	((((x)+((y)-1))/(y))*(y))
 
 /*
  * Various platform dependent calls that don't fit anywhere else
-- 
cgit v1.2.2


From 6d8c4e3b0150ff537902477ed62f8a8e9e70007b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 10 Jul 2006 04:44:55 -0700
Subject: [PATCH] FDPIC: Add coredump capability for the ELF-FDPIC binfmt

Add coredump capability for the ELF-FDPIC binfmt.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf_fdpic.c | 676 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 674 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a4ff873898..2f33658292 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -24,7 +24,9 @@
 #include <linux/file.h>
 #include <linux/fcntl.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <linux/highmem.h>
+#include <linux/highuid.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/init.h>
@@ -48,6 +50,12 @@ typedef char *elf_caddr_t;
 #define kdebug(fmt, ...) do {} while(0)
 #endif
 
+#if 0
+#define kdcore(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ )
+#else
+#define kdcore(fmt, ...) do {} while(0)
+#endif
+
 MODULE_LICENSE("GPL");
 
 static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *);
@@ -70,10 +78,16 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
 static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
 					     struct file *, struct mm_struct *);
 
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
+static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *);
+#endif
+
 static struct linux_binfmt elf_fdpic_format = {
 	.module		= THIS_MODULE,
 	.load_binary	= load_elf_fdpic_binary,
-//	.core_dump	= elf_fdpic_core_dump,
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
+	.core_dump	= elf_fdpic_core_dump,
+#endif
 	.min_coredump	= ELF_EXEC_PAGESIZE,
 };
 
@@ -87,7 +101,7 @@ static void __exit exit_elf_fdpic_binfmt(void)
 	unregister_binfmt(&elf_fdpic_format);
 }
 
-module_init(init_elf_fdpic_binfmt);
+core_initcall(init_elf_fdpic_binfmt);
 module_exit(exit_elf_fdpic_binfmt);
 
 static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
@@ -1118,3 +1132,661 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 
 	return 0;
 }
+
+/*****************************************************************************/
+/*
+ * ELF-FDPIC core dumper
+ *
+ * Modelled on fs/exec.c:aout_core_dump()
+ * Jeremy Fitzhardinge <jeremy@sw.oz.au>
+ *
+ * Modelled on fs/binfmt_elf.c core dumper
+ */
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
+
+/*
+ * These are the only things you should do on a core-file: use only these
+ * functions to write out all the necessary info.
+ */
+static int dump_write(struct file *file, const void *addr, int nr)
+{
+	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+
+static int dump_seek(struct file *file, loff_t off)
+{
+	if (file->f_op->llseek) {
+		if (file->f_op->llseek(file, off, SEEK_SET) != off)
+			return 0;
+	} else {
+		file->f_pos = off;
+	}
+	return 1;
+}
+
+/*
+ * Decide whether a segment is worth dumping; default is yes to be
+ * sure (missing info is worse than too much; etc).
+ * Personally I'd include everything, and use the coredump limit...
+ *
+ * I think we should skip something. But I am not sure how. H.J.
+ */
+static int maydump(struct vm_area_struct *vma)
+{
+	/* Do not dump I/O mapped devices or special mappings */
+	if (vma->vm_flags & (VM_IO | VM_RESERVED)) {
+		kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
+		return 0;
+	}
+
+	/* If we may not read the contents, don't allow us to dump
+	 * them either. "dump_write()" can't handle it anyway.
+	 */
+	if (!(vma->vm_flags & VM_READ)) {
+		kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags);
+		return 0;
+	}
+
+	/* Dump shared memory only if mapped from an anonymous file. */
+	if (vma->vm_flags & VM_SHARED) {
+		if (vma->vm_file->f_dentry->d_inode->i_nlink == 0) {
+			kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags);
+			return 1;
+		}
+
+		kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags);
+		return 0;
+	}
+
+#ifdef CONFIG_MMU
+	/* If it hasn't been written to, don't write it out */
+	if (!vma->anon_vma) {
+		kdcore("%08lx: %08lx: no (!anon)", vma->vm_start, vma->vm_flags);
+		return 0;
+	}
+#endif
+
+	kdcore("%08lx: %08lx: yes", vma->vm_start, vma->vm_flags);
+	return 1;
+}
+
+/* An ELF note in memory */
+struct memelfnote
+{
+	const char *name;
+	int type;
+	unsigned int datasz;
+	void *data;
+};
+
+static int notesize(struct memelfnote *en)
+{
+	int sz;
+
+	sz = sizeof(struct elf_note);
+	sz += roundup(strlen(en->name) + 1, 4);
+	sz += roundup(en->datasz, 4);
+
+	return sz;
+}
+
+/* #define DEBUG */
+
+#define DUMP_WRITE(addr, nr)	\
+	do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
+#define DUMP_SEEK(off)	\
+	do { if (!dump_seek(file, (off))) return 0; } while(0)
+
+static int writenote(struct memelfnote *men, struct file *file)
+{
+	struct elf_note en;
+
+	en.n_namesz = strlen(men->name) + 1;
+	en.n_descsz = men->datasz;
+	en.n_type = men->type;
+
+	DUMP_WRITE(&en, sizeof(en));
+	DUMP_WRITE(men->name, en.n_namesz);
+	/* XXX - cast from long long to long to avoid need for libgcc.a */
+	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */
+	DUMP_WRITE(men->data, men->datasz);
+	DUMP_SEEK(roundup((unsigned long)file->f_pos, 4));	/* XXX */
+
+	return 1;
+}
+#undef DUMP_WRITE
+#undef DUMP_SEEK
+
+#define DUMP_WRITE(addr, nr)	\
+	if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+		goto end_coredump;
+#define DUMP_SEEK(off)	\
+	if (!dump_seek(file, (off))) \
+		goto end_coredump;
+
+static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
+{
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS] = ELF_CLASS;
+	elf->e_ident[EI_DATA] = ELF_DATA;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	elf->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+
+	elf->e_type = ET_CORE;
+	elf->e_machine = ELF_ARCH;
+	elf->e_version = EV_CURRENT;
+	elf->e_entry = 0;
+	elf->e_phoff = sizeof(struct elfhdr);
+	elf->e_shoff = 0;
+	elf->e_flags = ELF_FDPIC_CORE_EFLAGS;
+	elf->e_ehsize = sizeof(struct elfhdr);
+	elf->e_phentsize = sizeof(struct elf_phdr);
+	elf->e_phnum = segs;
+	elf->e_shentsize = 0;
+	elf->e_shnum = 0;
+	elf->e_shstrndx = 0;
+	return;
+}
+
+static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
+{
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = offset;
+	phdr->p_vaddr = 0;
+	phdr->p_paddr = 0;
+	phdr->p_filesz = sz;
+	phdr->p_memsz = 0;
+	phdr->p_flags = 0;
+	phdr->p_align = 0;
+	return;
+}
+
+static inline void fill_note(struct memelfnote *note, const char *name, int type,
+		unsigned int sz, void *data)
+{
+	note->name = name;
+	note->type = type;
+	note->datasz = sz;
+	note->data = data;
+	return;
+}
+
+/*
+ * fill up all the fields in prstatus from the given task struct, except
+ * registers which need to be filled up seperately.
+ */
+static void fill_prstatus(struct elf_prstatus *prstatus,
+			  struct task_struct *p, long signr)
+{
+	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
+	prstatus->pr_sigpend = p->pending.signal.sig[0];
+	prstatus->pr_sighold = p->blocked.sig[0];
+	prstatus->pr_pid = p->pid;
+	prstatus->pr_ppid = p->parent->pid;
+	prstatus->pr_pgrp = process_group(p);
+	prstatus->pr_sid = p->signal->session;
+	if (thread_group_leader(p)) {
+		/*
+		 * This is the record for the group leader.  Add in the
+		 * cumulative times of previous dead threads.  This total
+		 * won't include the time of each live thread whose state
+		 * is included in the core dump.  The final total reported
+		 * to our parent process when it calls wait4 will include
+		 * those sums as well as the little bit more time it takes
+		 * this and each other thread to finish dying after the
+		 * core dump synchronization phase.
+		 */
+		cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
+				   &prstatus->pr_utime);
+		cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
+				   &prstatus->pr_stime);
+	} else {
+		cputime_to_timeval(p->utime, &prstatus->pr_utime);
+		cputime_to_timeval(p->stime, &prstatus->pr_stime);
+	}
+	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
+	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
+
+	prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap;
+	prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap;
+}
+
+static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
+		       struct mm_struct *mm)
+{
+	unsigned int i, len;
+
+	/* first copy the parameters from user space */
+	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
+
+	len = mm->arg_end - mm->arg_start;
+	if (len >= ELF_PRARGSZ)
+		len = ELF_PRARGSZ - 1;
+	if (copy_from_user(&psinfo->pr_psargs,
+		           (const char __user *) mm->arg_start, len))
+		return -EFAULT;
+	for (i = 0; i < len; i++)
+		if (psinfo->pr_psargs[i] == 0)
+			psinfo->pr_psargs[i] = ' ';
+	psinfo->pr_psargs[len] = 0;
+
+	psinfo->pr_pid = p->pid;
+	psinfo->pr_ppid = p->parent->pid;
+	psinfo->pr_pgrp = process_group(p);
+	psinfo->pr_sid = p->signal->session;
+
+	i = p->state ? ffz(~p->state) + 1 : 0;
+	psinfo->pr_state = i;
+	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
+	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
+	psinfo->pr_nice = task_nice(p);
+	psinfo->pr_flag = p->flags;
+	SET_UID(psinfo->pr_uid, p->uid);
+	SET_GID(psinfo->pr_gid, p->gid);
+	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+
+	return 0;
+}
+
+/* Here is the structure in which status of each thread is captured. */
+struct elf_thread_status
+{
+	struct list_head list;
+	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
+	elf_fpregset_t fpu;		/* NT_PRFPREG */
+	struct task_struct *thread;
+#ifdef ELF_CORE_COPY_XFPREGS
+	elf_fpxregset_t xfpu;		/* NT_PRXFPREG */
+#endif
+	struct memelfnote notes[3];
+	int num_notes;
+};
+
+/*
+ * In order to add the specific thread information for the elf file format,
+ * we need to keep a linked list of every thread's pr_status and then create
+ * a single section for them in the final core file.
+ */
+static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
+{
+	struct task_struct *p = t->thread;
+	int sz = 0;
+
+	t->num_notes = 0;
+
+	fill_prstatus(&t->prstatus, p, signr);
+	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
+
+	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
+		  &t->prstatus);
+	t->num_notes++;
+	sz += notesize(&t->notes[0]);
+
+	t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu);
+	if (t->prstatus.pr_fpvalid) {
+		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
+			  &t->fpu);
+		t->num_notes++;
+		sz += notesize(&t->notes[1]);
+	}
+
+#ifdef ELF_CORE_COPY_XFPREGS
+	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
+		fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
+			  &t->xfpu);
+		t->num_notes++;
+		sz += notesize(&t->notes[2]);
+	}
+#endif
+	return sz;
+}
+
+/*
+ * dump the segments for an MMU process
+ */
+#ifdef CONFIG_MMU
+static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm,
+				   size_t *size, unsigned long *limit)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long addr;
+
+		if (!maydump(vma))
+			continue;
+
+		for (addr = vma->vm_start;
+		     addr < vma->vm_end;
+		     addr += PAGE_SIZE
+		     ) {
+			struct vm_area_struct *vma;
+			struct page *page;
+
+			if (get_user_pages(current, current->mm, addr, 1, 0, 1,
+					   &page, &vma) <= 0) {
+				DUMP_SEEK(file->f_pos + PAGE_SIZE);
+			}
+			else if (page == ZERO_PAGE(addr)) {
+				DUMP_SEEK(file->f_pos + PAGE_SIZE);
+				page_cache_release(page);
+			}
+			else {
+				void *kaddr;
+
+				flush_cache_page(vma, addr, page_to_pfn(page));
+				kaddr = kmap(page);
+				if ((*size += PAGE_SIZE) > *limit ||
+				    !dump_write(file, kaddr, PAGE_SIZE)
+				    ) {
+					kunmap(page);
+					page_cache_release(page);
+					return -EIO;
+				}
+				kunmap(page);
+				page_cache_release(page);
+			}
+		}
+	}
+
+	return 0;
+
+end_coredump:
+	return -EFBIG;
+}
+#endif
+
+/*
+ * dump the segments for a NOMMU process
+ */
+#ifndef CONFIG_MMU
+static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm,
+				   size_t *size, unsigned long *limit)
+{
+	struct vm_list_struct *vml;
+
+	for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
+	struct vm_area_struct *vma = vml->vma;
+
+		if (!maydump(vma))
+			continue;
+
+		if ((*size += PAGE_SIZE) > *limit)
+			return -EFBIG;
+
+		if (!dump_write(file, (void *) vma->vm_start,
+				vma->vm_end - vma->vm_start))
+			return -EIO;
+	}
+
+	return 0;
+}
+#endif
+
+/*
+ * Actual dumper
+ *
+ * This is a two-pass process; first we find the offsets of the bits,
+ * and then they are actually written out.  If we run out of core limit
+ * we just truncate.
+ */
+static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
+			       struct file *file)
+{
+#define	NUM_NOTES	6
+	int has_dumped = 0;
+	mm_segment_t fs;
+	int segs;
+	size_t size = 0;
+	int i;
+	struct vm_area_struct *vma;
+	struct elfhdr *elf = NULL;
+	loff_t offset = 0, dataoff;
+	unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
+	int numnote;
+	struct memelfnote *notes = NULL;
+	struct elf_prstatus *prstatus = NULL;	/* NT_PRSTATUS */
+	struct elf_prpsinfo *psinfo = NULL;	/* NT_PRPSINFO */
+ 	struct task_struct *g, *p;
+ 	LIST_HEAD(thread_list);
+ 	struct list_head *t;
+	elf_fpregset_t *fpu = NULL;
+#ifdef ELF_CORE_COPY_XFPREGS
+	elf_fpxregset_t *xfpu = NULL;
+#endif
+	int thread_status_size = 0;
+#ifndef CONFIG_MMU
+	struct vm_list_struct *vml;
+#endif
+	elf_addr_t *auxv;
+
+	/*
+	 * We no longer stop all VM operations.
+	 *
+	 * This is because those proceses that could possibly change map_count
+	 * or the mmap / vma pages are now blocked in do_exit on current
+	 * finishing this core dump.
+	 *
+	 * Only ptrace can touch these memory addresses, but it doesn't change
+	 * the map_count or the pages allocated. So no possibility of crashing
+	 * exists while dumping the mm->vm_next areas to the core file.
+	 */
+
+	/* alloc memory for large data structures: too large to be on stack */
+	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
+	if (!elf)
+		goto cleanup;
+	prstatus = kzalloc(sizeof(*prstatus), GFP_KERNEL);
+	if (!prstatus)
+		goto cleanup;
+	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
+	if (!psinfo)
+		goto cleanup;
+	notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
+	if (!notes)
+		goto cleanup;
+	fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
+	if (!fpu)
+		goto cleanup;
+#ifdef ELF_CORE_COPY_XFPREGS
+	xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
+	if (!xfpu)
+		goto cleanup;
+#endif
+
+	if (signr) {
+		struct elf_thread_status *tmp;
+		read_lock(&tasklist_lock);
+		do_each_thread(g,p)
+			if (current->mm == p->mm && current != p) {
+				tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
+				if (!tmp) {
+					read_unlock(&tasklist_lock);
+					goto cleanup;
+				}
+				INIT_LIST_HEAD(&tmp->list);
+				tmp->thread = p;
+				list_add(&tmp->list, &thread_list);
+			}
+		while_each_thread(g,p);
+		read_unlock(&tasklist_lock);
+		list_for_each(t, &thread_list) {
+			struct elf_thread_status *tmp;
+			int sz;
+
+			tmp = list_entry(t, struct elf_thread_status, list);
+			sz = elf_dump_thread_status(signr, tmp);
+			thread_status_size += sz;
+		}
+	}
+
+	/* now collect the dump for the current */
+	fill_prstatus(prstatus, current, signr);
+	elf_core_copy_regs(&prstatus->pr_reg, regs);
+
+#ifdef CONFIG_MMU
+	segs = current->mm->map_count;
+#else
+	segs = 0;
+	for (vml = current->mm->context.vmlist; vml; vml = vml->next)
+	    segs++;
+#endif
+#ifdef ELF_CORE_EXTRA_PHDRS
+	segs += ELF_CORE_EXTRA_PHDRS;
+#endif
+
+	/* Set up header */
+	fill_elf_fdpic_header(elf, segs + 1);	/* including notes section */
+
+	has_dumped = 1;
+	current->flags |= PF_DUMPCORE;
+
+	/*
+	 * Set up the notes in similar form to SVR4 core dumps made
+	 * with info from their /proc.
+	 */
+
+	fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
+	fill_psinfo(psinfo, current->group_leader, current->mm);
+	fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
+
+	numnote = 2;
+
+	auxv = (elf_addr_t *) current->mm->saved_auxv;
+
+	i = 0;
+	do
+		i += 2;
+	while (auxv[i - 2] != AT_NULL);
+	fill_note(&notes[numnote++], "CORE", NT_AUXV,
+		  i * sizeof(elf_addr_t), auxv);
+
+  	/* Try to dump the FPU. */
+	if ((prstatus->pr_fpvalid =
+	     elf_core_copy_task_fpregs(current, regs, fpu)))
+		fill_note(notes + numnote++,
+			  "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
+#ifdef ELF_CORE_COPY_XFPREGS
+	if (elf_core_copy_task_xfpregs(current, xfpu))
+		fill_note(notes + numnote++,
+			  "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
+#endif
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	DUMP_WRITE(elf, sizeof(*elf));
+	offset += sizeof(*elf);				/* Elf header */
+	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */
+
+	/* Write notes phdr entry */
+	{
+		struct elf_phdr phdr;
+		int sz = 0;
+
+		for (i = 0; i < numnote; i++)
+			sz += notesize(notes + i);
+
+		sz += thread_status_size;
+
+		fill_elf_note_phdr(&phdr, sz, offset);
+		offset += sz;
+		DUMP_WRITE(&phdr, sizeof(phdr));
+	}
+
+	/* Page-align dumped data */
+	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
+
+	/* write program headers for segments dump */
+	for (
+#ifdef CONFIG_MMU
+		vma = current->mm->mmap; vma; vma = vma->vm_next
+#else
+			vml = current->mm->context.vmlist; vml; vml = vml->next
+#endif
+	     ) {
+		struct elf_phdr phdr;
+		size_t sz;
+
+#ifndef CONFIG_MMU
+		vma = vml->vma;
+#endif
+
+		sz = vma->vm_end - vma->vm_start;
+
+		phdr.p_type = PT_LOAD;
+		phdr.p_offset = offset;
+		phdr.p_vaddr = vma->vm_start;
+		phdr.p_paddr = 0;
+		phdr.p_filesz = maydump(vma) ? sz : 0;
+		phdr.p_memsz = sz;
+		offset += phdr.p_filesz;
+		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
+		if (vma->vm_flags & VM_WRITE)
+			phdr.p_flags |= PF_W;
+		if (vma->vm_flags & VM_EXEC)
+			phdr.p_flags |= PF_X;
+		phdr.p_align = ELF_EXEC_PAGESIZE;
+
+		DUMP_WRITE(&phdr, sizeof(phdr));
+	}
+
+#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
+	ELF_CORE_WRITE_EXTRA_PHDRS;
+#endif
+
+ 	/* write out the notes section */
+	for (i = 0; i < numnote; i++)
+		if (!writenote(notes + i, file))
+			goto end_coredump;
+
+	/* write out the thread status notes section */
+	list_for_each(t, &thread_list) {
+		struct elf_thread_status *tmp =
+				list_entry(t, struct elf_thread_status, list);
+
+		for (i = 0; i < tmp->num_notes; i++)
+			if (!writenote(&tmp->notes[i], file))
+				goto end_coredump;
+	}
+
+	DUMP_SEEK(dataoff);
+
+	if (elf_fdpic_dump_segments(file, current->mm, &size, &limit) < 0)
+		goto end_coredump;
+
+#ifdef ELF_CORE_WRITE_EXTRA_DATA
+	ELF_CORE_WRITE_EXTRA_DATA;
+#endif
+
+	if (file->f_pos != offset) {
+		/* Sanity check */
+		printk(KERN_WARNING
+		       "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
+		       file->f_pos, offset);
+	}
+
+end_coredump:
+	set_fs(fs);
+
+cleanup:
+	while (!list_empty(&thread_list)) {
+		struct list_head *tmp = thread_list.next;
+		list_del(tmp);
+		kfree(list_entry(tmp, struct elf_thread_status, list));
+	}
+
+	kfree(elf);
+	kfree(prstatus);
+	kfree(psinfo);
+	kfree(notes);
+	kfree(fpu);
+#ifdef ELF_CORE_COPY_XFPREGS
+	kfree(xfpu);
+#endif
+	return has_dumped;
+#undef NUM_NOTES
+}
+
+#endif		/* USE_ELF_CORE_DUMP */
-- 
cgit v1.2.2


From 92eb7a2f28d551acedeb5752263267a64b1f5ddf Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 10 Jul 2006 04:45:31 -0700
Subject: [PATCH] fix weird logic in alloc_fdtable()

There's a fairly obvious infinite loop in there.

Also, use roundup_pow_of_two() rather than open-coding stuff.

Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 55f4e70225..3f35608606 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -240,13 +240,9 @@ static struct fdtable *alloc_fdtable(int nr)
 	if (!fdt)
   		goto out;
 
-	nfds = 8 * L1_CACHE_BYTES;
-  	/* Expand to the max in easy steps */
-  	while (nfds <= nr) {
-		nfds = nfds * 2;
-		if (nfds > NR_OPEN)
-			nfds = NR_OPEN;
-	}
+	nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nfds));
+	if (nfds > NR_OPEN)
+		nfds = NR_OPEN;
 
   	new_openset = alloc_fdset(nfds);
   	new_execset = alloc_fdset(nfds);
-- 
cgit v1.2.2


From 36cf96f5e7c098731a1ad9d79694d6f591b18e7f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Mon, 10 Jul 2006 04:45:33 -0700
Subject: [PATCH] Remove leftover ext3 acl declarations

These functions no longer exist; remove their declarations.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/acl.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 92d50b53a9..0d1e6279cb 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -62,9 +62,6 @@ extern int ext3_permission (struct inode *, int, struct nameidata *);
 extern int ext3_acl_chmod (struct inode *);
 extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
 
-extern int init_ext3_acl(void);
-extern void exit_ext3_acl(void);
-
 #else  /* CONFIG_EXT3_FS_POSIX_ACL */
 #include <linux/sched.h>
 #define ext3_permission NULL
-- 
cgit v1.2.2


From e2b209509ca33743864846aef2e1b2afc21f7915 Mon Sep 17 00:00:00 2001
From: Shankar Anand <shanand@novell.com>
Date: Mon, 10 Jul 2006 04:45:44 -0700
Subject: [PATCH] knfsd: nfsd4: add per-operation server stats

Add an nfs4 operations count array to nfsd_stats structure.  The count is
incremented in nfsd4_proc_compound() where all the operations are handled
by the nfsv4 server.  This count of individual nfsv4 operations is also
entered into /proc filesystem.

Signed-off-by: Shankar Anand<shanand@novell.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs4proc.c |  8 ++++++++
 fs/nfsd/stats.c    | 10 ++++++++++
 2 files changed, 18 insertions(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b0e095ea0c..ee4eff27ae 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -721,6 +721,12 @@ nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
 	return nfs_ok;
 }
 
+static inline void nfsd4_increment_op_stats(u32 opnum)
+{
+	if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
+		nfsdstats.nfs4_opcount[opnum]++;
+}
+
 
 /*
  * COMPOUND call.
@@ -930,6 +936,8 @@ encode_op:
 		/* XXX Ugh, we need to get rid of this kind of special case: */
 		if (op->opnum == OP_READ && op->u.read.rd_filp)
 			fput(op->u.read.rd_filp);
+
+		nfsd4_increment_op_stats(op->opnum);
 	}
 
 out:
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 57265d5638..71944cddf6 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -72,6 +72,16 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
 	/* show my rpc info */
 	svc_seq_show(seq, &nfsd_svcstats);
 
+#ifdef CONFIG_NFSD_V4
+	/* Show count for individual nfsv4 operations */
+	/* Writing operation numbers 0 1 2 also for maintaining uniformity */
+	seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
+	for (i = 0; i <= LAST_NFS4_OP; i++)
+		seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]);
+
+	seq_putc(seq, '\n');
+#endif
+
 	return 0;
 }
 
-- 
cgit v1.2.2


From d579091b4385e9386e244622d593fe064aa8e8e7 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@openvz.org>
Date: Wed, 12 Jul 2006 09:03:05 -0700
Subject: [PATCH] fix fdset leakage

When found, it is obvious.  nfds calculated when allocating fdsets is
rewritten by calculation of size of fdtable, and when we are unlucky, we
try to free fdsets of wrong size.

Found due to OpenVZ resource management (User Beancounters).

Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index 3f35608606..c8f1b0af8e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -273,11 +273,13 @@ static struct fdtable *alloc_fdtable(int nr)
 	} while (nfds <= nr);
 	new_fds = alloc_fd_array(nfds);
 	if (!new_fds)
-		goto out;
+		goto out2;
 	fdt->fd = new_fds;
 	fdt->max_fds = nfds;
 	fdt->free_files = NULL;
 	return fdt;
+out2:
+	nfds = fdt->max_fdset;
 out:
   	if (new_openset)
   		free_fdset(new_openset, nfds);
-- 
cgit v1.2.2


From 232ba9dbd68bb084d5d90c511f207d18eae614da Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 12 Jul 2006 09:03:06 -0700
Subject: [PATCH] lockdep: annotate the sysfs i_mutex to be a separate class

sysfs has a different i_mutex lock order behavior for i_mutex than the
other filesystems; sysfs i_mutex is called in many places with subsystem
locks held.  At the same time, many of the VFS locking rules do not apply
to sysfs at all (cross directory rename for example).  To untangle this
mess (which gives false positives in lockdep), we're giving sysfs inodes
their own class for i_mutex.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/sysfs/inode.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 5e0e31cc46..9889e54e1f 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -109,6 +109,17 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
 	inode->i_ctime = iattr->ia_ctime;
 }
 
+
+/*
+ * sysfs has a different i_mutex lock order behavior for i_mutex than other
+ * filesystems; sysfs i_mutex is called in many places with subsystem locks
+ * held. At the same time, many of the VFS locking rules do not apply to
+ * sysfs at all (cross directory rename for example). To untangle this mess
+ * (which gives false positives in lockdep), we're giving sysfs inodes their
+ * own class for i_mutex.
+ */
+static struct lock_class_key sysfs_inode_imutex_key;
+
 struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
 {
 	struct inode * inode = new_inode(sysfs_sb);
@@ -118,6 +129,7 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
 		inode->i_mapping->a_ops = &sysfs_aops;
 		inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
 		inode->i_op = &sysfs_inode_operations;
+		lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
 
 		if (sd->s_iattr) {
 			/* sysfs_dirent has non-default attributes
-- 
cgit v1.2.2


From 0635170b544b01b46a81b4ac5cff5020ab59d1fc Mon Sep 17 00:00:00 2001
From: "Adam B. Jerome" <abj@novell.com>
Date: Wed, 12 Jul 2006 09:03:07 -0700
Subject: [PATCH] /fs/proc/: 'larger than buffer size' memory accessed by
 clear_user()

Address a potential 'larger than buffer size' memory access by
clear_user().  Without this patch, this call to clear_user() can attempt to
clear too many (tsz) bytes resulting in a wrong (-EFAULT) return code by
read_kcore().

Signed-off-by: Adam B. Jerome <abj@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/kcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 8d6d85d740..6a984f64ed 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -382,7 +382,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				 */
 				if (n) { 
 					if (clear_user(buffer + tsz - n,
-								tsz - n))
+								n))
 						return -EFAULT;
 				}
 			} else {
-- 
cgit v1.2.2


From a29b0b74e73b66674d20a170e463fe9032f2272a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 12 Jul 2006 09:03:08 -0700
Subject: [PATCH] alloc_fdtable() expansion fix

We're supposed to go the next power of two if nfds==nr.

Of `nr', not of `nfsd'.

Spotted by Rene Scharfe <rene.scharfe@lsrfire.ath.cx>

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/file.c b/fs/file.c
index c8f1b0af8e..b3c6b82e6a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -240,7 +240,7 @@ static struct fdtable *alloc_fdtable(int nr)
 	if (!fdt)
   		goto out;
 
-	nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nfds));
+	nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1));
 	if (nfds > NR_OPEN)
 		nfds = NR_OPEN;
 
-- 
cgit v1.2.2


From 18b0bbd8ca6d3cb90425aa0d77b99a762c6d6de3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 14 Jul 2006 16:51:34 -0700
Subject: Fix nasty /proc vulnerability

We have a bad interaction with both the kernel and user space being able
to change some of the /proc file status.  This fixes the most obvious
part of it, but I expect we'll also make it harder for users to modify
even their "own" files in /proc.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 243a94af04..0cb8f20d00 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1338,6 +1338,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 		} else {
 			inode->i_uid = 0;
 			inode->i_gid = 0;
+			inode->i_mode = 0;
 		}
 		security_task_to_inode(task, inode);
 		put_task_struct(task);
-- 
cgit v1.2.2


From 9ee8ab9fbf21e6b87ad227cd46c0a4be41ab749b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 14 Jul 2006 21:48:03 -0700
Subject: Relax /proc fix a bit

Clearign all of i_mode was a bit draconian. We only really care about
S_ISUID/ISGID, after all.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0cb8f20d00..474eae3450 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1338,8 +1338,8 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
 		} else {
 			inode->i_uid = 0;
 			inode->i_gid = 0;
-			inode->i_mode = 0;
 		}
+		inode->i_mode &= ~(S_ISUID | S_ISGID);
 		security_task_to_inode(task, inode);
 		put_task_struct(task);
 		return 1;
@@ -1390,6 +1390,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
 					inode->i_uid = 0;
 					inode->i_gid = 0;
 				}
+				inode->i_mode &= ~(S_ISUID | S_ISGID);
 				security_task_to_inode(task, inode);
 				put_task_struct(task);
 				return 1;
-- 
cgit v1.2.2


From de45921535bfc3b1f63b426c2a9739635f864283 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Fri, 14 Jul 2006 00:23:49 -0700
Subject: [PATCH] struct file leakage

2.6.16 leaks like hell. While testing, I found massive leakage
(reproduced in openvz) in:

*filp
*size-4096

And 1 object leaks in
*size-32
*size-64
*size-128

It is the fix for the first one.  filp leaks in the bowels of namei.c.

Seems, size-4096 is file table leaking in expand_fdtables.

I have no idea what are the rest and why they show only accompanying
another leaks.  Some debugging structs?

[akpm@osdl.org, Trond: remove the IS_ERR() check]
Signed-off-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: Kirill Korotaev <dev@openvz.org>
Cc: <stable@kernel.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namei.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index c9750d755a..e01070d7bf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1712,8 +1712,14 @@ do_link:
 	if (error)
 		goto exit_dput;
 	error = __do_follow_link(&path, nd);
-	if (error)
+	if (error) {
+		/* Does someone understand code flow here? Or it is only
+		 * me so stupid? Anathema to whoever designed this non-sense
+		 * with "intent.open".
+		 */
+		release_open_intent(nd);
 		return error;
+	}
 	nd->flags &= ~LOOKUP_PARENT;
 	if (nd->last_type == LAST_BIND)
 		goto ok;
-- 
cgit v1.2.2


From 6fbe82a952790c634ea6035c223a01a81377daf1 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 14 Jul 2006 00:24:22 -0700
Subject: [PATCH] reiserfs: fix handling of device names with /'s in them

On systems with block devices containing a slash (virtual dasd, cciss,
etc), reiserfs will fail to initialize /proc/fs/reiserfs/<dev> due to it
being interpreted as a subdirectory.  The generic block device code changes
the / to !  for use in the sysfs tree.  This patch uses that convention.

Tested by making dm devices use dm/<number> rather than dm-<number>

[akpm@osdl.org: name variables consistently]
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/procfs.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 5d8a8cfebc..c533ec1bca 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -492,9 +492,17 @@ static void add_file(struct super_block *sb, char *name,
 
 int reiserfs_proc_info_init(struct super_block *sb)
 {
+	char b[BDEVNAME_SIZE];
+	char *s;
+
+	/* Some block devices use /'s */
+	strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
+	s = strchr(b, '/');
+	if (s)
+		*s = '!';
+
 	spin_lock_init(&__PINFO(sb).lock);
-	REISERFS_SB(sb)->procdir =
-	    proc_mkdir(reiserfs_bdevname(sb), proc_info_root);
+	REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
 	if (REISERFS_SB(sb)->procdir) {
 		REISERFS_SB(sb)->procdir->owner = THIS_MODULE;
 		REISERFS_SB(sb)->procdir->data = sb;
@@ -508,13 +516,22 @@ int reiserfs_proc_info_init(struct super_block *sb)
 		return 0;
 	}
 	reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s",
-			 proc_info_root_name, reiserfs_bdevname(sb));
+			 proc_info_root_name, b);
 	return 1;
 }
 
 int reiserfs_proc_info_done(struct super_block *sb)
 {
 	struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
+	char b[BDEVNAME_SIZE];
+	char *s;
+
+	/* Some block devices use /'s */
+	strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
+	s = strchr(b, '/');
+	if (s)
+		*s = '!';
+
 	if (de) {
 		remove_proc_entry("journal", de);
 		remove_proc_entry("oidmap", de);
@@ -528,7 +545,7 @@ int reiserfs_proc_info_done(struct super_block *sb)
 	__PINFO(sb).exiting = 1;
 	spin_unlock(&__PINFO(sb).lock);
 	if (proc_info_root) {
-		remove_proc_entry(reiserfs_bdevname(sb), proc_info_root);
+		remove_proc_entry(b, proc_info_root);
 		REISERFS_SB(sb)->procdir = NULL;
 	}
 	return 0;
-- 
cgit v1.2.2


From d247e2c661f28a21e5f9a8d672e1e88a7c1c5d4a Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Fri, 14 Jul 2006 00:24:23 -0700
Subject: [PATCH] add function documentation for register_chrdev()

Documentation for register_chrdev() was missing completely.

[akpm@osdl.org: kerneldocification]
Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/char_dev.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index a4cbc6706e..3483d3cf80 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -182,6 +182,28 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
 	return 0;
 }
 
+/**
+ * register_chrdev() - Register a major number for character devices.
+ * @major: major device number or 0 for dynamic allocation
+ * @name: name of this range of devices
+ * @fops: file operations associated with this devices
+ *
+ * If @major == 0 this functions will dynamically allocate a major and return
+ * its number.
+ *
+ * If @major > 0 this function will attempt to reserve a device with the given
+ * major number and will return zero on success.
+ *
+ * Returns a -ve errno on failure.
+ *
+ * The name of this device has nothing to do with the name of the device in
+ * /dev. It only helps to keep track of the different owners of devices. If
+ * your module name has only one type of devices it's ok to use e.g. the name
+ * of the module here.
+ *
+ * This function registers a range of 256 minor numbers. The first minor number
+ * is 0.
+ */
 int register_chrdev(unsigned int major, const char *name,
 		    const struct file_operations *fops)
 {
-- 
cgit v1.2.2


From 25890454667b3295f67b3372352be90705f8667c Mon Sep 17 00:00:00 2001
From: Shailabh Nagar <nagar@watson.ibm.com>
Date: Fri, 14 Jul 2006 00:24:43 -0700
Subject: [PATCH] per-task-delay-accounting: /proc export of aggregated block
 I/O delays

Export I/O delays seen by a task through /proc/<tgid>/stats for use in top
etc.

Note that delays for I/O done for swapping in pages (swapin I/O) is clubbed
together with all other I/O here (this is not the case in the netlink
interface where the swapin I/O is kept distinct)

[akpm@osdl.org: printk warning fix]
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/array.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7495d3e207..0b615d62a1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -74,6 +74,7 @@
 #include <linux/times.h>
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
+#include <linux/delayacct.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -411,7 +412,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 
 	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n",
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n",
 		task->pid,
 		tcomm,
 		state,
@@ -455,7 +456,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 		task->exit_signal,
 		task_cpu(task),
 		task->rt_priority,
-		task->policy);
+		task->policy,
+		(unsigned long long)delayacct_blkio_ticks(task));
 	if(mm)
 		mmput(mm);
 	return res;
-- 
cgit v1.2.2


From 92d032855e64834283de5acfb0463232e0ab128e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@evo.osdl.org>
Date: Sat, 15 Jul 2006 12:20:05 -0700
Subject: Mark /proc MS_NOSUID and MS_NOEXEC

Not that we really need this any more, but at the same time there's no
reason not to do this.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6dcef089e1..49dfb2ab78 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -192,7 +192,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
 {
 	struct inode * root_inode;
 
-	s->s_flags |= MS_NODIRATIME;
+	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
 	s->s_magic = PROC_SUPER_MAGIC;
-- 
cgit v1.2.2


From 6d76fa58b050044994fe25f8753b8023f2b36737 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@evo.osdl.org>
Date: Sat, 15 Jul 2006 12:26:45 -0700
Subject: Don't allow chmod() on the /proc/<pid>/ files

This just turns off chmod() on the /proc/<pid>/ files, since there is no
good reason to allow it, and had we disallowed it originally, the nasty
/proc race exploit wouldn't have been possible.

The other patches already fixed the problem chmod() could cause, so this
is really just some final mop-up..

This particular version is based off a patch by Eugene and Marcel which
had much better naming than my original equivalent one.

Signed-off-by: Eugene Teo <eteo@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 474eae3450..fe8d55fb17 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -551,6 +551,27 @@ static int proc_fd_access_allowed(struct inode *inode)
 	return allowed;
 }
 
+static int proc_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	int error;
+	struct inode *inode = dentry->d_inode;
+
+	if (attr->ia_valid & ATTR_MODE)
+		return -EPERM;
+
+	error = inode_change_ok(inode, attr);
+	if (!error) {
+		error = security_inode_setattr(dentry, attr);
+		if (!error)
+			error = inode_setattr(inode, attr);
+	}
+	return error;
+}
+
+static struct inode_operations proc_def_inode_operations = {
+	.setattr	= proc_setattr,
+};
+
 extern struct seq_operations mounts_op;
 struct proc_mounts {
 	struct seq_file m;
@@ -1111,7 +1132,8 @@ out:
 
 static struct inode_operations proc_pid_link_inode_operations = {
 	.readlink	= proc_pid_readlink,
-	.follow_link	= proc_pid_follow_link
+	.follow_link	= proc_pid_follow_link,
+	.setattr	= proc_setattr,
 };
 
 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
@@ -1285,6 +1307,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
 	ei = PROC_I(inode);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_ino = fake_ino(task->pid, ino);
+	inode->i_op = &proc_def_inode_operations;
 
 	/*
 	 * grab the reference to task.
@@ -1529,11 +1552,13 @@ static struct file_operations proc_task_operations = {
  */
 static struct inode_operations proc_fd_inode_operations = {
 	.lookup		= proc_lookupfd,
+	.setattr	= proc_setattr,
 };
 
 static struct inode_operations proc_task_inode_operations = {
 	.lookup		= proc_task_lookup,
 	.getattr	= proc_task_getattr,
+	.setattr	= proc_setattr,
 };
 
 #ifdef CONFIG_SECURITY
@@ -1847,11 +1872,13 @@ static struct file_operations proc_tid_base_operations = {
 static struct inode_operations proc_tgid_base_inode_operations = {
 	.lookup		= proc_tgid_base_lookup,
 	.getattr	= pid_getattr,
+	.setattr	= proc_setattr,
 };
 
 static struct inode_operations proc_tid_base_inode_operations = {
 	.lookup		= proc_tid_base_lookup,
 	.getattr	= pid_getattr,
+	.setattr	= proc_setattr,
 };
 
 #ifdef CONFIG_SECURITY
@@ -1894,11 +1921,13 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
 static struct inode_operations proc_tgid_attr_inode_operations = {
 	.lookup		= proc_tgid_attr_lookup,
 	.getattr	= pid_getattr,
+	.setattr	= proc_setattr,
 };
 
 static struct inode_operations proc_tid_attr_inode_operations = {
 	.lookup		= proc_tid_attr_lookup,
 	.getattr	= pid_getattr,
+	.setattr	= proc_setattr,
 };
 #endif
 
-- 
cgit v1.2.2


From 2a293b7d5aa2f0d1e3d87b642f7ac263c2d664e3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 28 Jul 2006 17:04:26 +1000
Subject: [XFS] All xfs_disk_dquot_t values are (as the name says) disk endian.
 Before putting them into struct statfs they should be endian-swapped.

SGI-PV: 954580
SGI-Modid: xfs-linux-melb:xfs-kern:26550a

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/quota/xfs_qm_bhv.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index e95e99f716..f137856c32 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -217,17 +217,24 @@ xfs_qm_statvfs(
 		return 0;
 	dp = &dqp->q_core;
 
-	limit = dp->d_blk_softlimit ? dp->d_blk_softlimit : dp->d_blk_hardlimit;
+	limit = dp->d_blk_softlimit ?
+		be64_to_cpu(dp->d_blk_softlimit) :
+		be64_to_cpu(dp->d_blk_hardlimit);
 	if (limit && statp->f_blocks > limit) {
 		statp->f_blocks = limit;
-		statp->f_bfree = (statp->f_blocks > dp->d_bcount) ?
-					(statp->f_blocks - dp->d_bcount) : 0;
+		statp->f_bfree =
+			(statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
+			 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
 	}
-	limit = dp->d_ino_softlimit ? dp->d_ino_softlimit : dp->d_ino_hardlimit;
+
+	limit = dp->d_ino_softlimit ?
+		be64_to_cpu(dp->d_ino_softlimit) :
+		be64_to_cpu(dp->d_ino_hardlimit);
 	if (limit && statp->f_files > limit) {
 		statp->f_files = limit;
-		statp->f_ffree = (statp->f_files > dp->d_icount) ?
-					(statp->f_ffree - dp->d_icount) : 0;
+		statp->f_ffree =
+			(statp->f_files > be64_to_cpu(dp->d_icount)) ?
+			 (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
 	}
 
 	xfs_qm_dqput(dqp);
-- 
cgit v1.2.2


From f5faad799475c4058416264f672bb33bf8b5ef41 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 28 Jul 2006 17:04:44 +1000
Subject: [XFS] Fix remount vs no/barrier options by ensuring we clear unwanted
 flags from iclog buffers before submitting them for writing.

SGI-PV: 954772
SGI-Modid: xfs-linux-melb:xfs-kern:26605a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.h |  4 ++--
 fs/xfs/xfs_log.c           | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index ceda3a2859..7858703ed8 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -246,8 +246,8 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
 #define BUF_BUSY		XBF_DONT_BLOCK
 
 #define XFS_BUF_BFLAGS(bp)	((bp)->b_flags)
-#define XFS_BUF_ZEROFLAGS(bp)	\
-	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI))
+#define XFS_BUF_ZEROFLAGS(bp)	((bp)->b_flags &= \
+		~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
 
 #define XFS_BUF_STALE(bp)	((bp)->b_flags |= XFS_B_STALE)
 #define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XFS_B_STALE)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e730328636..21ac1a67e3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1413,7 +1413,7 @@ xlog_sync(xlog_t		*log,
 	ops = iclog->ic_header.h_num_logops;
 	INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops);
 
-	bp	    = iclog->ic_bp;
+	bp = iclog->ic_bp;
 	ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
 	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
 	XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT)));
@@ -1430,15 +1430,14 @@ xlog_sync(xlog_t		*log,
 	}
 	XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count);
 	XFS_BUF_SET_FSPRIVATE(bp, iclog);	/* save for later */
+	XFS_BUF_ZEROFLAGS(bp);
 	XFS_BUF_BUSY(bp);
 	XFS_BUF_ASYNC(bp);
 	/*
 	 * Do an ordered write for the log block.
-	 *
-	 * It may not be needed to flush the first split block in the log wrap
-	 * case, but do it anyways to be safe -AK
+	 * Its unnecessary to flush the first split block in the log wrap case.
 	 */
-	if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+	if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER))
 		XFS_BUF_ORDERED(bp);
 
 	ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -1460,7 +1459,7 @@ xlog_sync(xlog_t		*log,
 		return error;
 	}
 	if (split) {
-		bp		= iclog->ic_log->l_xbuf;
+		bp = iclog->ic_log->l_xbuf;
 		ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
 							(unsigned long)1);
 		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
@@ -1468,6 +1467,7 @@ xlog_sync(xlog_t		*log,
 		XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
 					    (__psint_t)count), split);
 		XFS_BUF_SET_FSPRIVATE(bp, iclog);
+		XFS_BUF_ZEROFLAGS(bp);
 		XFS_BUF_BUSY(bp);
 		XFS_BUF_ASYNC(bp);
 		if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
-- 
cgit v1.2.2


From b2ea401bac39e75ebb64038609ed22efbc799905 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 28 Jul 2006 17:05:13 +1000
Subject: [XFS] Fix a barrier related forced shutdown on mounts with quota
 enabled.

SGI-PV: 912426
SGI-Modid: xfs-linux-melb:xfs-kern:26622a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/linux-2.6/xfs_super.c | 7 +++++++
 fs/xfs/xfs_vfsops.c          | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9bdef9d519..4754f342a5 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -314,6 +314,13 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
 		return;
 	}
 
+	if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
+		xfs_fs_cmn_err(CE_NOTE, mp,
+		  "Disabling barriers, underlying device is readonly");
+		mp->m_flags &= ~XFS_MOUNT_BARRIER;
+		return;
+	}
+
 	error = xfs_barrier_test(mp);
 	if (error) {
 		xfs_fs_cmn_err(CE_NOTE, mp,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 6c96391f3f..b427d220a1 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -515,7 +515,7 @@ xfs_mount(
 	if (error)
 		goto error2;
 
-	if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY))
+	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_mountfs_check_barriers(mp);
 
 	error = XFS_IOINIT(vfsp, args, flags);
-- 
cgit v1.2.2


From 41ff715abc49324fb2cb20e66bc4e0290cfdbe51 Mon Sep 17 00:00:00 2001
From: Nathan Scott <nathans@sgi.com>
Date: Fri, 28 Jul 2006 17:05:51 +1000
Subject: [XFS] Ensure bulkstat from an invalid inode number gets caught always
 with EINVAL.

SGI-PV: 953819
SGI-Modid: xfs-linux-melb:xfs-kern:26629a

Signed-off-by: Nathan Scott <nathans@sgi.com>
---
 fs/xfs/xfs_inode.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 86c1bf0bba..1f8ecff855 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -334,10 +334,9 @@ xfs_itobp(
 #if !defined(__KERNEL__)
 	ni = 0;
 #elif defined(DEBUG)
-	ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 :
-		(BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog);
+	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
 #else	/* usual case */
-	ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1;
+	ni = 1;
 #endif
 
 	for (i = 0; i < ni; i++) {
@@ -348,11 +347,15 @@ xfs_itobp(
 					(i << mp->m_sb.sb_inodelog));
 		di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
 			    XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
-		if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
-				 XFS_RANDOM_ITOBP_INOTOBP))) {
+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+						XFS_ERRTAG_ITOBP_INOTOBP,
+						XFS_RANDOM_ITOBP_INOTOBP))) {
+			if (imap_flags & XFS_IMAP_BULKSTAT) {
+				xfs_trans_brelse(tp, bp);
+				return XFS_ERROR(EINVAL);
+			}
 #ifdef DEBUG
-			if (!(imap_flags & XFS_IMAP_BULKSTAT))
-				cmn_err(CE_ALERT,
+			cmn_err(CE_ALERT,
 					"Device %s - bad inode magic/vsn "
 					"daddr %lld #%d (magic=%x)",
 				XFS_BUFTARG_NAME(mp->m_ddev_targp),
-- 
cgit v1.2.2


From 2ccb48ebb4de139eef4fcefd5f2bb823cb0d81b9 Mon Sep 17 00:00:00 2001
From: Neil Brown <neilb@suse.de>
Date: Sun, 30 Jul 2006 03:03:01 -0700
Subject: [PATCH] ext3: avoid triggering ext3_error on bad NFS file handle

The inode number out of an NFS file handle gets passed eventually to
ext3_get_inode_block() without any checking.  If ext3_get_inode_block()
allows it to trigger an error, then bad filehandles can have unpleasant
effect - ext3_error() will usually cause a forced read-only remount, or a
panic if `errors=panic' was used.

So remove the call to ext3_error there and put a matching check in
ext3/namei.c where inode numbers are read off storage.

[akpm@osdl.org: fix off-by-one error]
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: <stable@kernel.org>
Cc: "Stephen C. Tweedie" <sct@redhat.com>
Cc: Eric Sandeen <esandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 13 +++++++------
 fs/ext3/namei.c | 15 +++++++++++++--
 2 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f804d5e9d6..ab034d3053 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2402,14 +2402,15 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
 	struct buffer_head *bh;
 	struct ext3_group_desc * gdp;
 
-
-	if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO &&
-		ino != EXT3_RESIZE_INO && ino < EXT3_FIRST_INO(sb)) ||
-		ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) {
-		ext3_error(sb, "ext3_get_inode_block",
-			    "bad inode number: %lu", ino);
+	if (!ext3_valid_inum(sb, ino)) {
+		/*
+		 * This error is already checked for in namei.c unless we are
+		 * looking at an NFS filehandle, in which case no error
+		 * report is needed
+		 */
 		return 0;
 	}
+
 	block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
 	if (block_group >= EXT3_SB(sb)->s_groups_count) {
 		ext3_error(sb,"ext3_get_inode_block","group >= groups count");
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index d9176dba36..2aa7101b27 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1000,7 +1000,12 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
 	if (bh) {
 		unsigned long ino = le32_to_cpu(de->inode);
 		brelse (bh);
-		inode = iget(dir->i_sb, ino);
+		if (!ext3_valid_inum(dir->i_sb, ino)) {
+			ext3_error(dir->i_sb, "ext3_lookup",
+				   "bad inode number: %lu", ino);
+			inode = NULL;
+		} else
+			inode = iget(dir->i_sb, ino);
 
 		if (!inode)
 			return ERR_PTR(-EACCES);
@@ -1028,7 +1033,13 @@ struct dentry *ext3_get_parent(struct dentry *child)
 		return ERR_PTR(-ENOENT);
 	ino = le32_to_cpu(de->inode);
 	brelse(bh);
-	inode = iget(child->d_inode->i_sb, ino);
+
+	if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
+		ext3_error(child->d_inode->i_sb, "ext3_get_parent",
+			   "bad inode number: %lu", ino);
+		inode = NULL;
+	} else
+		inode = iget(child->d_inode->i_sb, ino);
 
 	if (!inode)
 		return ERR_PTR(-EACCES);
-- 
cgit v1.2.2


From d1bbf14f37261c2c0dba71404602e1ddcec069d2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sun, 30 Jul 2006 03:03:16 -0700
Subject: [PATCH] knfsd: Fix stale file handle problem with subtree_checking.

A recent commit (7fc90ec93a5eb71f4b08403baf5ba7176b3ec6b1) moved the
call to nfsd_setuser out of the 'find a dentry for a filehandle' branch
of fh_verify so that it would always be called.

This had the unfortunately side-effect of moving *after* the call to
decode_fh, so the prober fsuid was not set when nfsd_acceptable was called,
the 'permission' check did the wrong thing.

This patch moves the nfsd_setuser call back where it was, and add as call
in the other branch of the if.

Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfsfh.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ecc439d256..501d838845 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -187,6 +187,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 			goto out;
 		}
 
+		/* Set user creds for this exportpoint */
+		error = nfserrno(nfsd_setuser(rqstp, exp));
+		if (error)
+			goto out;
+
 		/*
 		 * Look up the dentry using the NFS file handle.
 		 */
@@ -241,16 +246,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 		dprintk("nfsd: fh_verify - just checking\n");
 		dentry = fhp->fh_dentry;
 		exp = fhp->fh_export;
+		/* Set user creds for this exportpoint; necessary even
+		 * in the "just checking" case because this may be a
+		 * filehandle that was created by fh_compose, and that
+		 * is about to be used in another nfsv4 compound
+		 * operation */
+		error = nfserrno(nfsd_setuser(rqstp, exp));
+		if (error)
+			goto out;
 	}
 	cache_get(&exp->h);
 
-	/* Set user creds for this exportpoint; necessary even in the "just
-	 * checking" case because this may be a filehandle that was created by
-	 * fh_compose, and that is about to be used in another nfsv4 compound
-	 * operation */
-	error = nfserrno(nfsd_setuser(rqstp, exp));
-	if (error)
-		goto out;
 
 	error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
 	if (error)
-- 
cgit v1.2.2


From 0e1dfc66b6ec94984a4778132147a8aa36461d58 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 30 Jul 2006 03:03:28 -0700
Subject: [PATCH] invalidate_bdev() speedup

We can immediately bail from invalidate_bdev() if the blockdev has no
pagecache.

This solves the huge IPI storms which hald is causing on the big ia64
machines when it polls CDROM drives.

Acked-by: Jes Sorensen <jes@sgi.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 3660dcb975..71649ef9b6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -470,13 +470,18 @@ out:
    pass does the actual I/O. */
 void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
 {
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+	if (mapping->nrpages == 0)
+		return;
+
 	invalidate_bh_lrus();
 	/*
 	 * FIXME: what about destroy_dirty_buffers?
 	 * We really want to use invalidate_inode_pages2() for
 	 * that, but not until that's cleaned up.
 	 */
-	invalidate_inode_pages(bdev->bd_inode->i_mapping);
+	invalidate_inode_pages(mapping);
 }
 
 /*
-- 
cgit v1.2.2


From cfa224e928f782e1593b5222688fad84c2cad3e8 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Sun, 30 Jul 2006 03:03:51 -0700
Subject: [PATCH] enable mac partition label per default on pmac

Enable mac partition table support per default also for a powermac config.

Signed-off-by: Olaf Hering <olh@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/partitions/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index c9a4780992..e478f19418 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -99,7 +99,7 @@ config IBM_PARTITION
 
 config MAC_PARTITION
 	bool "Macintosh partition map support" if PARTITION_ADVANCED
-	default y if MAC
+	default y if (MAC || PPC_PMAC)
 	help
 	  Say Y here if you would like to use hard disks under Linux which
 	  were partitioned on a Macintosh.
-- 
cgit v1.2.2


From 5b6509aa8c2f292caea7c0602ec361f920951508 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 30 Jul 2006 03:03:54 -0700
Subject: [PATCH] inotify: fix deadlock found by lockdep

This is a real deadlock, a nice complex one:
(warning: long explanation follows so that Andrew can have a complete
patch description)

it's an ABCDA deadlock:

A iprune_mutex
B inode->inotify_mutex
C ih->mutex
D dev->ev_mutex

The AB relationship comes straight from invalidate_inodes()

int invalidate_inodes(struct super_block * sb)
{
        int busy;
        LIST_HEAD(throw_away);

        mutex_lock(&iprune_mutex);
        spin_lock(&inode_lock);
        inotify_unmount_inodes(&sb->s_inodes);

where inotify_umount_inodes() takes the
                mutex_lock(&inode->inotify_mutex);

The BC relationship comes directly from inotify_find_update_watch():
s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
                              u32 mask)
{
   ...
        mutex_lock(&inode->inotify_mutex);
        mutex_lock(&ih->mutex);

The CD relationship comes from inotify_rm_wd:
inotify_rm_wd does
        mutex_lock(&inode->inotify_mutex);
        mutex_lock(&ih->mutex)
and then calls inotify_remove_watch_locked() which calls
notify_dev_queue_event() which does
	        mutex_lock(&dev->ev_mutex);

(this strictly is a BCD relationship)

The DA relationship comes from the most interesting part:

  [<ffffffff8022d9f2>] shrink_icache_memory+0x42/0x270
  [<ffffffff80240dc4>] shrink_slab+0x11d/0x1c9
  [<ffffffff802b5104>] try_to_free_pages+0x187/0x244
  [<ffffffff8020efed>] __alloc_pages+0x1cd/0x2e0
  [<ffffffff8025e1f8>] cache_alloc_refill+0x3f8/0x821
  [<ffffffff8020a5e5>] kmem_cache_alloc+0x85/0xcb
  [<ffffffff802db027>] kernel_event+0x2e/0x122
  [<ffffffff8021d61c>] inotify_dev_queue_event+0xcc/0x140

inotify_dev_queue_event schedules a kernel_event which does a
kmem_cache_alloc( , GFP_KERNEL) which may try to shrink slabs, including
the inode cache .. which then takes iprune_mutex.

And voila, there is an AB, a BC, a CD relationship (even a direct BCD),
and also now a DA relationship -> a circular type AB-BA deadlock but
involving 4 locks.

The solution is simple: kernel_event() is NOT allowed to use GFP_KERNEL,
but must use GFP_NOFS to not cause recursion into the VFS.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Robert Love <rml@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inotify_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index f2386442ad..017cb0f134 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -187,7 +187,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
 {
 	struct inotify_kernel_event *kevent;
 
-	kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
+	kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
 	if (unlikely(!kevent))
 		return NULL;
 
-- 
cgit v1.2.2


From 6ecbc4e1a395062a8e99e4f5fe328f6ba166d9c8 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josht@us.ibm.com>
Date: Sun, 30 Jul 2006 03:03:56 -0700
Subject: [PATCH] Remove incorrect unlock_kernel from allocation failure path
 in coda_open()

Commit 398c53a757702e1e3a7a2c24860c7ad26acb53ed (in the historical GIT
tree) moved the lock_kernel() in coda_open after the allocation of a
coda_file_info struct, but left an unlock_kernel() in the allocation
failure error path; remove it.

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Acked-by: Jan Harkes <jaharkes@cs.cmu.edu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/coda/file.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/file.c b/fs/coda/file.c
index cc66c681bd..dbfbcfa5b3 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -136,10 +136,8 @@ int coda_open(struct inode *coda_inode, struct file *coda_file)
 	coda_vfs_stat.open++;
 
 	cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL);
-	if (!cfi) {
-		unlock_kernel();
+	if (!cfi)
 		return -ENOMEM;
-	}
 
 	lock_kernel();
 
-- 
cgit v1.2.2


From 0aa9e4f147880b2d7d1eef1f0b45112af0e36f9f Mon Sep 17 00:00:00 2001
From: Josh Triplett <josht@us.ibm.com>
Date: Sun, 30 Jul 2006 03:03:58 -0700
Subject: [PATCH] efs: Remove incorrect unlock_kernel from failure path in
 efs_symlink_readpage()

If efs_symlink_readpage hits the -ENAMETOOLONG error path, it will call
unlock_kernel without ever having called lock_kernel(); fix this by
creating and jumping to a new label fail_notlocked rather than the fail
label used after calling lock_kernel().

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Cc: Marcelo Tosatti <marcelo.tosatti@cyclades.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/efs/symlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index e249cf733a..1d30d2ff44 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -22,7 +22,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
   
 	err = -ENAMETOOLONG;
 	if (size > 2 * EFS_BLOCKSIZE)
-		goto fail;
+		goto fail_notlocked;
   
 	lock_kernel();
 	/* read first 512 bytes of link target */
@@ -47,6 +47,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
 	return 0;
 fail:
 	unlock_kernel();
+fail_notlocked:
 	SetPageError(page);
 	kunmap(page);
 	unlock_page(page);
-- 
cgit v1.2.2


From 344fe78669d2d1cff9e8939598f6d0d865b6a75b Mon Sep 17 00:00:00 2001
From: Josh Triplett <josht@us.ibm.com>
Date: Sun, 30 Jul 2006 03:03:59 -0700
Subject: [PATCH] ufs: remove incorrect unlock_kernel from failure path in
 ufs_symlink()

ufs_symlink, in one of its error paths, calls unlock_kernel without ever
having called lock_kernel(); fix this by creating and jumping to a new
label out_notlocked rather than the out label used after calling
lock_kernel().

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Cc: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ufs/namei.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index abd5f23a42..d344b411e2 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -129,7 +129,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
 	struct inode * inode;
 
 	if (l > sb->s_blocksize)
-		goto out;
+		goto out_notlocked;
 
 	lock_kernel();
 	inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
@@ -155,6 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
 	err = ufs_add_nondir(dentry, inode);
 out:
 	unlock_kernel();
+out_notlocked:
 	return err;
 
 out_fail:
-- 
cgit v1.2.2


From 685d16ddb07b74537fb18972784e6214840fdd20 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Sun, 30 Jul 2006 03:04:08 -0700
Subject: [PATCH] fuse: fix zero timeout

An attribute and entry timeout of zero should mean, that the entity is
invalidated immediately after the operation.  Previously invalidation only
happened at the next clock tick.

Reported and tested by Craig Davies.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 72a74cde6d..6db66ec386 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -25,8 +25,11 @@
  */
 static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
 {
-	struct timespec ts = {sec, nsec};
-	return jiffies + timespec_to_jiffies(&ts);
+	if (sec || nsec) {
+		struct timespec ts = {sec, nsec};
+		return jiffies + timespec_to_jiffies(&ts);
+	} else
+		return jiffies - 1;
 }
 
 /*
-- 
cgit v1.2.2


From 0a0898cf413876d4ed6e371f3e04bf38600a9205 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Sun, 30 Jul 2006 03:04:10 -0700
Subject: [PATCH] fuse: use jiffies_64

It is entirely possible (though rare) that jiffies half-wraps around, while a
dentry/inode remains in the cache.  This could mean that the dentry/inode is
not invalidated for another half wraparound-time.

To get around this problem, use 64-bit jiffies.  The only problem with this is
that dentry->d_time is 32 bits on 32-bit archs.  So use d_fsdata as the high
32 bits.  This is an ugly hack, but far simpler, than having to allocate
private data just for this purpose.

Since 64-bit jiffies can be assumed never to wrap around, simple comparison
can be used, and a zero time value can represent "invalid".

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c    | 44 ++++++++++++++++++++++++++++++++++++--------
 fs/fuse/fuse_i.h |  2 +-
 fs/fuse/inode.c  |  2 +-
 3 files changed, 38 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 6db66ec386..409ce6a7cc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,33 @@
 #include <linux/sched.h>
 #include <linux/namei.h>
 
+#if BITS_PER_LONG >= 64
+static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+	entry->d_time = time;
+}
+
+static inline u64 fuse_dentry_time(struct dentry *entry)
+{
+	return entry->d_time;
+}
+#else
+/*
+ * On 32 bit archs store the high 32 bits of time in d_fsdata
+ */
+static void fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+	entry->d_time = time;
+	entry->d_fsdata = (void *) (unsigned long) (time >> 32);
+}
+
+static u64 fuse_dentry_time(struct dentry *entry)
+{
+	return (u64) entry->d_time +
+		((u64) (unsigned long) entry->d_fsdata << 32);
+}
+#endif
+
 /*
  * FUSE caches dentries and attributes with separate timeout.  The
  * time in jiffies until the dentry/attributes are valid is stored in
@@ -23,13 +50,13 @@
 /*
  * Calculate the time in jiffies until a dentry/attributes are valid
  */
-static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
+static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
 {
 	if (sec || nsec) {
 		struct timespec ts = {sec, nsec};
-		return jiffies + timespec_to_jiffies(&ts);
+		return get_jiffies_64() + timespec_to_jiffies(&ts);
 	} else
-		return jiffies - 1;
+		return 0;
 }
 
 /*
@@ -38,7 +65,8 @@ static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
  */
 static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
 {
-	entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec);
+	fuse_dentry_settime(entry,
+		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
 	if (entry->d_inode)
 		get_fuse_inode(entry->d_inode)->i_time =
 			time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
@@ -50,7 +78,7 @@ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
  */
 void fuse_invalidate_attr(struct inode *inode)
 {
-	get_fuse_inode(inode)->i_time = jiffies - 1;
+	get_fuse_inode(inode)->i_time = 0;
 }
 
 /*
@@ -63,7 +91,7 @@ void fuse_invalidate_attr(struct inode *inode)
  */
 static void fuse_invalidate_entry_cache(struct dentry *entry)
 {
-	entry->d_time = jiffies - 1;
+	fuse_dentry_settime(entry, 0);
 }
 
 /*
@@ -105,7 +133,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
 
 	if (inode && is_bad_inode(inode))
 		return 0;
-	else if (time_after(jiffies, entry->d_time)) {
+	else if (fuse_dentry_time(entry) < get_jiffies_64()) {
 		int err;
 		struct fuse_entry_out outarg;
 		struct fuse_conn *fc;
@@ -669,7 +697,7 @@ static int fuse_revalidate(struct dentry *entry)
 	if (!fuse_allow_task(fc, current))
 		return -EACCES;
 	if (get_node_id(inode) != FUSE_ROOT_ID &&
-	    time_before_eq(jiffies, fi->i_time))
+	    fi->i_time >= get_jiffies_64())
 		return 0;
 
 	return fuse_do_getattr(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0dbf966218..69c7750d55 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -59,7 +59,7 @@ struct fuse_inode {
 	struct fuse_req *forget_req;
 
 	/** Time in jiffies until the file attributes are valid */
-	unsigned long i_time;
+	u64 i_time;
 };
 
 /** FUSE specific file data */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index dcaaabd3b9..7d25092262 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -51,7 +51,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 		return NULL;
 
 	fi = get_fuse_inode(inode);
-	fi->i_time = jiffies - 1;
+	fi->i_time = 0;
 	fi->nodeid = 0;
 	fi->nlookup = 0;
 	fi->forget_req = fuse_request_alloc();
-- 
cgit v1.2.2


From 873302c71c0e60234eb187b15f83c2d79e84c40a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Sun, 30 Jul 2006 03:04:10 -0700
Subject: [PATCH] fuse: fix typo

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/control.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index a3bce3a772..46fe60b2da 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -105,7 +105,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
 
 /*
  * Add a connection to the control filesystem (if it exists).  Caller
- * must host fuse_mutex
+ * must hold fuse_mutex
  */
 int fuse_ctl_add_conn(struct fuse_conn *fc)
 {
@@ -139,7 +139,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
 
 /*
  * Remove a connection from the control filesystem (if it exists).
- * Caller must host fuse_mutex
+ * Caller must hold fuse_mutex
  */
 void fuse_ctl_remove_conn(struct fuse_conn *fc)
 {
-- 
cgit v1.2.2


From bc65ac6a0ffc66c56d1e6893685d7fe87c63cc44 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josht@us.ibm.com>
Date: Sun, 30 Jul 2006 03:04:12 -0700
Subject: [PATCH] freevxfs: Add missing lock_kernel() to vxfs_readdir

Commit 7b2fd697427e73c81d5fa659efd91bd07d303b0e in the historical GIT tree
stopped calling the readdir member of a file_operations struct with the big
kernel lock held, and fixed up all the readdir functions to do their own
locking.  However, that change added calls to unlock_kernel() in
vxfs_readdir, but no call to lock_kernel().  Fix this by adding a call to
lock_kernel().

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/freevxfs/vxfs_lookup.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 29cce456c7..43886fa00a 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -246,6 +246,8 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler)
 	u_long			page, npages, block, pblocks, nblocks, offset;
 	loff_t			pos;
 
+	lock_kernel();
+
 	switch ((long)fp->f_pos) {
 	case 0:
 		if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0)
-- 
cgit v1.2.2


From 0e31f51d8177320d61ec5786ca4aafa7b7a749b4 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Sun, 30 Jul 2006 03:04:14 -0700
Subject: [PATCH] ext3 -nobh option causes oops

For files other than IFREG, nobh option doesn't make sense.  Modifications
to them are journalled and needs buffer heads to do that.  Without this
patch, we get kernel oops in page_buffers().

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ab034d3053..c5ee9f0691 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1158,7 +1158,7 @@ retry:
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	if (test_opt(inode->i_sb, NOBH))
+	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext3_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext3_get_block);
@@ -1244,7 +1244,7 @@ static int ext3_writeback_commit_write(struct file *file, struct page *page,
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
 
-	if (test_opt(inode->i_sb, NOBH))
+	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
 		ret = nobh_commit_write(file, page, from, to);
 	else
 		ret = generic_commit_write(file, page, from, to);
@@ -1494,7 +1494,7 @@ static int ext3_writeback_writepage(struct page *page,
 		goto out_fail;
 	}
 
-	if (test_opt(inode->i_sb, NOBH))
+	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
 		ret = nobh_writepage(page, ext3_get_block, wbc);
 	else
 		ret = block_write_full_page(page, ext3_get_block, wbc);
-- 
cgit v1.2.2


From 4c90c68aca278f425afc0b48d86298b960fbc0ce Mon Sep 17 00:00:00 2001
From: Russ Ross <russross@gmail.com>
Date: Sun, 30 Jul 2006 03:04:15 -0700
Subject: [PATCH] 9p: fix marshalling bug in tcreate with empty extension field

Signed-off-by: Russ Ross <russross@gmail.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/conv.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 1e898144eb..56d88c1a09 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -673,8 +673,10 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
 	struct cbuf *bufp = &buffer;
 
 	size = 4 + 2 + strlen(name) + 4 + 1;	/* fid[4] name[s] perm[4] mode[1] */
-	if (extended && extension!=NULL)
-		size += 2 + strlen(extension);	/* extension[s] */
+	if (extended) {
+		size += 2 +			/* extension[s] */
+		    (extension == NULL ? 0 : strlen(extension));
+	}
 
 	fc = v9fs_create_common(bufp, size, TCREATE);
 	if (IS_ERR(fc))
-- 
cgit v1.2.2


From 834a9b8ca7a01c34570be021f88e18884a29f048 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@hera.kernel.org>
Date: Sun, 30 Jul 2006 03:04:16 -0700
Subject: [PATCH] 9p: fix fid behavior on failed remove

Based on a bug report from Russ Ross <russruss@gmail.com>

According to the spec:

"The remove request asks the file server both to remove the file
 represented by fid and to clunk the fid, even if the remove fails."

but the Linux client seems to expect the fid to be valid after a failed
remove attempt.  Specifically, I'm getting this behavior when attempting to
remove a non-empty directory.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/9p/vfs_inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2f580a197b..eae50c9d6d 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -434,11 +434,11 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 	result = v9fs_t_remove(v9ses, fid, &fcall);
 	if (result < 0) {
 		PRINT_FCALL_ERROR("remove fails", fcall);
-	} else {
-		v9fs_put_idpool(fid, &v9ses->fidpool);
-		v9fs_fid_destroy(v9fid);
 	}
 
+	v9fs_put_idpool(fid, &v9ses->fidpool);
+	v9fs_fid_destroy(v9fid);
+
 	kfree(fcall);
 	return result;
 }
-- 
cgit v1.2.2