From e051fda4fd14fe878e6d2183b3a4640febe9e9a8 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 1 Feb 2007 11:40:16 -0800
Subject: ocfs2: ocfs2_link() journal credits update

Commit 592282cf2eaa33409c6511ddd3f3ecaa57daeaaa fixed some missing directory
c/mtime updates in part by introducing a dinode update in ocfs2_add_entry().
Unfortunately, ocfs2_link() (which didn't update the directory inode before)
is now missing a single journal credit. Fix this by doubling the number of
inode updates expected during hard link creation.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/journal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index e1216364d191..d026b4f27757 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -306,8 +306,8 @@ int                  ocfs2_journal_dirty_data(handle_t *handle,
  * for the dinode, one for the new block. */
 #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
 
-/* file update (nlink, etc) + dir entry block */
-#define OCFS2_LINK_CREDITS  (OCFS2_INODE_UPDATE_CREDITS + 1)
+/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
+#define OCFS2_LINK_CREDITS  (2*OCFS2_INODE_UPDATE_CREDITS + 1)
 
 /* inode + dir inode (if we unlink a dir), + dir entry block + orphan
  * dir inode link */
-- 
cgit v1.2.2


From fc2dd2e51a1940acac665696e6a70a1a73dc90a4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Thu, 1 Feb 2007 13:52:43 +0000
Subject: [PATCH] endianness bug: ntohl() misspelled as >> 24 in fh_verify().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfsfh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 98338a569dc0..c59d6fbb7a6b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -269,7 +269,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
 			"acc=%x, error=%d\n",
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name,
-			access, (error >> 24));
+			access, ntohl(error));
 	}
 out:
 	if (exp && !IS_ERR(exp))
-- 
cgit v1.2.2


From 7d8952440f4090522b740257f1c6b2cf96413969 Mon Sep 17 00:00:00 2001
From: Guillaume Chazarain <guichaz@yahoo.fr>
Date: Wed, 31 Jan 2007 23:48:14 -0800
Subject: [PATCH] procfs: Fix listing of /proc/NOT_A_TGID/task

Listing /proc/PID/task were PID is not a TGID should not result in
duplicated entries.

	[g ~]$ pidof thunderbird-bin
	2751
	[g ~]$ ls /proc/2751/task
	2751  2770  2771  2824  2826  2834  2835  2851  2853
	[g ~]$ ls /proc/2770/task
	2751  2770  2771  2824  2826  2834  2835  2851  2853
	2770  2771  2824  2826  2834  2835  2851  2853
	[g ~]$

Signed-off-by: Guillaume Chazarain <guichaz@yahoo.fr>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ff7a66850602..1a979ea3b379 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2328,13 +2328,23 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 {
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	struct task_struct *leader = get_proc_task(inode);
+	struct task_struct *leader = NULL;
 	struct task_struct *task;
 	int retval = -ENOENT;
 	ino_t ino;
 	int tid;
 	unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
 
+	task = get_proc_task(inode);
+	if (!task)
+		goto out_no_task;
+	rcu_read_lock();
+	if (pid_alive(task)) {
+		leader = task->group_leader;
+		get_task_struct(leader);
+	}
+	rcu_read_unlock();
+	put_task_struct(task);
 	if (!leader)
 		goto out_no_task;
 	retval = 0;
-- 
cgit v1.2.2


From dee11c2364f51cac53df17d742a0c69097e29a4e Mon Sep 17 00:00:00 2001
From: Ken Chen <kenchen@google.com>
Date: Sat, 3 Feb 2007 01:13:45 -0800
Subject: [PATCH] aio: fix buggy put_ioctx call in aio_complete - v2

An AIO bug was reported that sleeping function is being called in softirq
context:

BUG: warning at kernel/mutex.c:132/__mutex_lock_common()
Call Trace:
     [<a000000100577b00>] __mutex_lock_slowpath+0x640/0x6c0
     [<a000000100577ba0>] mutex_lock+0x20/0x40
     [<a0000001000a25b0>] flush_workqueue+0xb0/0x1a0
     [<a00000010018c0c0>] __put_ioctx+0xc0/0x240
     [<a00000010018d470>] aio_complete+0x2f0/0x420
     [<a00000010019cc80>] finished_one_bio+0x200/0x2a0
     [<a00000010019d1c0>] dio_bio_complete+0x1c0/0x200
     [<a00000010019d260>] dio_bio_end_aio+0x60/0x80
     [<a00000010014acd0>] bio_endio+0x110/0x1c0
     [<a0000001002770e0>] __end_that_request_first+0x180/0xba0
     [<a000000100277b90>] end_that_request_chunk+0x30/0x60
     [<a0000002073c0c70>] scsi_end_request+0x50/0x300 [scsi_mod]
     [<a0000002073c1240>] scsi_io_completion+0x200/0x8a0 [scsi_mod]
     [<a0000002074729b0>] sd_rw_intr+0x330/0x860 [sd_mod]
     [<a0000002073b3ac0>] scsi_finish_command+0x100/0x1c0 [scsi_mod]
     [<a0000002073c2910>] scsi_softirq_done+0x230/0x300 [scsi_mod]
     [<a000000100277d20>] blk_done_softirq+0x160/0x1c0
     [<a000000100083e00>] __do_softirq+0x200/0x240
     [<a000000100083eb0>] do_softirq+0x70/0xc0

See report: http://marc.theaimsgroup.com/?l=linux-kernel&m=116599593200888&w=2

flush_workqueue() is not allowed to be called in the softirq context.
However, aio_complete() called from I/O interrupt can potentially call
put_ioctx with last ref count on ioctx and triggers bug.  It is simply
incorrect to perform ioctx freeing from aio_complete.

The bug is trigger-able from a race between io_destroy() and aio_complete().
A possible scenario:

cpu0                               cpu1
io_destroy                         aio_complete
  wait_for_all_aios {                __aio_put_req
     ...                                 ctx->reqs_active--;
     if (!ctx->reqs_active)
        return;
  }
  ...
  put_ioctx(ioctx)

                                     put_ioctx(ctx);
                                        __put_ioctx
                                          bam! Bug trigger!

The real problem is that the condition check of ctx->reqs_active in
wait_for_all_aios() is incorrect that access to reqs_active is not
being properly protected by spin lock.

This patch adds that protective spin lock, and at the same time removes
all duplicate ref counting for each kiocb as reqs_active is already used
as a ref count for each active ioctx.  This also ensures that buggy call
to flush_workqueue() in softirq context is eliminated.

Signed-off-by: "Ken Chen" <kenchen@google.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: <stable@kernel.org>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index ee20fc4240e0..55991e4132a7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -298,17 +298,23 @@ static void wait_for_all_aios(struct kioctx *ctx)
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
 
+	spin_lock_irq(&ctx->ctx_lock);
 	if (!ctx->reqs_active)
-		return;
+		goto out;
 
 	add_wait_queue(&ctx->wait, &wait);
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	while (ctx->reqs_active) {
+		spin_unlock_irq(&ctx->ctx_lock);
 		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&ctx->ctx_lock);
 	}
 	__set_task_state(tsk, TASK_RUNNING);
 	remove_wait_queue(&ctx->wait, &wait);
+
+out:
+	spin_unlock_irq(&ctx->ctx_lock);
 }
 
 /* wait_on_sync_kiocb:
@@ -424,7 +430,6 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
 	if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
 		list_add(&req->ki_list, &ctx->active_reqs);
-		get_ioctx(ctx);
 		ctx->reqs_active++;
 		okay = 1;
 	}
@@ -536,8 +541,6 @@ int fastcall aio_put_req(struct kiocb *req)
 	spin_lock_irq(&ctx->ctx_lock);
 	ret = __aio_put_req(ctx, req);
 	spin_unlock_irq(&ctx->ctx_lock);
-	if (ret)
-		put_ioctx(ctx);
 	return ret;
 }
 
@@ -779,8 +782,7 @@ static int __aio_run_iocbs(struct kioctx *ctx)
 		 */
 		iocb->ki_users++;       /* grab extra reference */
 		aio_run_iocb(iocb);
-		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
-			put_ioctx(ctx);
+		__aio_put_req(ctx, iocb);
  	}
 	if (!list_empty(&ctx->run_list))
 		return 1;
@@ -997,14 +999,10 @@ put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
 
-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 
-	if (ret)
-		put_ioctx(ctx);
-
+	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
 }
 
-- 
cgit v1.2.2


From b2e895dbd80c420bfc0937c3729b4afe073b3848 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sat, 3 Feb 2007 01:14:01 -0800
Subject: [PATCH] revert blockdev direct io back to 2.6.19 version

Andrew Vasquez is reporting as-iosched oopses and a 65% throughput
slowdown due to the recent special-casing of direct-io against
blockdevs.  We don't know why either of these things are occurring.

The patch minimally reverts us back to the 2.6.19 code for a 2.6.20
release.

Cc: Andrew Vasquez <andrew.vasquez@qlogic.com>
Cc: Ken Chen <kenchen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/block_dev.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index d9bdf2b3ade2..fc7028b685f2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -129,6 +129,46 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
+static int
+blkdev_get_blocks(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh, int create)
+{
+	sector_t end_block = max_block(I_BDEV(inode));
+	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
+
+	if ((iblock + max_blocks) > end_block) {
+		max_blocks = end_block - iblock;
+		if ((long)max_blocks <= 0) {
+			if (create)
+				return -EIO;	/* write fully beyond EOF */
+			/*
+			 * It is a read which is fully beyond EOF.  We return
+			 * a !buffer_mapped buffer
+			 */
+			max_blocks = 0;
+		}
+	}
+
+	bh->b_bdev = I_BDEV(inode);
+	bh->b_blocknr = iblock;
+	bh->b_size = max_blocks << inode->i_blkbits;
+	if (max_blocks)
+		set_buffer_mapped(bh);
+	return 0;
+}
+
+static ssize_t
+blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+			loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+
+	return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
+				iov, offset, nr_segs, blkdev_get_blocks, NULL);
+}
+
+#if 0
 static int blk_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 {
 	struct kiocb *iocb = bio->bi_private;
@@ -323,6 +363,7 @@ backout:
 		return PTR_ERR(page);
 	goto completion;
 }
+#endif
 
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
 {
-- 
cgit v1.2.2