From 72f465033702ebfe20db8f50edaad59f0f38b0f5 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Mon, 23 Aug 2010 13:35:09 +0200
Subject: bio-integrity.c: remove dependency on __GFP_NOFAIL

The kmalloc() in bio_integrity_prep() is failable, so remove __GFP_NOFAIL
from its mask.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/bio-integrity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 612a5c38d3c1..a8f4cc679983 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -413,7 +413,7 @@ int bio_integrity_prep(struct bio *bio)
 
 	/* Allocate kernel buffer for protection data */
 	len = sectors * blk_integrity_tuple_size(bi);
-	buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
+	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
 		return -EIO;
-- 
cgit v1.2.2


From 220eb7fd984bfc7e6b4005fdf32efe9cd8af7cf2 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Mon, 23 Aug 2010 13:36:20 +0200
Subject: fs/bio-integrity.c: return -ENOMEM on kmalloc failure

Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/bio-integrity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index a8f4cc679983..4d0ff5ee27b8 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -416,7 +416,7 @@ int bio_integrity_prep(struct bio *bio)
 	buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
 	if (unlikely(buf == NULL)) {
 		printk(KERN_ERR "could not allocate integrity buffer\n");
-		return -EIO;
+		return -ENOMEM;
 	}
 
 	end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-- 
cgit v1.2.2


From b76b4014f9d988d2412b873e4d4c13c7f9afc4e4 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Sat, 28 Aug 2010 08:52:10 +0200
Subject: writeback: Fix lost wake-up shutting down writeback thread

Setting the task state here may cause us to miss the wake up from
kthread_stop(), so we need to recheck kthread_should_stop() or risk
sleeping forever in the following schedule().

Symptom was an indefinite hang on an NFSv4 mount.  (NFSv4 may create
multiple mounts in a temporary namespace while traversing the mount
path, and since the temporary namespace is immediately destroyed, it may
end up destroying a mount very soon after it was created, possibly
making this race more likely.)

INFO: task mount.nfs4:4314 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
mount.nfs4    D 0000000000000000  2880  4314   4313 0x00000000
 ffff88001ed6da28 0000000000000046 ffff88001ed6dfd8 ffff88001ed6dfd8
 ffff88001ed6c000 ffff88001ed6c000 ffff88001ed6c000 ffff88001e5003a0
 ffff88001ed6dfd8 ffff88001e5003a8 ffff88001ed6c000 ffff88001ed6dfd8
Call Trace:
 [<ffffffff8196090d>] schedule_timeout+0x1cd/0x2e0
 [<ffffffff8106a31c>] ? mark_held_locks+0x6c/0xa0
 [<ffffffff819639a0>] ? _raw_spin_unlock_irq+0x30/0x60
 [<ffffffff8106a5fd>] ? trace_hardirqs_on_caller+0x14d/0x190
 [<ffffffff819671fe>] ? sub_preempt_count+0xe/0xd0
 [<ffffffff8195fc80>] wait_for_common+0x120/0x190
 [<ffffffff81033c70>] ? default_wake_function+0x0/0x20
 [<ffffffff8195fdcd>] wait_for_completion+0x1d/0x20
 [<ffffffff810595fa>] kthread_stop+0x4a/0x150
 [<ffffffff81061a60>] ? thaw_process+0x70/0x80
 [<ffffffff810cc68a>] bdi_unregister+0x10a/0x1a0
 [<ffffffff81229dc9>] nfs_put_super+0x19/0x20
 [<ffffffff810ee8c4>] generic_shutdown_super+0x54/0xe0
 [<ffffffff810ee9b6>] kill_anon_super+0x16/0x60
 [<ffffffff8122d3b9>] nfs4_kill_super+0x39/0x90
 [<ffffffff810eda45>] deactivate_locked_super+0x45/0x60
 [<ffffffff810edfb9>] deactivate_super+0x49/0x70
 [<ffffffff81108294>] mntput_no_expire+0x84/0xe0
 [<ffffffff811084ef>] release_mounts+0x9f/0xc0
 [<ffffffff81108575>] put_mnt_ns+0x65/0x80
 [<ffffffff8122cc56>] nfs_follow_remote_path+0x1e6/0x420
 [<ffffffff8122cfbf>] nfs4_try_mount+0x6f/0xd0
 [<ffffffff8122d0c2>] nfs4_get_sb+0xa2/0x360
 [<ffffffff810edcb8>] vfs_kern_mount+0x88/0x1f0
 [<ffffffff810ede92>] do_kern_mount+0x52/0x130
 [<ffffffff81963d9a>] ? _lock_kernel+0x6a/0x170
 [<ffffffff81108e9e>] do_mount+0x26e/0x7f0
 [<ffffffff81106b3a>] ? copy_mount_options+0xea/0x190
 [<ffffffff811094b8>] sys_mount+0x98/0xf0
 [<ffffffff810024d8>] system_call_fastpath+0x16/0x1b
1 lock held by mount.nfs4/4314:
 #0:  (&type->s_umount_key#24){+.+...}, at: [<ffffffff810edfb1>] deactivate_super+0x41/0x70

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Acked-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
---
 fs/fs-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7d9d06ba184b..81e086d8aa57 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -808,7 +808,7 @@ int bdi_writeback_thread(void *data)
 			wb->last_active = jiffies;
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!list_empty(&bdi->work_list)) {
+		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
 			__set_current_state(TASK_RUNNING);
 			continue;
 		}
-- 
cgit v1.2.2


From 4afc31345e5f543e5d89a47aeadaaad1d91a5bc8 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Sun, 29 Aug 2010 01:55:38 +0900
Subject: nilfs2: fix leak of shadow dat inode in error path of load_nilfs

If load_nilfs() gets an error while doing recovery, it will fail to
free the shadow inode of dat (nilfs->ns_gc_dat).

This fixes the leak issue.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/the_nilfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 4317f177ea7c..ba7c10c917fc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -446,6 +446,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
 	nilfs_mdt_destroy(nilfs->ns_cpfile);
 	nilfs_mdt_destroy(nilfs->ns_sufile);
 	nilfs_mdt_destroy(nilfs->ns_dat);
+	nilfs_mdt_destroy(nilfs->ns_gc_dat);
 
  failed:
 	nilfs_clear_recovery_info(&ri);
-- 
cgit v1.2.2


From 8f587df479c3cea14ba1a9b9d58f34fd2fd6d58b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 4 Aug 2010 16:27:45 +0000
Subject: 9p: potential ERR_PTR() dereference

p9_client_walk() can return error values if we run out of space or there
is a problem with the network.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/fid.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 358563689064..6406f896bf95 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -242,7 +242,8 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 	}
 	kfree(wnames);
 fid_out:
-	v9fs_fid_add(dentry, fid);
+	if (!IS_ERR(fid))
+		v9fs_fid_add(dentry, fid);
 err_out:
 	up_read(&v9ses->rename_sem);
 	return fid;
-- 
cgit v1.2.2


From 9bc08a45fb117c696e4940cfa1208cb1cc7a2f25 Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Thu, 2 Sep 2010 15:14:38 +1000
Subject: xfs: improve buffer cache hash scalability

When doing large parallel file creates on a 16p machines, large amounts of
time is being spent in _xfs_buf_find(). A system wide profile with perf top
shows this:

          1134740.00 19.3% _xfs_buf_find
           733142.00 12.5% __ticket_spin_lock

The problem is that the hash contains 45,000 buffers, and the hash table width
is only 256 buffers. That means we've got around 200 buffers per chain, and
searching it is quite expensive. The hash table size needs to increase.

Secondly, every time we do a lookup, we promote the buffer we find to the head
of the hash chain. This is causing cachelines to be dirtied and causes
invalidation of cachelines across all CPUs that may have walked the hash chain
recently. hence every walk of the hash chain is effectively a cold cache walk.
Remove the promotion to avoid this invalidation.

The results are:

          1045043.00 21.2% __ticket_spin_lock
           326184.00  6.6% _xfs_buf_find

A 70% drop in the CPU usage when looking up buffers. Unfortunately that does
not result in an increase in performance underthis workload as contention on
the inode_lock soaks up most of the reduction in CPU usage.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/linux-2.6/xfs_buf.c | 8 +-------
 fs/xfs/linux-2.6/xfs_buf.h | 1 -
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ea79072f5210..d72cf2bb054a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -440,12 +440,7 @@ _xfs_buf_find(
 		ASSERT(btp == bp->b_target);
 		if (bp->b_file_offset == range_base &&
 		    bp->b_buffer_length == range_length) {
-			/*
-			 * If we look at something, bring it to the
-			 * front of the list for next time.
-			 */
 			atomic_inc(&bp->b_hold);
-			list_move(&bp->b_hash_list, &hash->bh_list);
 			goto found;
 		}
 	}
@@ -1443,8 +1438,7 @@ xfs_alloc_bufhash(
 {
 	unsigned int		i;
 
-	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
-	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
+	btp->bt_hashshift = external ? 3 : 12;	/* 8 or 4096 buckets */
 	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
 					 sizeof(xfs_bufhash_t));
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index d072e5ff923b..2a05614f0b92 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -137,7 +137,6 @@ typedef struct xfs_buftarg {
 	size_t			bt_smask;
 
 	/* per device buffer hash table */
-	uint			bt_hashmask;
 	uint			bt_hashshift;
 	xfs_bufhash_t		*bt_hash;
 
-- 
cgit v1.2.2


From 23963e54ce187ca6e907c83176c15508b0f6e60d Mon Sep 17 00:00:00 2001
From: Arkadiusz Mi?kiewicz <arekm@maven.pl>
Date: Thu, 26 Aug 2010 10:19:43 +0000
Subject: xfs: Disallow 32bit project quota id

Currently on-disk structure is able to keep only 16bit project quota
id, so disallow 32bit ones. This fixes a problem where parts of
kernel structures holding project quota id are 32bit while parts
(on-disk) are 16bit variables which causes project quota member
files to be inaccessible for some operations (like mv/rm).

Signed-off-by: Arkadiusz Mi?kiewicz <arekm@maven.pl>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 237f5ffb2ee8..4fec427b83ef 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -906,6 +906,13 @@ xfs_ioctl_setattr(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
+	/*
+	 * Disallow 32bit project ids because on-disk structure
+	 * is 16bit only.
+	 */
+	if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
+		return XFS_ERROR(EINVAL);
+
 	/*
 	 * If disk quotas is on, we make sure that the dquots do exist on disk,
 	 * before we start any other transactions. Trying to do this later
-- 
cgit v1.2.2


From 8f34a430ac16d5fbd9d6b383184d35e152f5a963 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 2 Sep 2010 15:23:16 -0400
Subject: nfsd4: mask out non-access bits in nfs4_access_to_omode

This fixes an unnecessary BUG().

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3dfef0623968..cf0d2ffb3c84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -440,7 +440,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
 
 static int nfs4_access_to_omode(u32 access)
 {
-	switch (access) {
+	switch (access & NFS4_SHARE_ACCESS_BOTH) {
 	case NFS4_SHARE_ACCESS_READ:
 		return O_RDONLY;
 	case NFS4_SHARE_ACCESS_WRITE:
-- 
cgit v1.2.2


From 72656c46f50b8dfe50e15793692982e636e3df20 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Fri, 3 Sep 2010 12:19:33 +1000
Subject: xfs: prevent 32bit overflow in space reservation

If we attempt to preallocate more than 2^32 blocks of space in a
single syscall, the transaction block reservation will overflow
leading to a hangs in the superblock block accounting code. This
is trivially reproduced with xfs_io. Fix the problem by capping the
allocation reservation to the maximum number of blocks a single
xfs_bmapi() call can allocate (2^21 blocks).

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_vnodeops.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 66d585c6917c..4c7c7bfb2b2f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2299,15 +2299,22 @@ xfs_alloc_file_space(
 			e = allocatesize_fsb;
 		}
 
+		/*
+		 * The transaction reservation is limited to a 32-bit block
+		 * count, hence we need to limit the number of blocks we are
+		 * trying to reserve to avoid an overflow. We can't allocate
+		 * more than @nimaps extents, and an extent is limited on disk
+		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
+		 */
+		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
 		if (unlikely(rt)) {
-			resrtextents = qblocks = (uint)(e - s);
+			resrtextents = qblocks = resblks;
 			resrtextents /= mp->m_sb.sb_rextsize;
 			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
 			quota_flag = XFS_QMOPT_RES_RTBLKS;
 		} else {
 			resrtextents = 0;
-			resblks = qblocks = \
-				XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
 			quota_flag = XFS_QMOPT_RES_REGBLKS;
 		}
 
-- 
cgit v1.2.2


From 9af25465081480a75824fd7a16a37a5cfebeede9 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 30 Aug 2010 02:44:03 +0000
Subject: xfs: Make fiemap work with sparse files

In xfs_vn_fiemap, we set bvm_count to fi_extent_max + 1 and want
to return fi_extent_max extents, but actually it won't work for
a sparse file. The reason is that in xfs_getbmap we will
calculate holes and set it in 'out', while out is malloced by
bmv_count(fi_extent_max+1) which didn't consider holes. So in the
worst case, if 'out' vector looks like
[hole, extent, hole, extent, hole, ... hole, extent, hole],
we will only return half of fi_extent_max extents.

This patch add a new parameter BMV_IF_NO_HOLES for bvm_iflags.
So with this flags, we don't use our 'out' in xfs_getbmap for
a hole. The solution is a bit ugly by just don't increasing
index of 'out' vector. I felt that it is not easy to skip it
at the very beginning since we have the complicated check and
some function like xfs_getbmapx_fix_eof_hole to adjust 'out'.

Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_iops.c |  2 +-
 fs/xfs/xfs_bmap.c           | 14 +++++++++++++-
 fs/xfs/xfs_fs.h             |  4 +++-
 3 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 68be25dcd301..b1fc2a6bfe83 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -664,7 +664,7 @@ xfs_vn_fiemap(
 					fieinfo->fi_extents_max + 1;
 	bm.bmv_count = min_t(__s32, bm.bmv_count,
 			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC;
+	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
 		bm.bmv_iflags |= BMV_IF_ATTRFORK;
 	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 23f14e595c18..f90dadd5a968 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5533,12 +5533,24 @@ xfs_getbmap(
 					map[i].br_startblock))
 				goto out_free_map;
 
-			nexleft--;
 			bmv->bmv_offset =
 				out[cur_ext].bmv_offset +
 				out[cur_ext].bmv_length;
 			bmv->bmv_length =
 				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+
+			/*
+			 * In case we don't want to return the hole,
+			 * don't increase cur_ext so that we can reuse
+			 * it in the next loop.
+			 */
+			if ((iflags & BMV_IF_NO_HOLES) &&
+			    map[i].br_startblock == HOLESTARTBLOCK) {
+				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
+				continue;
+			}
+
+			nexleft--;
 			bmv->bmv_entries++;
 			cur_ext++;
 		}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7cf7220e7d5f..87c2e9d02288 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -114,8 +114,10 @@ struct getbmapx {
 #define BMV_IF_NO_DMAPI_READ	0x2	/* Do not generate DMAPI read event  */
 #define BMV_IF_PREALLOC		0x4	/* rtn status BMV_OF_PREALLOC if req */
 #define BMV_IF_DELALLOC		0x8	/* rtn status BMV_OF_DELALLOC if req */
+#define BMV_IF_NO_HOLES		0x10	/* Do not return holes */
 #define BMV_IF_VALID	\
-	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
+	(BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|	\
+	 BMV_IF_DELALLOC|BMV_IF_NO_HOLES)
 
 /*	bmv_oflags values - returned for each non-header segment */
 #define BMV_OF_PREALLOC		0x1	/* segment = unwritten pre-allocation */
-- 
cgit v1.2.2


From 57f9bdac2510cd7fda58e4a111d250861eb1ebeb Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 25 Aug 2010 09:12:29 +0200
Subject: sysfs: checking for NULL instead of ERR_PTR

d_path() returns an ERR_PTR and it doesn't return NULL.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Cc: stable <stable@kernel.org>
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1b27b5688f62..da3fefe91a8f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	char *p;
 
 	p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
-	if (p)
+	if (!IS_ERR(p))
 		memmove(last_sysfs_file, p, strlen(p) + 1);
 
 	/* need attr_sd for attr and ops, its parent for kobj */
-- 
cgit v1.2.2


From 595afaf9e6ee1b48e13ec4b8bcc8c7dee888161a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 7 Sep 2010 13:42:41 +0200
Subject: fuse: flush background queue on connection close

David Bartly reported that fuse can hang in fuse_get_req_nofail() when
the connection to the filesystem server is no longer active.

If bg_queue is not empty then flush_bg_queue() called from
request_end() can put more requests on to the pending queue.  If this
happens while ending requests on the processing queue then those
background requests will be queued to the pending list and never
ended.

Another problem is that fuse_dev_release() didn't wake up processes
sleeping on blocked_waitq.

Solve this by:

 a) flushing the background queue before calling end_requests() on the
    pending and processing queues

 b) setting blocked = 0 and waking up processes waiting on
    blocked_waitq()

Thanks to David for an excellent bug report.

Reported-by: David Bartley <andareed@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
---
 fs/fuse/dev.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 69ad053ffd78..b4fc47ff4bc2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1769,6 +1769,14 @@ __acquires(&fc->lock)
 	}
 }
 
+static void end_queued_requests(struct fuse_conn *fc)
+{
+	fc->max_background = UINT_MAX;
+	flush_bg_queue(fc);
+	end_requests(fc, &fc->pending);
+	end_requests(fc, &fc->processing);
+}
+
 /*
  * Abort all requests.
  *
@@ -1795,8 +1803,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
 		fc->connected = 0;
 		fc->blocked = 0;
 		end_io_requests(fc);
-		end_requests(fc, &fc->pending);
-		end_requests(fc, &fc->processing);
+		end_queued_requests(fc);
 		wake_up_all(&fc->waitq);
 		wake_up_all(&fc->blocked_waitq);
 		kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1811,8 +1818,9 @@ int fuse_dev_release(struct inode *inode, struct file *file)
 	if (fc) {
 		spin_lock(&fc->lock);
 		fc->connected = 0;
-		end_requests(fc, &fc->pending);
-		end_requests(fc, &fc->processing);
+		fc->blocked = 0;
+		end_queued_requests(fc);
+		wake_up_all(&fc->blocked_waitq);
 		spin_unlock(&fc->lock);
 		fuse_conn_put(fc);
 	}
-- 
cgit v1.2.2


From b9ca67b2ddf021491a3741d9555e8ff59b2175ba Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 7 Sep 2010 13:42:41 +0200
Subject: fuse: fix lock annotations

Sparse doesn't understand lock annotations of the form
__releases(&foo->lock).  Change them to __releases(foo->lock).  Same
for __acquires().

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c  | 26 ++++++++++++++------------
 fs/fuse/file.c |  8 ++++----
 2 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b4fc47ff4bc2..d367af1514ef 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,7 +276,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
  * Called with fc->lock, unlocks it
  */
 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
-__releases(&fc->lock)
+__releases(fc->lock)
 {
 	void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
 	req->end = NULL;
@@ -306,8 +306,8 @@ __releases(&fc->lock)
 
 static void wait_answer_interruptible(struct fuse_conn *fc,
 				      struct fuse_req *req)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	if (signal_pending(current))
 		return;
@@ -325,8 +325,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
 }
 
 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	if (!fc->no_interrupt) {
 		/* Any signal may interrupt this */
@@ -905,8 +905,8 @@ static int request_pending(struct fuse_conn *fc)
 
 /* Wait until a request is available on the pending list */
 static void request_wait(struct fuse_conn *fc)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	DECLARE_WAITQUEUE(wait, current);
 
@@ -934,7 +934,7 @@ __acquires(&fc->lock)
  */
 static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
 			       size_t nbytes, struct fuse_req *req)
-__releases(&fc->lock)
+__releases(fc->lock)
 {
 	struct fuse_in_header ih;
 	struct fuse_interrupt_in arg;
@@ -1720,8 +1720,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
  * This function releases and reacquires fc->lock
  */
 static void end_requests(struct fuse_conn *fc, struct list_head *head)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	while (!list_empty(head)) {
 		struct fuse_req *req;
@@ -1744,8 +1744,8 @@ __acquires(&fc->lock)
  * locked).
  */
 static void end_io_requests(struct fuse_conn *fc)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	while (!list_empty(&fc->io)) {
 		struct fuse_req *req =
@@ -1770,6 +1770,8 @@ __acquires(&fc->lock)
 }
 
 static void end_queued_requests(struct fuse_conn *fc)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	fc->max_background = UINT_MAX;
 	flush_bg_queue(fc);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 147c1f71bdb9..c8224587123f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1144,8 +1144,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
 
 /* Called under fc->lock, may release and reacquire it */
 static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	struct fuse_inode *fi = get_fuse_inode(req->inode);
 	loff_t size = i_size_read(req->inode);
@@ -1183,8 +1183,8 @@ __acquires(&fc->lock)
  * Called with fc->lock
  */
 void fuse_flush_writepages(struct inode *inode)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	struct fuse_inode *fi = get_fuse_inode(inode);
-- 
cgit v1.2.2


From 7a2e8a8faab76386d8eaae9ded739ee5615be174 Mon Sep 17 00:00:00 2001
From: Valerie Aurora <vaurora@redhat.com>
Date: Thu, 26 Aug 2010 11:07:22 -0700
Subject: VFS: Sanity check mount flags passed to change_mnt_propagation()

Sanity check the flags passed to change_mnt_propagation().  Exactly
one flag should be set.  Return EINVAL otherwise.

Userspace can pass in arbitrary combinations of MS_* flags to mount().
do_change_type() is called if any of MS_SHARED, MS_PRIVATE, MS_SLAVE,
or MS_UNBINDABLE is set.  do_change_type() clears MS_REC and then
calls change_mnt_propagation() with the rest of the user-supplied
flags.  change_mnt_propagation() clearly assumes only one flag is set
but do_change_type() does not check that this is true.  For example,
mount() with flags MS_SHARED | MS_RDONLY does not actually make the
mount shared or read-only but does clear MNT_UNBINDABLE.

Signed-off-by: Valerie Aurora <vaurora@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namespace.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index de402eb6eafb..a72eaabfe8f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1483,6 +1483,23 @@ out_unlock:
 	return err;
 }
 
+/*
+ * Sanity check the flags to change_mnt_propagation.
+ */
+
+static int flags_to_propagation_type(int flags)
+{
+	int type = flags & ~MS_REC;
+
+	/* Fail if any non-propagation flags are set */
+	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
+		return 0;
+	/* Only one propagation flag should be set */
+	if (!is_power_of_2(type))
+		return 0;
+	return type;
+}
+
 /*
  * recursively change the type of the mountpoint.
  */
@@ -1490,7 +1507,7 @@ static int do_change_type(struct path *path, int flag)
 {
 	struct vfsmount *m, *mnt = path->mnt;
 	int recurse = flag & MS_REC;
-	int type = flag & ~MS_REC;
+	int type;
 	int err = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -1499,6 +1516,10 @@ static int do_change_type(struct path *path, int flag)
 	if (path->dentry != path->mnt->mnt_root)
 		return -EINVAL;
 
+	type = flags_to_propagation_type(flag);
+	if (!type)
+		return -EINVAL;
+
 	down_write(&namespace_sem);
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
-- 
cgit v1.2.2


From dc696aced9f09f05b1f927b93f5a7918017a3e49 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Thu, 12 Aug 2010 16:24:25 -0700
Subject: ocfs2: Fix metaecc error messages

Like tools, the checksum validate function now prints the values in hex.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Singed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/blockcheck.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index ec6d12339593..c7ee03c22226 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
 
 	ocfs2_blockcheck_inc_failure(stats);
 	mlog(ML_ERROR,
-	     "CRC32 failed: stored: %u, computed %u.  Applying ECC.\n",
+	     "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n",
 	     (unsigned int)check.bc_crc32e, (unsigned int)crc);
 
 	/* Ok, try ECC fixups */
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
 		goto out;
 	}
 
-	mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
+	mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n",
 	     (unsigned int)check.bc_crc32e, (unsigned int)crc);
 
 	rc = -EIO;
-- 
cgit v1.2.2


From f5ce5a08a40f2086435858ddc80cb40394b082eb Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Thu, 12 Aug 2010 16:24:26 -0700
Subject: ocfs2: Fix incorrect checksum validation error

For local mounts, ocfs2_read_locked_inode() calls ocfs2_read_blocks_sync() to
read the inode off the disk. The latter first checks to see if that block is
cached in the journal, and, if so, returns that block. That is ok.

But ocfs2_read_locked_inode() goes wrong when it tries to validate the checksum
of such blocks. Blocks that are cached in the journal may not have had their
checksum computed as yet. We should not validate the checksums of such blocks.

Fixes ossbz#1282
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1282

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Cc: stable@kernel.org
Singed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0492464916b1..eece3e05d9d0 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 						     OCFS2_BH_IGNORE_CACHE);
 	} else {
 		status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
-		if (!status)
+		/*
+		 * If buffer is in jbd, then its checksum may not have been
+		 * computed as yet.
+		 */
+		if (!status && !buffer_jbd(bh))
 			status = ocfs2_validate_inode_block(osb->sb, bh);
 	}
 	if (status < 0) {
-- 
cgit v1.2.2


From f63afdb2c32db850fa1bfccf84643a8885cbeb61 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sat, 17 Jul 2010 21:45:49 +0800
Subject: ocfs2: make __ocfs2_page_mkwrite handle file end properly.

__ocfs2_page_mkwrite now is broken in handling file end.
1. the last page should be the page contains i_size - 1.
2. the len in the last page is also calculated wrong.
So change them accordingly.

Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/mmap.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index af2b8fe1f139..4c18f4ad93b4 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -74,9 +74,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
 	/*
 	 * Another node might have truncated while we were waiting on
 	 * cluster locks.
+	 * We don't check size == 0 before the shift. This is borrowed
+	 * from do_generic_file_read.
 	 */
-	last_index = size >> PAGE_CACHE_SHIFT;
-	if (page->index > last_index) {
+	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+	if (unlikely(!size || page->index > last_index)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -107,7 +109,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
 	 * because the "write" would invalidate their data.
 	 */
 	if (page->index == last_index)
-		len = size & ~PAGE_CACHE_MASK;
+		len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
 
 	ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
 				       &fsdata, di_bh, page);
-- 
cgit v1.2.2


From 04eda1a18019bb387dc7e97ee99979dd88dc608a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 5 Aug 2010 20:32:45 +0200
Subject: ocfs2: Flush drive's caches on fdatasync

When 'barrier' mount option is specified, we have to issue a cache flush
during fdatasync(2). We have to do this even if inode doesn't have
I_DIRTY_DATASYNC set because we still have to get written *data* to disk so
that they are not lost in case of crash.

Acked-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Singed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/file.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 81296b4e3646..6b2be0f2eacd 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -36,6 +36,7 @@
 #include <linux/writeback.h>
 #include <linux/falloc.h>
 #include <linux/quotaops.h>
+#include <linux/blkdev.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -190,8 +191,16 @@ static int ocfs2_sync_file(struct file *file, int datasync)
 	if (err)
 		goto bail;
 
-	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
+		/*
+		 * We still have to flush drive's caches to get data to the
+		 * platter
+		 */
+		if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
+			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+					   NULL, BLKDEV_IFL_WAIT);
 		goto bail;
+	}
 
 	journal = osb->journal->j_journal;
 	err = jbd2_journal_force_commit(journal);
-- 
cgit v1.2.2


From 889f004a8c83d515f275078687f859bc0d5ede9d Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Thu, 2 Sep 2010 13:10:10 +0800
Subject: ocfs2: Use the right group in nfs sync check.

We have added discontig block group now, and now an inode
can be allocated in an discontig block group. So get
it in ocfs2_get_suballoc_slot_bit.

The old ocfs2_test_suballoc_bit gets group block no
from the allocation inode which is wrong. Fix it by
passing the right group.

Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/suballoc.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a8e6a95a353f..8a009ee1f7fd 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2567,7 +2567,8 @@ out:
  * suballoc_bit.
  */
 static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
-				       u16 *suballoc_slot, u16 *suballoc_bit)
+				       u16 *suballoc_slot, u64 *group_blkno,
+				       u16 *suballoc_bit)
 {
 	int status;
 	struct buffer_head *inode_bh = NULL;
@@ -2604,6 +2605,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
 		*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
 	if (suballoc_bit)
 		*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
+	if (group_blkno)
+		*group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc);
 
 bail:
 	brelse(inode_bh);
@@ -2621,7 +2624,8 @@ bail:
  */
 static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
 				   struct inode *suballoc,
-				   struct buffer_head *alloc_bh, u64 blkno,
+				   struct buffer_head *alloc_bh,
+				   u64 group_blkno, u64 blkno,
 				   u16 bit, int *res)
 {
 	struct ocfs2_dinode *alloc_di;
@@ -2642,10 +2646,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	if (alloc_di->i_suballoc_loc)
-		bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc);
-	else
-		bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
+	bg_blkno = group_blkno ? group_blkno :
+		   ocfs2_which_suballoc_group(blkno, bit);
 	status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
 					     &group_bh);
 	if (status < 0) {
@@ -2680,6 +2682,7 @@ bail:
 int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
 {
 	int status;
+	u64 group_blkno = 0;
 	u16 suballoc_bit = 0, suballoc_slot = 0;
 	struct inode *inode_alloc_inode;
 	struct buffer_head *alloc_bh = NULL;
@@ -2687,7 +2690,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
 	mlog_entry("blkno: %llu", (unsigned long long)blkno);
 
 	status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
-					     &suballoc_bit);
+					     &group_blkno, &suballoc_bit);
 	if (status < 0) {
 		mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
 		goto bail;
@@ -2715,7 +2718,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
 	}
 
 	status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
-					 blkno, suballoc_bit, res);
+					 group_blkno, blkno, suballoc_bit, res);
 	if (status < 0)
 		mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
 
-- 
cgit v1.2.2


From b2b6ebf5f740e015b2155343958f067e594323ea Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Thu, 26 Aug 2010 13:06:50 -0700
Subject: ocfs2: properly set and use inode group alloc hint

We were setting ac->ac_last_group in ocfs2_claim_suballoc_bits from
res->sr_bg_blkno.  Unfortunately, res->sr_bg_blkno is going to be zero under
normal (non-fragmented) circumstances. The discontig block group patches
effectively turned off that feature. Fix this by correctly calculating what
the next group hint should be.

Acked-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.de>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/suballoc.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8a009ee1f7fd..b93d7e72175a 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -62,6 +62,17 @@ struct ocfs2_suballoc_result {
 	unsigned int	sr_bits;	/* How many bits we claimed */
 };
 
+static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
+{
+	if (res->sr_blkno == 0)
+		return 0;
+
+	if (res->sr_bg_blkno)
+		return res->sr_bg_blkno;
+
+	return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
+}
+
 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -1845,6 +1856,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
 	int status;
 	u16 victim, i;
 	u16 bits_left = 0;
+	u64 hint = ac->ac_last_group;
 	struct ocfs2_chain_list *cl;
 	struct ocfs2_dinode *fe;
 
@@ -1872,7 +1884,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
 		goto bail;
 	}
 
-	res->sr_bg_blkno = ac->ac_last_group;
+	res->sr_bg_blkno = hint;
 	if (res->sr_bg_blkno) {
 		/* Attempt to short-circuit the usual search mechanism
 		 * by jumping straight to the most recently used
@@ -1896,8 +1908,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
 
 	status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
 				    res, &bits_left);
-	if (!status)
+	if (!status) {
+		hint = ocfs2_group_from_res(res);
 		goto set_hint;
+	}
 	if (status < 0 && status != -ENOSPC) {
 		mlog_errno(status);
 		goto bail;
@@ -1920,8 +1934,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
 		ac->ac_chain = i;
 		status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
 					    res, &bits_left);
-		if (!status)
+		if (!status) {
+			hint = ocfs2_group_from_res(res);
 			break;
+		}
 		if (status < 0 && status != -ENOSPC) {
 			mlog_errno(status);
 			goto bail;
@@ -1936,7 +1952,7 @@ set_hint:
 		if (bits_left < min_bits)
 			ac->ac_last_group = 0;
 		else
-			ac->ac_last_group = res->sr_bg_blkno;
+			ac->ac_last_group = hint;
 	}
 
 bail:
-- 
cgit v1.2.2


From 9b4c0ff32ccd87ab52d4c5bd0a0536febce11370 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 24 Aug 2010 14:28:03 +0200
Subject: ocfs2: Fix deadlock when allocating page

We cannot call grab_cache_page() when holding filesystem locks or with
a transaction started as grab_cache_page() calls page allocation with
GFP_KERNEL flag and thus page reclaim can recurse back into the filesystem
causing deadlocks or various assertion failures. We have to use
find_or_create_page() instead and pass it GFP_NOFS as we do with other
allocations.

Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c        | 2 +-
 fs/ocfs2/file.c         | 2 +-
 fs/ocfs2/refcounttree.c | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 215e12ce1d85..592fae5007d1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
 	last_page_bytes = PAGE_ALIGN(end);
 	index = start >> PAGE_CACHE_SHIFT;
 	do {
-		pages[numpages] = grab_cache_page(mapping, index);
+		pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
 		if (!pages[numpages]) {
 			ret = -ENOMEM;
 			mlog_errno(ret);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6b2be0f2eacd..2caa3a7a1a39 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -783,7 +783,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
 	BUG_ON(abs_from & (inode->i_blkbits - 1));
 
-	page = grab_cache_page(mapping, index);
+	page = find_or_create_page(mapping, index, GFP_NOFS);
 	if (!page) {
 		ret = -ENOMEM;
 		mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 73a11ccfd4c2..0afeda83120f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2960,7 +2960,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 		if (map_end & (PAGE_CACHE_SIZE - 1))
 			to = map_end & (PAGE_CACHE_SIZE - 1);
 
-		page = grab_cache_page(mapping, page_index);
+		page = find_or_create_page(mapping, page_index, GFP_NOFS);
 
 		/*
 		 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
@@ -3179,7 +3179,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
 		if (map_end > end)
 			map_end = end;
 
-		page = grab_cache_page(context->inode->i_mapping, page_index);
+		page = find_or_create_page(context->inode->i_mapping,
+					   page_index, GFP_NOFS);
 		BUG_ON(!page);
 
 		wait_on_page_writeback(page);
-- 
cgit v1.2.2


From 81c8c82b5a39f9127e8b239e9b406a6c3a41b228 Mon Sep 17 00:00:00 2001
From: Tristan Ye <tristan.ye@oracle.com>
Date: Thu, 19 Aug 2010 15:15:00 +0800
Subject: Ocfs2: Fix a regression bug from mainline
 commit(6b933c8e6f1a2f3118082c455eef25f9b1ac7b45).

The patch is to fix the regression bug brought from commit 6b933c8...( 'ocfs2:
Avoid direct write if we fall back to buffered I/O'):

http://oss.oracle.com/bugzilla/show_bug.cgi?id=1285

The commit 6b933c8e6f1a2f3118082c455eef25f9b1ac7b45 changed __generic_file_aio_write
to generic_file_buffered_write, which didn't call filemap_{write,wait}_range to  flush
the pagecaches when we were falling O_DIRECT writes back to buffered ones. it did hurt
the O_DIRECT semantics somehow in extented odirect writes.

This patch tries to guarantee O_DIRECT writes of 'fall back to buffered' to be correctly
flushed.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2caa3a7a1a39..9a03c151b5ce 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2338,7 +2338,7 @@ out_dio:
 	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
 	if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
-	    ((file->f_flags & O_DIRECT) && has_refcount)) {
+	    ((file->f_flags & O_DIRECT) && !direct_io)) {
 		ret = filemap_fdatawrite_range(file->f_mapping, pos,
 					       pos + count - 1);
 		if (ret < 0)
-- 
cgit v1.2.2


From 021960cab320ae3cc4e9aba9cca42f9f5ce785f3 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Fri, 13 Aug 2010 15:15:15 -0700
Subject: ocfs2: split out inode alloc code from ocfs2_mknod_locked

Do this by splitting the bulk of the function away from the inode allocation
code at the very tom of ocfs2_mknod_locked(). Existing callers don't need to
change and won't see any difference. The new function created,
__ocfs2_mknod_locked() will be used shortly.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/namei.c | 55 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f171b51a74f7..2aa66b695fab 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -472,32 +472,23 @@ leave:
 	return status;
 }
 
-static int ocfs2_mknod_locked(struct ocfs2_super *osb,
-			      struct inode *dir,
-			      struct inode *inode,
-			      dev_t dev,
-			      struct buffer_head **new_fe_bh,
-			      struct buffer_head *parent_fe_bh,
-			      handle_t *handle,
-			      struct ocfs2_alloc_context *inode_ac)
+static int __ocfs2_mknod_locked(struct inode *dir,
+				struct inode *inode,
+				dev_t dev,
+				struct buffer_head **new_fe_bh,
+				struct buffer_head *parent_fe_bh,
+				handle_t *handle,
+				struct ocfs2_alloc_context *inode_ac,
+				u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit)
 {
 	int status = 0;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
 	struct ocfs2_dinode *fe = NULL;
 	struct ocfs2_extent_list *fel;
-	u64 suballoc_loc, fe_blkno = 0;
-	u16 suballoc_bit;
 	u16 feat;
 
 	*new_fe_bh = NULL;
 
-	status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
-				       inode_ac, &suballoc_loc,
-				       &suballoc_bit, &fe_blkno);
-	if (status < 0) {
-		mlog_errno(status);
-		goto leave;
-	}
-
 	/* populate as many fields early on as possible - many of
 	 * these are used by the support functions here and in
 	 * callers. */
@@ -591,6 +582,34 @@ leave:
 	return status;
 }
 
+static int ocfs2_mknod_locked(struct ocfs2_super *osb,
+			      struct inode *dir,
+			      struct inode *inode,
+			      dev_t dev,
+			      struct buffer_head **new_fe_bh,
+			      struct buffer_head *parent_fe_bh,
+			      handle_t *handle,
+			      struct ocfs2_alloc_context *inode_ac)
+{
+	int status = 0;
+	u64 suballoc_loc, fe_blkno = 0;
+	u16 suballoc_bit;
+
+	*new_fe_bh = NULL;
+
+	status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
+				       inode_ac, &suballoc_loc,
+				       &suballoc_bit, &fe_blkno);
+	if (status < 0) {
+		mlog_errno(status);
+		return status;
+	}
+
+	return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
+				    parent_fe_bh, handle, inode_ac,
+				    fe_blkno, suballoc_loc, suballoc_bit);
+}
+
 static int ocfs2_mkdir(struct inode *dir,
 		       struct dentry *dentry,
 		       int mode)
-- 
cgit v1.2.2


From d51349829c378c06ba4aa7d4b16ca23739858608 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Fri, 13 Aug 2010 15:15:16 -0700
Subject: ocfs2: use ocfs2_alloc_dinode_update_counts() instead of open coding

ocfs2_search_chain() makes the same updates as
ocfs2_alloc_dinode_update_counts to the alloc inode. Instead of open coding
the bitmap update, use our helper function.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/suballoc.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index b93d7e72175a..e7edda8c6a11 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1719,7 +1719,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 {
 	int status;
 	u16 chain;
-	u32 tmp_used;
 	u64 next_group;
 	struct inode *alloc_inode = ac->ac_inode;
 	struct buffer_head *group_bh = NULL;
@@ -1807,22 +1806,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		}
 	}
 
-	/* Ok, claim our bits now: set the info on dinode, chainlist
-	 * and then the group */
-	status = ocfs2_journal_access_di(handle,
-					 INODE_CACHE(alloc_inode),
-					 ac->ac_bh,
-					 OCFS2_JOURNAL_ACCESS_WRITE);
-	if (status < 0) {
+	status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
+						  ac->ac_bh, res->sr_bits,
+						  chain);
+	if (status) {
 		mlog_errno(status);
 		goto bail;
 	}
 
-	tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
-	fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
-	le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
-	ocfs2_journal_dirty(handle, ac->ac_bh);
-
 	status = ocfs2_block_group_set_bits(handle,
 					    alloc_inode,
 					    bg,
-- 
cgit v1.2.2


From e49e27674d1dd2717ad90b21ece8f83102153315 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Fri, 13 Aug 2010 15:15:17 -0700
Subject: ocfs2: allow return of new inode block location before allocation of
 the inode

This allows code which needs to know the eventual block number of an inode
but can't allocate it yet due to transaction or lock ordering. For example,
ocfs2_create_inode_in_orphan() currently gives a junk blkno for preparation
of the orphan dir because it can't yet know where the actual inode is placed
- that code is actually in ocfs2_mknod_locked. This is a problem when the
orphan dirs are indexed as the junk inode number will create an index entry
which goes unused (and fails the later removal from the orphan dir).  Now
with these interfaces, ocfs2_create_inode_in_orphan() can run the block
group search (and get back the inode block number) *before* any actual
allocation occurs.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/suballoc.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/suballoc.h |  21 +++++++
 2 files changed, 180 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index e7edda8c6a11..8a286f54dca1 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -57,6 +57,12 @@ struct ocfs2_suballoc_result {
 	u64		sr_bg_blkno;	/* The bg we allocated from.  Set
 					   to 0 when a block group is
 					   contiguous. */
+	u64		sr_bg_stable_blkno; /*
+					     * Doesn't change, always
+					     * set to target block
+					     * group descriptor
+					     * block.
+					     */
 	u64		sr_blkno;	/* The first allocated block */
 	unsigned int	sr_bit_offset;	/* The bit in the bg */
 	unsigned int	sr_bits;	/* How many bits we claimed */
@@ -149,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
 	brelse(ac->ac_bh);
 	ac->ac_bh = NULL;
 	ac->ac_resv = NULL;
+	if (ac->ac_find_loc_priv) {
+		kfree(ac->ac_find_loc_priv);
+		ac->ac_find_loc_priv = NULL;
+	}
 }
 
 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -1689,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
 	if (!ret)
 		ocfs2_bg_discontig_fix_result(ac, gd, res);
 
+	/*
+	 * sr_bg_blkno might have been changed by
+	 * ocfs2_bg_discontig_fix_result
+	 */
+	res->sr_bg_stable_blkno = group_bh->b_blocknr;
+
+	if (ac->ac_find_loc_only)
+		goto out_loc_only;
+
 	ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
 					       res->sr_bits,
 					       le16_to_cpu(gd->bg_chain));
@@ -1702,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
 	if (ret < 0)
 		mlog_errno(ret);
 
+out_loc_only:
 	*bits_left = le16_to_cpu(gd->bg_free_bits_count);
 
 out:
@@ -1780,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	if (!status)
 		ocfs2_bg_discontig_fix_result(ac, bg, res);
 
+	/*
+	 * sr_bg_blkno might have been changed by
+	 * ocfs2_bg_discontig_fix_result
+	 */
+	res->sr_bg_stable_blkno = group_bh->b_blocknr;
 
 	/*
 	 * Keep track of previous block descriptor read. When
@@ -1806,6 +1831,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 		}
 	}
 
+	if (ac->ac_find_loc_only)
+		goto out_loc_only;
+
 	status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
 						  ac->ac_bh, res->sr_bits,
 						  chain);
@@ -1828,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
 	     (unsigned long long)le64_to_cpu(fe->i_blkno));
 
+out_loc_only:
 	*bits_left = le16_to_cpu(bg->bg_free_bits_count);
 bail:
 	brelse(group_bh);
@@ -2023,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
 	OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
 }
 
+int ocfs2_find_new_inode_loc(struct inode *dir,
+			     struct buffer_head *parent_fe_bh,
+			     struct ocfs2_alloc_context *ac,
+			     u64 *fe_blkno)
+{
+	int ret;
+	handle_t *handle = NULL;
+	struct ocfs2_suballoc_result *res;
+
+	BUG_ON(!ac);
+	BUG_ON(ac->ac_bits_given != 0);
+	BUG_ON(ac->ac_bits_wanted != 1);
+	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
+
+	res = kzalloc(sizeof(*res), GFP_NOFS);
+	if (res == NULL) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
+
+	/*
+	 * The handle started here is for chain relink. Alternatively,
+	 * we could just disable relink for these calls.
+	 */
+	handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		handle = NULL;
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * This will instruct ocfs2_claim_suballoc_bits and
+	 * ocfs2_search_one_group to search but save actual allocation
+	 * for later.
+	 */
+	ac->ac_find_loc_only = 1;
+
+	ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ac->ac_find_loc_priv = res;
+	*fe_blkno = res->sr_blkno;
+
+out:
+	if (handle)
+		ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
+
+	if (ret)
+		kfree(res);
+
+	return ret;
+}
+
+int ocfs2_claim_new_inode_at_loc(handle_t *handle,
+				 struct inode *dir,
+				 struct ocfs2_alloc_context *ac,
+				 u64 *suballoc_loc,
+				 u16 *suballoc_bit,
+				 u64 di_blkno)
+{
+	int ret;
+	u16 chain;
+	struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
+	struct buffer_head *bg_bh = NULL;
+	struct ocfs2_group_desc *bg;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
+
+	/*
+	 * Since di_blkno is being passed back in, we check for any
+	 * inconsistencies which may have happened between
+	 * calls. These are code bugs as di_blkno is not expected to
+	 * change once returned from ocfs2_find_new_inode_loc()
+	 */
+	BUG_ON(res->sr_blkno != di_blkno);
+
+	ret = ocfs2_read_group_descriptor(ac->ac_inode, di,
+					  res->sr_bg_stable_blkno, &bg_bh);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+	chain = le16_to_cpu(bg->bg_chain);
+
+	ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
+					       ac->ac_bh, res->sr_bits,
+					       chain);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_block_group_set_bits(handle,
+					 ac->ac_inode,
+					 bg,
+					 bg_bh,
+					 res->sr_bit_offset,
+					 res->sr_bits);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
+	     (unsigned long long)di_blkno);
+
+	atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
+
+	BUG_ON(res->sr_bits != 1);
+
+	*suballoc_loc = res->sr_bg_blkno;
+	*suballoc_bit = res->sr_bit_offset;
+	ac->ac_bits_given++;
+	ocfs2_save_inode_ac_group(dir, ac);
+
+out:
+	brelse(bg_bh);
+
+	return ret;
+}
+
 int ocfs2_claim_new_inode(handle_t *handle,
 			  struct inode *dir,
 			  struct buffer_head *parent_fe_bh,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a017dd3ee7d9..b8afabfeede4 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context {
 	u64    ac_max_block;  /* Highest block number to allocate. 0 is
 				 is the same as ~0 - unlimited */
 
+	int    ac_find_loc_only;  /* hack for reflink operation ordering */
+	struct ocfs2_suballoc_result *ac_find_loc_priv; /* */
+
 	struct ocfs2_alloc_reservation	*ac_resv;
 };
 
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
 			  struct ocfs2_alloc_context **meta_ac);
 
 int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
+
+
+
+/*
+ * The following two interfaces are for ocfs2_create_inode_in_orphan().
+ */
+int ocfs2_find_new_inode_loc(struct inode *dir,
+			     struct buffer_head *parent_fe_bh,
+			     struct ocfs2_alloc_context *ac,
+			     u64 *fe_blkno);
+
+int ocfs2_claim_new_inode_at_loc(handle_t *handle,
+				 struct inode *dir,
+				 struct ocfs2_alloc_context *ac,
+				 u64 *suballoc_loc,
+				 u16 *suballoc_bit,
+				 u64 di_blkno);
+
 #endif /* _CHAINALLOC_H_ */
-- 
cgit v1.2.2


From dd43bcde23c527f64897eef41aa1fed2c9905ea9 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Fri, 13 Aug 2010 15:15:18 -0700
Subject: ocfs2: split out ocfs2_prepare_orphan_dir() into locking and prep
 functions

We do this because ocfs2_create_inode_in_orphan() wants to order locking of
the orphan dir with respect to locking of the inode allocator *before*
making any changes to the directory.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/namei.c | 120 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 88 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2aa66b695fab..54c629855357 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1871,61 +1871,117 @@ bail:
 	return status;
 }
 
-static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
-				    struct inode **ret_orphan_dir,
-				    u64 blkno,
-				    char *name,
-				    struct ocfs2_dir_lookup_result *lookup)
+static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
+					struct inode **ret_orphan_dir,
+					struct buffer_head **ret_orphan_dir_bh)
 {
 	struct inode *orphan_dir_inode;
 	struct buffer_head *orphan_dir_bh = NULL;
-	int status = 0;
-
-	status = ocfs2_blkno_stringify(blkno, name);
-	if (status < 0) {
-		mlog_errno(status);
-		return status;
-	}
+	int ret = 0;
 
 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
 						       ORPHAN_DIR_SYSTEM_INODE,
 						       osb->slot_num);
 	if (!orphan_dir_inode) {
-		status = -ENOENT;
-		mlog_errno(status);
-		return status;
+		ret = -ENOENT;
+		mlog_errno(ret);
+		return ret;
 	}
 
 	mutex_lock(&orphan_dir_inode->i_mutex);
 
-	status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
-	if (status < 0) {
-		mlog_errno(status);
-		goto leave;
+	ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+	if (ret < 0) {
+		mutex_unlock(&orphan_dir_inode->i_mutex);
+		iput(orphan_dir_inode);
+
+		mlog_errno(ret);
+		return ret;
 	}
 
-	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
-					      orphan_dir_bh, name,
-					      OCFS2_ORPHAN_NAMELEN, lookup);
-	if (status < 0) {
-		ocfs2_inode_unlock(orphan_dir_inode, 1);
+	*ret_orphan_dir = orphan_dir_inode;
+	*ret_orphan_dir_bh = orphan_dir_bh;
 
-		mlog_errno(status);
-		goto leave;
+	return 0;
+}
+
+static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
+				      struct buffer_head *orphan_dir_bh,
+				      u64 blkno,
+				      char *name,
+				      struct ocfs2_dir_lookup_result *lookup)
+{
+	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
+
+	ret = ocfs2_blkno_stringify(blkno, name);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
+					   orphan_dir_bh, name,
+					   OCFS2_ORPHAN_NAMELEN, lookup);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for
+ * insertion of an orphan.
+ * @osb: ocfs2 file system
+ * @ret_orphan_dir: Orphan dir inode - returned locked!
+ * @blkno: Actual block number of the inode to be inserted into orphan dir.
+ * @lookup: dir lookup result, to be passed back into functions like
+ *          ocfs2_orphan_add
+ *
+ * Returns zero on success and the ret_orphan_dir, name and lookup
+ * fields will be populated.
+ *
+ * Returns non-zero on failure. 
+ */
+static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
+				    struct inode **ret_orphan_dir,
+				    u64 blkno,
+				    char *name,
+				    struct ocfs2_dir_lookup_result *lookup)
+{
+	struct inode *orphan_dir_inode = NULL;
+	struct buffer_head *orphan_dir_bh = NULL;
+	int ret = 0;
+
+	ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode,
+					   &orphan_dir_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
+					 blkno, name, lookup);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
 	}
 
 	*ret_orphan_dir = orphan_dir_inode;
 
-leave:
-	if (status) {
+out:
+	brelse(orphan_dir_bh);
+
+	if (ret) {
+		ocfs2_inode_unlock(orphan_dir_inode, 1);
 		mutex_unlock(&orphan_dir_inode->i_mutex);
 		iput(orphan_dir_inode);
 	}
 
-	brelse(orphan_dir_bh);
-
-	mlog_exit(status);
-	return status;
+	mlog_exit(ret);
+	return ret;
 }
 
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
-- 
cgit v1.2.2


From 97b8f4a9dfd932997677136e11980eb2fafea91d Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.com>
Date: Fri, 13 Aug 2010 15:15:19 -0700
Subject: ocfs2: Fix orphan add in ocfs2_create_inode_in_orphan

ocfs2_create_inode_in_orphan() is used by reflink to create the newly
reflinked inode simultaneously in the orphan dir. This allows us to easily
handle partially-reflinked files during recovery cleanup.

We have a problem though - the orphan dir stringifies inode # to determine
a unique name under which the orphan entry dirent can be created. Since
ocfs2_create_inode_in_orphan() needs the space allocated in the orphan dir
before it can allocate the inode, we currently call into the orphan code:

       /*
        * We give the orphan dir the root blkno to fake an orphan name,
        * and allocate enough space for our insertion.
        */
       status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
                                         osb->root_blkno,
                                         orphan_name, &orphan_insert);

Using osb->root_blkno might work fine on unindexed directories, but the
orphan dir can have an index.  When it has that index, the above code fails
to allocate the proper index entry.  Later, when we try to remove the file
from the orphan dir (using the actual inode #), the reflink operation will
fail.

To fix this, I created a function ocfs2_alloc_orphaned_file() which uses the
newly split out orphan and inode alloc code to figure out what the inode
block number will be (once allocated) and then prepare the orphan dir from
that data.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/namei.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 107 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 54c629855357..a00dda2e4f16 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2128,6 +2128,99 @@ leave:
 	return status;
 }
 
+/**
+ * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly
+ * allocated file. This is different from the typical 'add to orphan dir'
+ * operation in that the inode does not yet exist. This is a problem because
+ * the orphan dir stringifies the inode block number to come up with it's
+ * dirent. Obviously if the inode does not yet exist we have a chicken and egg
+ * problem. This function works around it by calling deeper into the orphan
+ * and suballoc code than other callers. Use this only by necessity.
+ * @dir: The directory which this inode will ultimately wind up under - not the
+ * orphan dir!
+ * @dir_bh: buffer_head the @dir inode block
+ * @orphan_name: string of length (CFS2_ORPHAN_NAMELEN + 1). Will be filled
+ * with the string to be used for orphan dirent. Pass back to the orphan dir
+ * code.
+ * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan
+ * dir code.
+ * @ret_di_blkno: block number where the new inode will be allocated.
+ * @orphan_insert: Dir insert context to be passed back into orphan dir code.
+ * @ret_inode_ac: Inode alloc context to be passed back to the allocator.
+ *
+ * Returns zero on success and the ret_orphan_dir, name and lookup
+ * fields will be populated.
+ *
+ * Returns non-zero on failure. 
+ */
+static int ocfs2_prep_new_orphaned_file(struct inode *dir,
+					struct buffer_head *dir_bh,
+					char *orphan_name,
+					struct inode **ret_orphan_dir,
+					u64 *ret_di_blkno,
+					struct ocfs2_dir_lookup_result *orphan_insert,
+					struct ocfs2_alloc_context **ret_inode_ac)
+{
+	int ret;
+	u64 di_blkno;
+	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+	struct inode *orphan_dir = NULL;
+	struct buffer_head *orphan_dir_bh = NULL;
+	struct ocfs2_alloc_context *inode_ac = NULL;
+
+	ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	/* reserve an inode spot */
+	ret = ocfs2_reserve_new_inode(osb, &inode_ac);
+	if (ret < 0) {
+		if (ret != -ENOSPC)
+			mlog_errno(ret);
+		goto out;
+	}
+
+	ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac,
+				       &di_blkno);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
+					 di_blkno, orphan_name, orphan_insert);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+out:
+	if (ret == 0) {
+		*ret_orphan_dir = orphan_dir;
+		*ret_di_blkno = di_blkno;
+		*ret_inode_ac = inode_ac;
+		/*
+		 * orphan_name and orphan_insert are already up to
+		 * date via prepare_orphan_dir
+		 */
+	} else {
+		/* Unroll reserve_new_inode* */
+		if (inode_ac)
+			ocfs2_free_alloc_context(inode_ac);
+
+		/* Unroll orphan dir locking */
+		mutex_unlock(&orphan_dir->i_mutex);
+		ocfs2_inode_unlock(orphan_dir, 1);
+		iput(orphan_dir);
+	}
+
+	brelse(orphan_dir_bh);
+
+	return 0;
+}
+
 int ocfs2_create_inode_in_orphan(struct inode *dir,
 				 int mode,
 				 struct inode **new_inode)
@@ -2143,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
 	struct buffer_head *new_di_bh = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
+	u64 uninitialized_var(di_blkno), suballoc_loc;
+	u16 suballoc_bit;
 
 	status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
 	if (status < 0) {
@@ -2151,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
 		return status;
 	}
 
-	/*
-	 * We give the orphan dir the root blkno to fake an orphan name,
-	 * and allocate enough space for our insertion.
-	 */
-	status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
-					  osb->root_blkno,
-					  orphan_name, &orphan_insert);
-	if (status < 0) {
-		mlog_errno(status);
-		goto leave;
-	}
-
-	/* reserve an inode spot */
-	status = ocfs2_reserve_new_inode(osb, &inode_ac);
+	status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh,
+					      orphan_name, &orphan_dir,
+					      &di_blkno, &orphan_insert, &inode_ac);
 	if (status < 0) {
 		if (status != -ENOSPC)
 			mlog_errno(status);
@@ -2191,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
 		goto leave;
 	did_quota_inode = 1;
 
-	inode->i_nlink = 0;
-	/* do the real work now. */
-	status = ocfs2_mknod_locked(osb, dir, inode,
-				    0, &new_di_bh, parent_di_bh, handle,
-				    inode_ac);
+	status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac,
+					      &suballoc_loc,
+					      &suballoc_bit, di_blkno);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}
 
-	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name);
+	inode->i_nlink = 0;
+	/* do the real work now. */
+	status = __ocfs2_mknod_locked(dir, inode,
+				      0, &new_di_bh, parent_di_bh, handle,
+				      inode_ac, di_blkno, suballoc_loc,
+				      suballoc_bit);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
-- 
cgit v1.2.2


From 7100ae97266e387d25d0c8a5d9934931f0b07dbc Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 20:54:49 +0000
Subject: Revert "[CIFS] Eliminate unused variable warning"

The change to kernel crypto and fixes to ntlvm2 and ntlmssp
series, introduced a regression.  Deferring this patch series
to 2.6.37 after Shirish fixes it.

This reverts commit c89e5198b26a869ce2842bad8519264f3394dee9.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/sess.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 795095f4eac6..4788e16a02cc 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -620,6 +620,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	struct key *spnego_key = NULL;
 	__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
 	bool first_time;
+	char *ntlmsspblob;
 
 	if (ses == NULL)
 		return -EINVAL;
@@ -867,8 +868,6 @@ ssetup_ntlmssp_authenticate:
 				iov[1].iov_base = &pSMB->req.SecurityBlob[0];
 			} else if (phase == NtLmAuthenticate) {
 				int blob_len;
-				char *ntlmsspblob;
-
 				ntlmsspblob = kmalloc(5 *
 					sizeof(struct _AUTHENTICATE_MESSAGE),
 					GFP_KERNEL);
-- 
cgit v1.2.2


From 56234e2767496c125a858f880f1b3a62e04a3406 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 20:57:05 +0000
Subject: Revert "Eliminate sparse warning - bad constant expression"

This reverts commit 2d20ca835867d93ead6ce61780d883a4b128106d.

    The change to kernel crypto and fixes to ntlvm2 and ntlmssp
    series, introduced a regression.  Deferring this patch series
    to 2.6.37 after Shirish fixes it.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/cifsencrypt.c | 193 +++++++++++++++++++-------------------------------
 fs/cifs/cifsglob.h    |   7 --
 2 files changed, 72 insertions(+), 128 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 709f2296bdb4..eef78c24e0cc 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -45,38 +45,39 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
 			struct TCP_Server_Info *server, char *signature)
 {
-	int rc;
+	int rc = 0;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
+	} sdesc;
 
 	if (cifs_pdu == NULL || server == NULL || signature == NULL)
 		return -EINVAL;
 
-	if (!server->ntlmssp.sdescmd5) {
-		cERROR(1,
-			"cifs_calculate_signature: can't generate signature\n");
-		return -1;
-	}
+	sdesc.shash.tfm = server->ntlmssp.md5;
+	sdesc.shash.flags = 0x0;
 
-	rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "cifs_calculate_signature: oould not init md5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			server->session_key.data.ntlmv2.key,
 			CIFS_NTLMV2_SESSKEY_SIZE);
 	else
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			(char *)&server->session_key.data,
 			server->session_key.len);
 
-	crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+	crypto_shash_update(&sdesc.shash,
 			cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
 
-	rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
+	rc = crypto_shash_final(&sdesc.shash, signature);
 
-	return rc;
+	return 0;
 }
 
 
@@ -114,28 +115,30 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 			struct TCP_Server_Info *server, char *signature)
 {
 	int i;
-	int rc;
+	int rc = 0;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
+	} sdesc;
 
 	if (iov == NULL || server == NULL || signature == NULL)
 		return -EINVAL;
 
-	if (!server->ntlmssp.sdescmd5) {
-		cERROR(1, "cifs_calc_signature2: can't generate signature\n");
-		return -1;
-	}
+	sdesc.shash.tfm = server->ntlmssp.md5;
+	sdesc.shash.flags = 0x0;
 
-	rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "cifs_calc_signature2: oould not init md5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			server->session_key.data.ntlmv2.key,
 			CIFS_NTLMV2_SESSKEY_SIZE);
 	else
-		crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 			(char *)&server->session_key.data,
 			server->session_key.len);
 
@@ -143,7 +146,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 		if (iov[i].iov_len == 0)
 			continue;
 		if (iov[i].iov_base == NULL) {
-			cERROR(1, "cifs_calc_signature2: null iovec entry");
+			cERROR(1, "null iovec entry");
 			return -EIO;
 		}
 		/* The first entry includes a length field (which does not get
@@ -151,16 +154,16 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 		if (i == 0) {
 			if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
 				break; /* nothing to sign or corrupt header */
-			crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+			crypto_shash_update(&sdesc.shash,
 				iov[i].iov_base + 4, iov[i].iov_len - 4);
 		} else
-			crypto_shash_update(&server->ntlmssp.sdescmd5->shash,
+			crypto_shash_update(&sdesc.shash,
 				iov[i].iov_base, iov[i].iov_len);
 	}
 
-	rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature);
+	rc = crypto_shash_final(&sdesc.shash, signature);
 
-	return rc;
+	return 0;
 }
 
 int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
@@ -310,48 +313,43 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 	wchar_t *user;
 	wchar_t *domain;
 	wchar_t *server;
-
-	if (!ses->server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
-		return -1;
-	}
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
+	} sdesc;
 
 	/* calculate md4 hash of password */
 	E_md4hash(ses->password, nt_hash);
 
+	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
+	sdesc.shash.flags = 0x0;
+
 	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash,
 				CIFS_NTHASH_SIZE);
 
-	rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	/* convert ses->userName to unicode and uppercase */
 	len = strlen(ses->userName);
 	user = kmalloc(2 + (len * 2), GFP_KERNEL);
-	if (user == NULL) {
-		cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
-		rc = -ENOMEM;
+	if (user == NULL)
 		goto calc_exit_2;
-	}
 	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
 	UniStrupr(user);
 
-	crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-				(char *)user, 2 * len);
+	crypto_shash_update(&sdesc.shash, (char *)user, 2 * len);
 
 	/* convert ses->domainName to unicode and uppercase */
 	if (ses->domainName) {
 		len = strlen(ses->domainName);
 
 		domain = kmalloc(2 + (len * 2), GFP_KERNEL);
-		if (domain == NULL) {
-			cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
-			rc = -ENOMEM;
+		if (domain == NULL)
 			goto calc_exit_1;
-		}
 		len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
 					nls_cp);
 		/* the following line was removed since it didn't work well
@@ -359,19 +357,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		   Maybe converting the domain name earlier makes sense */
 		/* UniStrupr(domain); */
 
-		crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-					(char *)domain, 2 * len);
+		crypto_shash_update(&sdesc.shash, (char *)domain, 2 * len);
 
 		kfree(domain);
 	} else if (ses->serverName) {
 		len = strlen(ses->serverName);
 
 		server = kmalloc(2 + (len * 2), GFP_KERNEL);
-		if (server == NULL) {
-			cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
-			rc = -ENOMEM;
+		if (server == NULL)
 			goto calc_exit_1;
-		}
 		len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
 					nls_cp);
 		/* the following line was removed since it didn't work well
@@ -379,20 +373,16 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		   Maybe converting the domain name earlier makes sense */
 		/* UniStrupr(domain); */
 
-		crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-					(char *)server, 2 * len);
+		crypto_shash_update(&sdesc.shash, (char *)server, 2 * len);
 
 		kfree(server);
 	}
-
-	rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
-					ses->server->ntlmv2_hash);
-
 calc_exit_1:
 	kfree(user);
 calc_exit_2:
 	/* BB FIXME what about bytes 24 through 40 of the signing key?
 	   compare with the NTLM example */
+	rc = crypto_shash_final(&sdesc.shash, ses->server->ntlmv2_hash);
 
 	return rc;
 }
@@ -452,33 +442,34 @@ CalcNTLMv2_response(const struct TCP_Server_Info *server,
 			 char *v2_session_response)
 {
 	int rc;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(server->ntlmssp.hmacmd5)];
+	} sdesc;
 
-	if (!server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
-		return -1;
-	}
+	sdesc.shash.tfm = server->ntlmssp.hmacmd5;
+	sdesc.shash.flags = 0x0;
 
 	crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash,
 		CIFS_HMAC_MD5_HASH_SIZE);
 
-	rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash);
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
 	memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
 		server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE);
-	crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
+	crypto_shash_update(&sdesc.shash,
 		v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
 		sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE);
 
 	if (server->tilen)
-		crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash,
+		crypto_shash_update(&sdesc.shash,
 					server->tiblob, server->tilen);
 
-	rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash,
-					v2_session_response);
+	rc = crypto_shash_final(&sdesc.shash, v2_session_response);
 
 	return rc;
 }
@@ -489,6 +480,10 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
 {
 	int rc = 0;
 	struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
+	} sdesc;
 
 	buf->blob_signature = cpu_to_le32(0x00000101);
 	buf->reserved = 0;
@@ -516,24 +511,21 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
 		return rc;
 	}
 
-	if (!ses->server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
-		return -1;
-	}
-
 	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5,
 			ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
 
-	rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash);
+	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
+	sdesc.shash.flags = 0x0;
+
+	rc = crypto_shash_init(&sdesc.shash);
 	if (rc) {
-		cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n");
+		cERROR(1, "could not initialize master crypto API hmacmd5\n");
 		return rc;
 	}
 
-	crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash,
-				resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
+	crypto_shash_update(&sdesc.shash, resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
 
-	rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash,
+	rc = crypto_shash_final(&sdesc.shash,
 		ses->server->session_key.data.ntlmv2.key);
 
 	memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf,
@@ -586,65 +578,24 @@ cifs_crypto_shash_release(struct TCP_Server_Info *server)
 
 	if (server->ntlmssp.hmacmd5)
 		crypto_free_shash(server->ntlmssp.hmacmd5);
-
-	kfree(server->ntlmssp.sdeschmacmd5);
-
-	kfree(server->ntlmssp.sdescmd5);
 }
 
 int
 cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
 {
-	int rc;
-	unsigned int size;
-
 	server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
 	if (!server->ntlmssp.hmacmd5 ||
 			IS_ERR(server->ntlmssp.hmacmd5)) {
-		cERROR(1, "could not allocate crypto hmacmd5\n");
+		cERROR(1, "could not allocate master crypto API hmacmd5\n");
 		return 1;
 	}
 
 	server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0);
 	if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) {
-		cERROR(1, "could not allocate crypto md5\n");
-		rc = 1;
-		goto cifs_crypto_shash_allocate_ret1;
-	}
-
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->ntlmssp.hmacmd5);
-	server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
-	if (!server->ntlmssp.sdeschmacmd5) {
-		cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
-		rc = -ENOMEM;
-		goto cifs_crypto_shash_allocate_ret2;
-	}
-	server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5;
-	server->ntlmssp.sdeschmacmd5->shash.flags = 0x0;
-
-
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->ntlmssp.md5);
-	server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL);
-	if (!server->ntlmssp.sdescmd5) {
-		cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
-		rc = -ENOMEM;
-		goto cifs_crypto_shash_allocate_ret3;
+		crypto_free_shash(server->ntlmssp.hmacmd5);
+		cERROR(1, "could not allocate master crypto API md5\n");
+		return 1;
 	}
-	server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5;
-	server->ntlmssp.sdescmd5->shash.flags = 0x0;
 
 	return 0;
-
-cifs_crypto_shash_allocate_ret3:
-	kfree(server->ntlmssp.sdeschmacmd5);
-
-cifs_crypto_shash_allocate_ret2:
-	crypto_free_shash(server->ntlmssp.md5);
-
-cifs_crypto_shash_allocate_ret1:
-	crypto_free_shash(server->ntlmssp.hmacmd5);
-
-	return rc;
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c9d0cfc086eb..49563e0c1725 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -123,19 +123,12 @@ struct cifs_cred {
 	struct cifs_ace *aces;
 };
 
-struct sdesc {
-	struct shash_desc shash;
-	char ctx[];
-};
-
 struct ntlmssp_auth {
 	__u32 client_flags;
 	__u32 server_flags;
 	unsigned char ciphertext[CIFS_CPHTXT_SIZE];
 	struct crypto_shash *hmacmd5;
 	struct crypto_shash *md5;
-	struct sdesc *sdeschmacmd5;
-	struct sdesc *sdescmd5;
 };
 
 /*
-- 
cgit v1.2.2


From 745e507a9c79c6e1385d3414d5e56f3d4621a375 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 21:09:27 +0000
Subject: Revert "missing changes during ntlmv2/ntlmssp auth and sign"

This reverts commit 3ec6bbcdb4e85403f2c5958876ca9492afdf4031.

    The change to kernel crypto and fixes to ntlvm2 and ntlmssp
    series, introduced a regression.  Deferring this patch series
    to 2.6.37 after Shirish fixes it.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/cifsencrypt.c |  2 --
 fs/cifs/sess.c        | 13 +++++--------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index eef78c24e0cc..051d00011ca3 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -553,8 +553,6 @@ calc_seckey(struct TCP_Server_Info *server)
 		return 1;
 	}
 
-	desc.tfm = tfm_arc4;
-
 	crypto_blkcipher_setkey(tfm_arc4,
 		server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE);
 	sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 4788e16a02cc..41fc5328120d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -408,8 +408,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 	/* BB spec says that if AvId field of MsvAvTimestamp is populated then
 		we must set the MIC field of the AUTHENTICATE_MESSAGE */
 
-	ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags);
-
 	tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
 	tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
 	ses->server->tilen = tilen;
@@ -442,13 +440,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
 	/* BB is NTLMV2 session security format easier to use here? */
 	flags = NTLMSSP_NEGOTIATE_56 |	NTLMSSP_REQUEST_TARGET |
 		NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
-		NTLMSSP_NEGOTIATE_NTLM;
+		NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
 	if (ses->server->secMode &
-	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
-		flags |= NTLMSSP_NEGOTIATE_SIGN |
-			NTLMSSP_NEGOTIATE_KEY_XCH |
-			NTLMSSP_NEGOTIATE_EXTENDED_SEC;
-	}
+	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+		flags |= NTLMSSP_NEGOTIATE_SIGN;
+	if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
+		flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
 
 	sec_blob->NegotiateFlags |= cpu_to_le32(flags);
 
-- 
cgit v1.2.2


From c8e56f1f4fb9f82f63e4ce6d73a14501d0432c76 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 21:10:58 +0000
Subject: Revert "[CIFS] Fix ntlmv2 auth with ntlmssp"

This reverts commit 9fbc590860e75785bdaf8b83e48fabfe4d4f7d58.

The change to kernel crypto and fixes to ntlvm2 and ntlmssp
series, introduced a regression.  Deferring this patch series
to 2.6.37 after Shirish fixes it.

Signed-off-by: Steve French <sfrench@us.ibm.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
CC: Shirish Pargaonkar <shirishp@us.ibm.com>
---
 fs/cifs/Kconfig       |   2 -
 fs/cifs/asn1.c        |   6 +-
 fs/cifs/cifsencrypt.c | 416 +++++++++++++++-----------------------------------
 fs/cifs/cifsglob.h    |  18 +--
 fs/cifs/cifspdu.h     |   7 +-
 fs/cifs/cifsproto.h   |  12 +-
 fs/cifs/cifssmb.c     |  13 +-
 fs/cifs/connect.c     |  13 +-
 fs/cifs/ntlmssp.h     |  13 --
 fs/cifs/sess.c        | 118 ++++----------
 fs/cifs/transport.c   |   6 +-
 11 files changed, 172 insertions(+), 452 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0da1debd499d..917b7d449bb2 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,8 +2,6 @@ config CIFS
 	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
 	depends on INET
 	select NLS
-	select CRYPTO_MD5
-	select CRYPTO_ARC4
 	help
 	  This is the client VFS module for the Common Internet File System
 	  (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 21f0fbd86989..cfd1ce34e0bc 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 				if (compare_oid(oid, oidlen, MSKRB5_OID,
 						MSKRB5_OID_LEN))
 					server->sec_mskerberos = true;
-				if (compare_oid(oid, oidlen, KRB5U2U_OID,
+				else if (compare_oid(oid, oidlen, KRB5U2U_OID,
 						     KRB5U2U_OID_LEN))
 					server->sec_kerberosu2u = true;
-				if (compare_oid(oid, oidlen, KRB5_OID,
+				else if (compare_oid(oid, oidlen, KRB5_OID,
 						     KRB5_OID_LEN))
 					server->sec_kerberos = true;
-				if (compare_oid(oid, oidlen, NTLMSSP_OID,
+				else if (compare_oid(oid, oidlen, NTLMSSP_OID,
 						     NTLMSSP_OID_LEN))
 					server->sec_ntlmssp = true;
 
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 051d00011ca3..847628dfdc44 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -27,7 +27,6 @@
 #include "md5.h"
 #include "cifs_unicode.h"
 #include "cifsproto.h"
-#include "ntlmssp.h"
 #include <linux/ctype.h>
 #include <linux/random.h>
 
@@ -43,44 +42,21 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 		       unsigned char *p24);
 
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
-			struct TCP_Server_Info *server, char *signature)
+				    const struct mac_key *key, char *signature)
 {
-	int rc = 0;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
-	} sdesc;
+	struct	MD5Context context;
 
-	if (cifs_pdu == NULL || server == NULL || signature == NULL)
+	if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
 		return -EINVAL;
 
-	sdesc.shash.tfm = server->ntlmssp.md5;
-	sdesc.shash.flags = 0x0;
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&sdesc.shash,
-			server->session_key.data.ntlmv2.key,
-			CIFS_NTLMV2_SESSKEY_SIZE);
-	else
-		crypto_shash_update(&sdesc.shash,
-			(char *)&server->session_key.data,
-			server->session_key.len);
-
-	crypto_shash_update(&sdesc.shash,
-			cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
-
-	rc = crypto_shash_final(&sdesc.shash, signature);
+	cifs_MD5_init(&context);
+	cifs_MD5_update(&context, (char *)&key->data, key->len);
+	cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
 
+	cifs_MD5_final(signature, &context);
 	return 0;
 }
 
-
 int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 		  __u32 *pexpected_response_sequence_number)
 {
@@ -102,7 +78,8 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 	server->sequence_number++;
 	spin_unlock(&GlobalMid_Lock);
 
-	rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
+	rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key,
+				      smb_signature);
 	if (rc)
 		memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
 	else
@@ -112,36 +89,16 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 }
 
 static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
-			struct TCP_Server_Info *server, char *signature)
+				const struct mac_key *key, char *signature)
 {
+	struct  MD5Context context;
 	int i;
-	int rc = 0;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(server->ntlmssp.md5)];
-	} sdesc;
 
-	if (iov == NULL || server == NULL || signature == NULL)
+	if ((iov == NULL) || (signature == NULL) || (key == NULL))
 		return -EINVAL;
 
-	sdesc.shash.tfm = server->ntlmssp.md5;
-	sdesc.shash.flags = 0x0;
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	if (server->secType == RawNTLMSSP)
-		crypto_shash_update(&sdesc.shash,
-			server->session_key.data.ntlmv2.key,
-			CIFS_NTLMV2_SESSKEY_SIZE);
-	else
-		crypto_shash_update(&sdesc.shash,
-			(char *)&server->session_key.data,
-			server->session_key.len);
-
+	cifs_MD5_init(&context);
+	cifs_MD5_update(&context, (char *)&key->data, key->len);
 	for (i = 0; i < n_vec; i++) {
 		if (iov[i].iov_len == 0)
 			continue;
@@ -154,18 +111,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 		if (i == 0) {
 			if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
 				break; /* nothing to sign or corrupt header */
-			crypto_shash_update(&sdesc.shash,
-				iov[i].iov_base + 4, iov[i].iov_len - 4);
+			cifs_MD5_update(&context, iov[0].iov_base+4,
+				  iov[0].iov_len-4);
 		} else
-			crypto_shash_update(&sdesc.shash,
-				iov[i].iov_base, iov[i].iov_len);
+			cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
 	}
 
-	rc = crypto_shash_final(&sdesc.shash, signature);
+	cifs_MD5_final(signature, &context);
 
 	return 0;
 }
 
+
 int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 		   __u32 *pexpected_response_sequence_number)
 {
@@ -188,7 +145,8 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 	server->sequence_number++;
 	spin_unlock(&GlobalMid_Lock);
 
-	rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
+	rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key,
+				      smb_signature);
 	if (rc)
 		memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
 	else
@@ -198,14 +156,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 }
 
 int cifs_verify_signature(struct smb_hdr *cifs_pdu,
-			  struct TCP_Server_Info *server,
+			  const struct mac_key *mac_key,
 			  __u32 expected_sequence_number)
 {
-	int rc;
+	unsigned int rc;
 	char server_response_sig[8];
 	char what_we_think_sig_should_be[20];
 
-	if (cifs_pdu == NULL || server == NULL)
+	if ((cifs_pdu == NULL) || (mac_key == NULL))
 		return -EINVAL;
 
 	if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -234,7 +192,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 					cpu_to_le32(expected_sequence_number);
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
-	rc = cifs_calculate_signature(cifs_pdu, server,
+	rc = cifs_calculate_signature(cifs_pdu, mac_key,
 		what_we_think_sig_should_be);
 
 	if (rc)
@@ -251,7 +209,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 }
 
 /* We fill in key by putting in 40 byte array which was allocated by caller */
-int cifs_calculate_session_key(struct session_key *key, const char *rn,
+int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 			   const char *password)
 {
 	char temp_key[16];
@@ -265,6 +223,63 @@ int cifs_calculate_session_key(struct session_key *key, const char *rn,
 	return 0;
 }
 
+int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses,
+			       const struct nls_table *nls_info)
+{
+	char temp_hash[16];
+	struct HMACMD5Context ctx;
+	char *ucase_buf;
+	__le16 *unicode_buf;
+	unsigned int i, user_name_len, dom_name_len;
+
+	if (ses == NULL)
+		return -EINVAL;
+
+	E_md4hash(ses->password, temp_hash);
+
+	hmac_md5_init_limK_to_64(temp_hash, 16, &ctx);
+	user_name_len = strlen(ses->userName);
+	if (user_name_len > MAX_USERNAME_SIZE)
+		return -EINVAL;
+	if (ses->domainName == NULL)
+		return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
+	dom_name_len = strlen(ses->domainName);
+	if (dom_name_len > MAX_USERNAME_SIZE)
+		return -EINVAL;
+
+	ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL);
+	if (ucase_buf == NULL)
+		return -ENOMEM;
+	unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL);
+	if (unicode_buf == NULL) {
+		kfree(ucase_buf);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < user_name_len; i++)
+		ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]];
+	ucase_buf[i] = 0;
+	user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf,
+				      MAX_USERNAME_SIZE*2, nls_info);
+	unicode_buf[user_name_len] = 0;
+	user_name_len++;
+
+	for (i = 0; i < dom_name_len; i++)
+		ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]];
+	ucase_buf[i] = 0;
+	dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf,
+				     MAX_USERNAME_SIZE*2, nls_info);
+
+	unicode_buf[user_name_len + dom_name_len] = 0;
+	hmac_md5_update((const unsigned char *) unicode_buf,
+		(user_name_len+dom_name_len)*2, &ctx);
+
+	hmac_md5_final(ses->server->ntlmv2_hash, &ctx);
+	kfree(ucase_buf);
+	kfree(unicode_buf);
+	return 0;
+}
+
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
 			char *lnm_session_key)
@@ -309,29 +324,21 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 {
 	int rc = 0;
 	int len;
-	char nt_hash[CIFS_NTHASH_SIZE];
+	char nt_hash[16];
+	struct HMACMD5Context *pctxt;
 	wchar_t *user;
 	wchar_t *domain;
-	wchar_t *server;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
-	} sdesc;
 
-	/* calculate md4 hash of password */
-	E_md4hash(ses->password, nt_hash);
+	pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
 
-	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
-	sdesc.shash.flags = 0x0;
+	if (pctxt == NULL)
+		return -ENOMEM;
 
-	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash,
-				CIFS_NTHASH_SIZE);
+	/* calculate md4 hash of password */
+	E_md4hash(ses->password, nt_hash);
 
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
+	/* convert Domainname to unicode and uppercase */
+	hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
 
 	/* convert ses->userName to unicode and uppercase */
 	len = strlen(ses->userName);
@@ -340,8 +347,7 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		goto calc_exit_2;
 	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
 	UniStrupr(user);
-
-	crypto_shash_update(&sdesc.shash, (char *)user, 2 * len);
+	hmac_md5_update((char *)user, 2*len, pctxt);
 
 	/* convert ses->domainName to unicode and uppercase */
 	if (ses->domainName) {
@@ -357,243 +363,65 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		   Maybe converting the domain name earlier makes sense */
 		/* UniStrupr(domain); */
 
-		crypto_shash_update(&sdesc.shash, (char *)domain, 2 * len);
+		hmac_md5_update((char *)domain, 2*len, pctxt);
 
 		kfree(domain);
-	} else if (ses->serverName) {
-		len = strlen(ses->serverName);
-
-		server = kmalloc(2 + (len * 2), GFP_KERNEL);
-		if (server == NULL)
-			goto calc_exit_1;
-		len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
-					nls_cp);
-		/* the following line was removed since it didn't work well
-		   with lower cased domain name that passed as an option.
-		   Maybe converting the domain name earlier makes sense */
-		/* UniStrupr(domain); */
-
-		crypto_shash_update(&sdesc.shash, (char *)server, 2 * len);
-
-		kfree(server);
 	}
 calc_exit_1:
 	kfree(user);
 calc_exit_2:
 	/* BB FIXME what about bytes 24 through 40 of the signing key?
 	   compare with the NTLM example */
-	rc = crypto_shash_final(&sdesc.shash, ses->server->ntlmv2_hash);
-
-	return rc;
-}
-
-static int
-find_domain_name(struct cifsSesInfo *ses)
-{
-	int rc = 0;
-	unsigned int attrsize;
-	unsigned int type;
-	unsigned char *blobptr;
-	struct ntlmssp2_name *attrptr;
-
-	if (ses->server->tiblob) {
-		blobptr = ses->server->tiblob;
-		attrptr = (struct ntlmssp2_name *) blobptr;
-
-		while ((type = attrptr->type) != 0) {
-			blobptr += 2; /* advance attr type */
-			attrsize = attrptr->length;
-			blobptr += 2; /* advance attr size */
-			if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
-				if (!ses->domainName) {
-					ses->domainName =
-						kmalloc(attrptr->length + 1,
-								GFP_KERNEL);
-					if (!ses->domainName)
-							return -ENOMEM;
-					cifs_from_ucs2(ses->domainName,
-						(__le16 *)blobptr,
-						attrptr->length,
-						attrptr->length,
-						load_nls_default(), false);
-				}
-			}
-			blobptr += attrsize; /* advance attr  value */
-			attrptr = (struct ntlmssp2_name *) blobptr;
-		}
-	} else {
-		ses->server->tilen = 2 * sizeof(struct ntlmssp2_name);
-		ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL);
-		if (!ses->server->tiblob) {
-			ses->server->tilen = 0;
-			cERROR(1, "Challenge target info allocation failure");
-			return -ENOMEM;
-		}
-		memset(ses->server->tiblob, 0x0, ses->server->tilen);
-		attrptr = (struct ntlmssp2_name *) ses->server->tiblob;
-		attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
-	}
+	hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
 
+	kfree(pctxt);
 	return rc;
 }
 
-static int
-CalcNTLMv2_response(const struct TCP_Server_Info *server,
-			 char *v2_session_response)
-{
-	int rc;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(server->ntlmssp.hmacmd5)];
-	} sdesc;
-
-	sdesc.shash.tfm = server->ntlmssp.hmacmd5;
-	sdesc.shash.flags = 0x0;
-
-	crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash,
-		CIFS_HMAC_MD5_HASH_SIZE);
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
-		server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE);
-	crypto_shash_update(&sdesc.shash,
-		v2_session_response + CIFS_SERVER_CHALLENGE_SIZE,
-		sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE);
-
-	if (server->tilen)
-		crypto_shash_update(&sdesc.shash,
-					server->tiblob, server->tilen);
-
-	rc = crypto_shash_final(&sdesc.shash, v2_session_response);
-
-	return rc;
-}
-
-int
-setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
+void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
 		      const struct nls_table *nls_cp)
 {
-	int rc = 0;
+	int rc;
 	struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
-	struct {
-		struct shash_desc shash;
-		char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)];
-	} sdesc;
+	struct HMACMD5Context context;
 
 	buf->blob_signature = cpu_to_le32(0x00000101);
 	buf->reserved = 0;
 	buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
 	get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
 	buf->reserved2 = 0;
-
-	if (!ses->domainName) {
-		rc = find_domain_name(ses);
-		if (rc) {
-			cERROR(1, "could not get domain/server name rc %d", rc);
-			return rc;
-		}
-	}
+	buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE);
+	buf->names[0].length = 0;
+	buf->names[1].type = 0;
+	buf->names[1].length = 0;
 
 	/* calculate buf->ntlmv2_hash */
 	rc = calc_ntlmv2_hash(ses, nls_cp);
-	if (rc) {
-		cERROR(1, "could not get v2 hash rc %d", rc);
-		return rc;
-	}
-	rc = CalcNTLMv2_response(ses->server, resp_buf);
-	if (rc) {
+	if (rc)
 		cERROR(1, "could not get v2 hash rc %d", rc);
-		return rc;
-	}
-
-	crypto_shash_setkey(ses->server->ntlmssp.hmacmd5,
-			ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
-
-	sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5;
-	sdesc.shash.flags = 0x0;
-
-	rc = crypto_shash_init(&sdesc.shash);
-	if (rc) {
-		cERROR(1, "could not initialize master crypto API hmacmd5\n");
-		return rc;
-	}
-
-	crypto_shash_update(&sdesc.shash, resp_buf, CIFS_HMAC_MD5_HASH_SIZE);
+	CalcNTLMv2_response(ses, resp_buf);
 
-	rc = crypto_shash_final(&sdesc.shash,
-		ses->server->session_key.data.ntlmv2.key);
+	/* now calculate the MAC key for NTLMv2 */
+	hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
+	hmac_md5_update(resp_buf, 16, &context);
+	hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context);
 
-	memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf,
-			sizeof(struct ntlmv2_resp));
-	ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp);
-
-	return rc;
+	memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf,
+	       sizeof(struct ntlmv2_resp));
+	ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp);
 }
 
-int
-calc_seckey(struct TCP_Server_Info *server)
-{
-	int rc;
-	unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE];
-	struct crypto_blkcipher *tfm_arc4;
-	struct scatterlist sgin, sgout;
-	struct blkcipher_desc desc;
-
-	get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
-
-	tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)",
-						0, CRYPTO_ALG_ASYNC);
-	if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
-		cERROR(1, "could not allocate " "master crypto API arc4\n");
-		return 1;
-	}
-
-	crypto_blkcipher_setkey(tfm_arc4,
-		server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE);
-	sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE);
-	sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
-	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
-
-	if (!rc)
-		memcpy(server->session_key.data.ntlmv2.key,
-				sec_key, CIFS_NTLMV2_SESSKEY_SIZE);
-
-	crypto_free_blkcipher(tfm_arc4);
-
-	return 0;
-}
-
-void
-cifs_crypto_shash_release(struct TCP_Server_Info *server)
-{
-	if (server->ntlmssp.md5)
-		crypto_free_shash(server->ntlmssp.md5);
-
-	if (server->ntlmssp.hmacmd5)
-		crypto_free_shash(server->ntlmssp.hmacmd5);
-}
-
-int
-cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+void CalcNTLMv2_response(const struct cifsSesInfo *ses,
+			 char *v2_session_response)
 {
-	server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
-	if (!server->ntlmssp.hmacmd5 ||
-			IS_ERR(server->ntlmssp.hmacmd5)) {
-		cERROR(1, "could not allocate master crypto API hmacmd5\n");
-		return 1;
-	}
+	struct HMACMD5Context context;
+	/* rest of v2 struct already generated */
+	memcpy(v2_session_response + 8, ses->server->cryptKey, 8);
+	hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context);
 
-	server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0);
-	if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) {
-		crypto_free_shash(server->ntlmssp.hmacmd5);
-		cERROR(1, "could not allocate master crypto API md5\n");
-		return 1;
-	}
+	hmac_md5_update(v2_session_response+8,
+			sizeof(struct ntlmv2_resp) - 8, &context);
 
-	return 0;
+	hmac_md5_final(v2_session_response, &context);
+/*	cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 49563e0c1725..0cdfb8c32ac6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,9 +25,6 @@
 #include <linux/workqueue.h>
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
-#include <crypto/internal/hash.h>
-#include <linux/scatterlist.h>
-
 /*
  * The sizes of various internal tables and strings
  */
@@ -100,7 +97,7 @@ enum protocolEnum {
 	/* Netbios frames protocol not supported at this time */
 };
 
-struct session_key {
+struct mac_key {
 	unsigned int len;
 	union {
 		char ntlm[CIFS_SESS_KEY_SIZE + 16];
@@ -123,14 +120,6 @@ struct cifs_cred {
 	struct cifs_ace *aces;
 };
 
-struct ntlmssp_auth {
-	__u32 client_flags;
-	__u32 server_flags;
-	unsigned char ciphertext[CIFS_CPHTXT_SIZE];
-	struct crypto_shash *hmacmd5;
-	struct crypto_shash *md5;
-};
-
 /*
  *****************************************************************
  * Except the CIFS PDUs themselves all the
@@ -193,14 +182,11 @@ struct TCP_Server_Info {
 	/* 16th byte of RFC1001 workstation name is always null */
 	char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
 	__u32 sequence_number; /* needed for CIFS PDU signature */
-	struct session_key session_key;
+	struct mac_key mac_signing_key;
 	char ntlmv2_hash[16];
 	unsigned long lstrp; /* when we got last response from this server */
 	u16 dialect; /* dialect index that server chose */
 	/* extended security flavors that server supports */
-	unsigned int tilen; /* length of the target info blob */
-	unsigned char *tiblob; /* target info blob in challenge response */
-	struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */
 	bool	sec_kerberos;		/* supports plain Kerberos */
 	bool	sec_mskerberos;		/* supports legacy MS Kerberos */
 	bool	sec_kerberosu2u;	/* supports U2U Kerberos */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 320e0fd0ba7b..14d036d8db11 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -134,12 +134,6 @@
  * Size of the session key (crypto key encrypted with the password
  */
 #define CIFS_SESS_KEY_SIZE (24)
-#define CIFS_CLIENT_CHALLENGE_SIZE (8)
-#define CIFS_SERVER_CHALLENGE_SIZE (8)
-#define CIFS_HMAC_MD5_HASH_SIZE (16)
-#define CIFS_CPHTXT_SIZE (16)
-#define CIFS_NTLMV2_SESSKEY_SIZE (16)
-#define CIFS_NTHASH_SIZE (16)
 
 /*
  * Maximum user name length
@@ -669,6 +663,7 @@ struct ntlmv2_resp {
 	__le64  time;
 	__u64  client_chal; /* random */
 	__u32  reserved2;
+	struct ntlmssp2_name names[2];
 	/* array of name entries could follow ending in minimum 4 byte struct */
 } __attribute__((packed));
 
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1378d9133844..1f5450814087 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -361,15 +361,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
 extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 			  __u32 *);
 extern int cifs_verify_signature(struct smb_hdr *,
-				 struct TCP_Server_Info *server,
+				 const struct mac_key *mac_key,
 				__u32 expected_sequence_number);
-extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
+extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 				 const char *pass);
-extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
+extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
+			const struct nls_table *);
+extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
+extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
 			     const struct nls_table *);
-extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
-extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
-extern int calc_seckey(struct TCP_Server_Info *);
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern void calc_lanman_hash(const char *password, const char *cryptkey,
 				bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4bda920d1f75..c65c3419dd37 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -604,14 +604,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 			else
 				rc = -EINVAL;
 
-			if (server->secType == Kerberos) {
-				if (!server->sec_kerberos &&
-						!server->sec_mskerberos)
-					rc = -EOPNOTSUPP;
-			} else if (server->secType == RawNTLMSSP) {
-				if (!server->sec_ntlmssp)
-					rc = -EOPNOTSUPP;
-			} else
+			if (server->sec_kerberos || server->sec_mskerberos)
+				server->secType = Kerberos;
+			else if (server->sec_ntlmssp)
+				server->secType = RawNTLMSSP;
+			else
 				rc = -EOPNOTSUPP;
 		}
 	} else
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ec0ea4a43bdb..0ea52e9f9065 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1708,7 +1708,6 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
 		CIFSSMBLogoff(xid, ses);
 		_FreeXid(xid);
 	}
-	cifs_crypto_shash_release(server);
 	sesInfoFree(ses);
 	cifs_put_tcp_session(server);
 }
@@ -1788,23 +1787,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	ses->linux_uid = volume_info->linux_uid;
 	ses->overrideSecFlg = volume_info->secFlg;
 
-	rc = cifs_crypto_shash_allocate(server);
-	if (rc) {
-		cERROR(1, "could not setup hash structures rc %d", rc);
-		goto get_ses_fail;
-	}
-	server->tilen = 0;
-	server->tiblob = NULL;
-
 	mutex_lock(&ses->session_mutex);
 	rc = cifs_negotiate_protocol(xid, ses);
 	if (!rc)
 		rc = cifs_setup_session(xid, ses, volume_info->local_nls);
 	mutex_unlock(&ses->session_mutex);
-	if (rc) {
-		cifs_crypto_shash_release(ses->server);
+	if (rc)
 		goto get_ses_fail;
-	}
 
 	/* success, put it on the list */
 	write_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 1db0f0746a5b..49c9a4e75319 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -61,19 +61,6 @@
 #define NTLMSSP_NEGOTIATE_KEY_XCH   0x40000000
 #define NTLMSSP_NEGOTIATE_56        0x80000000
 
-/* Define AV Pair Field IDs */
-#define NTLMSSP_AV_EOL			0
-#define NTLMSSP_AV_NB_COMPUTER_NAME	1
-#define NTLMSSP_AV_NB_DOMAIN_NAME	2
-#define NTLMSSP_AV_DNS_COMPUTER_NAME	3
-#define NTLMSSP_AV_DNS_DOMAIN_NAME	4
-#define NTLMSSP_AV_DNS_TREE_NAME	5
-#define NTLMSSP_AV_FLAGS		6
-#define NTLMSSP_AV_TIMESTAMP		7
-#define NTLMSSP_AV_RESTRICTION		8
-#define NTLMSSP_AV_TARGET_NAME		9
-#define NTLMSSP_AV_CHANNEL_BINDINGS	10
-
 /* Although typedefs are not commonly used for structure definitions */
 /* in the Linux kernel, in this particular case they are useful      */
 /* to more closely match the standards document for NTLMSSP from     */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 41fc5328120d..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -383,9 +383,6 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
 static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 				    struct cifsSesInfo *ses)
 {
-	unsigned int tioffset; /* challeng message target info area */
-	unsigned int tilen; /* challeng message target info area length  */
-
 	CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
 
 	if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
@@ -408,18 +405,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 	/* BB spec says that if AvId field of MsvAvTimestamp is populated then
 		we must set the MIC field of the AUTHENTICATE_MESSAGE */
 
-	tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
-	tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
-	ses->server->tilen = tilen;
-	if (tilen) {
-		ses->server->tiblob = kmalloc(tilen, GFP_KERNEL);
-		if (!ses->server->tiblob) {
-			cERROR(1, "Challenge target info allocation failure");
-			return -ENOMEM;
-		}
-		memcpy(ses->server->tiblob,  bcc_ptr + tioffset, tilen);
-	}
-
 	return 0;
 }
 
@@ -466,12 +451,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 				   struct cifsSesInfo *ses,
 				   const struct nls_table *nls_cp, bool first)
 {
-	int rc;
-	unsigned int size;
 	AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
 	__u32 flags;
 	unsigned char *tmp;
-	struct ntlmv2_resp ntlmv2_response = {};
+	char ntlm_session_key[CIFS_SESS_KEY_SIZE];
 
 	memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
 	sec_blob->MessageType = NtLmAuthenticate;
@@ -494,25 +477,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->LmChallengeResponse.Length = 0;
 	sec_blob->LmChallengeResponse.MaximumLength = 0;
 
-	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
-	rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
-	if (rc) {
-		cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc);
-		goto setup_ntlmv2_ret;
-	}
-	size =  sizeof(struct ntlmv2_resp);
-	memcpy(tmp, (char *)&ntlmv2_response, size);
-	tmp += size;
-	if (ses->server->tilen > 0) {
-		memcpy(tmp, ses->server->tiblob, ses->server->tilen);
-		tmp += ses->server->tilen;
-	} else
-		ses->server->tilen = 0;
+	/* calculate session key,  BB what about adding similar ntlmv2 path? */
+	SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key);
+	if (first)
+		cifs_calculate_mac_key(&ses->server->mac_signing_key,
+				       ntlm_session_key, ses->password);
 
-	sec_blob->NtChallengeResponse.Length = cpu_to_le16(size +
-				ses->server->tilen);
+	memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE);
+	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE);
 	sec_blob->NtChallengeResponse.MaximumLength =
-		cpu_to_le16(size + ses->server->tilen);
+				cpu_to_le16(CIFS_SESS_KEY_SIZE);
+
+	tmp += CIFS_SESS_KEY_SIZE;
 
 	if (ses->domainName == NULL) {
 		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -524,6 +501,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		len = cifs_strtoUCS((__le16 *)tmp, ses->domainName,
 				    MAX_USERNAME_SIZE, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
+		len += 2; /* trailing null */
 		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
 		sec_blob->DomainName.Length = cpu_to_le16(len);
 		sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
@@ -540,6 +518,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
 				    MAX_USERNAME_SIZE, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
+		len += 2; /* trailing null */
 		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
 		sec_blob->UserName.Length = cpu_to_le16(len);
 		sec_blob->UserName.MaximumLength = cpu_to_le16(len);
@@ -551,26 +530,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->WorkstationName.MaximumLength = 0;
 	tmp += 2;
 
-	if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
-			!calc_seckey(ses->server)) {
-		memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE);
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
-		sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
-		sec_blob->SessionKey.MaximumLength =
-			cpu_to_le16(CIFS_CPHTXT_SIZE);
-		tmp += CIFS_CPHTXT_SIZE;
-	} else {
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
-		sec_blob->SessionKey.Length = 0;
-		sec_blob->SessionKey.MaximumLength = 0;
-	}
-
-	ses->server->sequence_number = 0;
-
-setup_ntlmv2_ret:
-	if (ses->server->tilen > 0)
-		kfree(ses->server->tiblob);
-
+	sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->SessionKey.Length = 0;
+	sec_blob->SessionKey.MaximumLength = 0;
 	return tmp - pbuffer;
 }
 
@@ -584,14 +546,15 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
 	return;
 }
 
-static int setup_ntlmssp_auth_req(char *ntlmsspblob,
+static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
 				  struct cifsSesInfo *ses,
 				  const struct nls_table *nls, bool first_time)
 {
 	int bloblen;
 
-	bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls,
+	bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls,
 					  first_time);
+	pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen);
 
 	return bloblen;
 }
@@ -617,7 +580,6 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	struct key *spnego_key = NULL;
 	__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
 	bool first_time;
-	char *ntlmsspblob;
 
 	if (ses == NULL)
 		return -EINVAL;
@@ -728,7 +690,7 @@ ssetup_ntlmssp_authenticate:
 
 		if (first_time) /* should this be moved into common code
 				  with similar ntlmv2 path? */
-			cifs_calculate_session_key(&ses->server->session_key,
+			cifs_calculate_mac_key(&ses->server->mac_signing_key,
 				ntlm_session_key, ses->password);
 		/* copy session key */
 
@@ -767,21 +729,12 @@ ssetup_ntlmssp_authenticate:
 			cpu_to_le16(sizeof(struct ntlmv2_resp));
 
 		/* calculate session key */
-		rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
-		if (rc) {
-			kfree(v2_sess_key);
-			goto ssetup_exit;
-		}
+		setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
 		/* FIXME: calculate MAC key */
 		memcpy(bcc_ptr, (char *)v2_sess_key,
 		       sizeof(struct ntlmv2_resp));
 		bcc_ptr += sizeof(struct ntlmv2_resp);
 		kfree(v2_sess_key);
-		if (ses->server->tilen > 0) {
-			memcpy(bcc_ptr, ses->server->tiblob,
-				ses->server->tilen);
-			bcc_ptr += ses->server->tilen;
-		}
 		if (ses->capabilities & CAP_UNICODE) {
 			if (iov[0].iov_len % 2) {
 				*bcc_ptr = 0;
@@ -812,15 +765,15 @@ ssetup_ntlmssp_authenticate:
 		}
 		/* bail out if key is too long */
 		if (msg->sesskey_len >
-		    sizeof(ses->server->session_key.data.krb5)) {
+		    sizeof(ses->server->mac_signing_key.data.krb5)) {
 			cERROR(1, "Kerberos signing key too long (%u bytes)",
 				msg->sesskey_len);
 			rc = -EOVERFLOW;
 			goto ssetup_exit;
 		}
 		if (first_time) {
-			ses->server->session_key.len = msg->sesskey_len;
-			memcpy(ses->server->session_key.data.krb5,
+			ses->server->mac_signing_key.len = msg->sesskey_len;
+			memcpy(ses->server->mac_signing_key.data.krb5,
 				msg->data, msg->sesskey_len);
 		}
 		pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
@@ -862,26 +815,12 @@ ssetup_ntlmssp_authenticate:
 			if (phase == NtLmNegotiate) {
 				setup_ntlmssp_neg_req(pSMB, ses);
 				iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
-				iov[1].iov_base = &pSMB->req.SecurityBlob[0];
 			} else if (phase == NtLmAuthenticate) {
 				int blob_len;
-				ntlmsspblob = kmalloc(5 *
-					sizeof(struct _AUTHENTICATE_MESSAGE),
-					GFP_KERNEL);
-				if (!ntlmsspblob) {
-					cERROR(1, "Can't allocate NTLMSSP");
-					rc = -ENOMEM;
-					goto ssetup_exit;
-				}
-
-				blob_len = setup_ntlmssp_auth_req(ntlmsspblob,
-								ses,
-								nls_cp,
-								first_time);
+				blob_len = setup_ntlmssp_auth_req(pSMB, ses,
+								  nls_cp,
+								  first_time);
 				iov[1].iov_len = blob_len;
-				iov[1].iov_base = ntlmsspblob;
-				pSMB->req.SecurityBlobLength =
-					cpu_to_le16(blob_len);
 				/* Make sure that we tell the server that we
 				   are using the uid that it just gave us back
 				   on the response (challenge) */
@@ -891,6 +830,7 @@ ssetup_ntlmssp_authenticate:
 				rc = -ENOSYS;
 				goto ssetup_exit;
 			}
+			iov[1].iov_base = &pSMB->req.SecurityBlob[0];
 			/* unicode strings must be word aligned */
 			if ((iov[0].iov_len + iov[1].iov_len) % 2) {
 				*bcc_ptr = 0;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e0588cdf4cc5..82f78c4d6978 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 					     SECMODE_SIGN_ENABLED))) {
 			rc = cifs_verify_signature(midQ->resp_buf,
-						ses->server,
+						&ses->server->mac_signing_key,
 						midQ->sequence_number+1);
 			if (rc) {
 				cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 		    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 					     SECMODE_SIGN_ENABLED))) {
 			rc = cifs_verify_signature(out_buf,
-						ses->server,
+						&ses->server->mac_signing_key,
 						midQ->sequence_number+1);
 			if (rc) {
 				cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 	    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 				     SECMODE_SIGN_ENABLED))) {
 		rc = cifs_verify_signature(out_buf,
-					   ses->server,
+					   &ses->server->mac_signing_key,
 					   midQ->sequence_number+1);
 		if (rc) {
 			cERROR(1, "Unexpected SMB signature");
-- 
cgit v1.2.2


From 639e7a913d81f918bfbf506e6ecd54664f787cbd Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 11:50:09 -0400
Subject: cifs: eliminate redundant xdev check in cifs_rename

The VFS always checks that the source and target of a rename are on the
same vfsmount, and hence have the same superblock. So, this check is
redundant. Remove it and simplify the error handling.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/inode.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 86a164f08a74..93f77d438d3c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1462,28 +1462,17 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 {
 	char *fromName = NULL;
 	char *toName = NULL;
-	struct cifs_sb_info *cifs_sb_source;
-	struct cifs_sb_info *cifs_sb_target;
+	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
 	FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
 	FILE_UNIX_BASIC_INFO *info_buf_target;
 	int xid, rc, tmprc;
 
-	cifs_sb_target = CIFS_SB(target_dir->i_sb);
-	cifs_sb_source = CIFS_SB(source_dir->i_sb);
-	tcon = cifs_sb_source->tcon;
+	cifs_sb = CIFS_SB(source_dir->i_sb);
+	tcon = cifs_sb->tcon;
 
 	xid = GetXid();
 
-	/*
-	 * BB: this might be allowed if same server, but different share.
-	 * Consider adding support for this
-	 */
-	if (tcon != cifs_sb_target->tcon) {
-		rc = -EXDEV;
-		goto cifs_rename_exit;
-	}
-
 	/*
 	 * we already have the rename sem so we do not need to
 	 * grab it again here to protect the path integrity
@@ -1519,17 +1508,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
 		info_buf_target = info_buf_source + 1;
 		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
 					info_buf_source,
-					cifs_sb_source->local_nls,
-					cifs_sb_source->mnt_cifs_flags &
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 		if (tmprc != 0)
 			goto unlink_target;
 
-		tmprc = CIFSSMBUnixQPathInfo(xid, tcon,
-					toName, info_buf_target,
-					cifs_sb_target->local_nls,
-					/* remap based on source sb */
-					cifs_sb_source->mnt_cifs_flags &
+		tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName,
+					info_buf_target,
+					cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 
 		if (tmprc == 0 && (info_buf_source->UniqueId ==
-- 
cgit v1.2.2


From 4266d9118f85b050a341992f0cfab40d392ef32c Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Wed, 8 Sep 2010 21:17:29 +0000
Subject: [CIFS] ntlmv2/ntlmssp remove-unused-function
 CalcNTLMv2_partial_mac_key

This function is not used, so remove the definition and declaration.

Reviewed-by: Jeff Layton <jlayton@samba.org>
Signed-off-by: Shirish Pargaonkar <shirishpargaonkar@gmail.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsencrypt.c | 57 ---------------------------------------------------
 fs/cifs/cifsproto.h   |  2 --
 2 files changed, 59 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 847628dfdc44..35042d8f7338 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -223,63 +223,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 	return 0;
 }
 
-int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses,
-			       const struct nls_table *nls_info)
-{
-	char temp_hash[16];
-	struct HMACMD5Context ctx;
-	char *ucase_buf;
-	__le16 *unicode_buf;
-	unsigned int i, user_name_len, dom_name_len;
-
-	if (ses == NULL)
-		return -EINVAL;
-
-	E_md4hash(ses->password, temp_hash);
-
-	hmac_md5_init_limK_to_64(temp_hash, 16, &ctx);
-	user_name_len = strlen(ses->userName);
-	if (user_name_len > MAX_USERNAME_SIZE)
-		return -EINVAL;
-	if (ses->domainName == NULL)
-		return -EINVAL; /* BB should we use CIFS_LINUX_DOM */
-	dom_name_len = strlen(ses->domainName);
-	if (dom_name_len > MAX_USERNAME_SIZE)
-		return -EINVAL;
-
-	ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL);
-	if (ucase_buf == NULL)
-		return -ENOMEM;
-	unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL);
-	if (unicode_buf == NULL) {
-		kfree(ucase_buf);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < user_name_len; i++)
-		ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]];
-	ucase_buf[i] = 0;
-	user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf,
-				      MAX_USERNAME_SIZE*2, nls_info);
-	unicode_buf[user_name_len] = 0;
-	user_name_len++;
-
-	for (i = 0; i < dom_name_len; i++)
-		ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]];
-	ucase_buf[i] = 0;
-	dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf,
-				     MAX_USERNAME_SIZE*2, nls_info);
-
-	unicode_buf[user_name_len + dom_name_len] = 0;
-	hmac_md5_update((const unsigned char *) unicode_buf,
-		(user_name_len+dom_name_len)*2, &ctx);
-
-	hmac_md5_final(ses->server->ntlmv2_hash, &ctx);
-	kfree(ucase_buf);
-	kfree(unicode_buf);
-	return 0;
-}
-
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
 			char *lnm_session_key)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1f5450814087..f399b16cb7d5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -365,8 +365,6 @@ extern int cifs_verify_signature(struct smb_hdr *,
 				__u32 expected_sequence_number);
 extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 				 const char *pass);
-extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
-			const struct nls_table *);
 extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
 extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
 			     const struct nls_table *);
-- 
cgit v1.2.2


From 522bbe65a2415fabce618186fc7777eb4c502989 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 12:00:49 -0400
Subject: cifs: prevent cifsd from exiting prematurely

When cifs_demultiplex_thread exits, it does a number of cleanup tasks
including freeing the TCP_Server_Info struct. Much of the existing code
in cifs assumes that when there is a cisfSesInfo struct, that it holds a
reference to a valid TCP_Server_Info struct.

We can never allow cifsd to exit when a cifsSesInfo struct is still
holding a reference to the server. The server pointers will then point
to freed memory.

This patch eliminates a couple of questionable conditions where it does
this.  The idea here is to make an -EINTR return from kernel_recvmsg
behave the same way as -ERESTARTSYS or -EAGAIN. If the task was
signalled from cifs_put_tcp_session, then tcpStatus will be CifsExiting,
and the kernel_recvmsg call will return quickly.

There's also another condition where this can occur too -- if the
tcpStatus is still in CifsNew, then it will also exit if the server
closes the socket prematurely.  I think we'll probably also need to fix
that situation, but that requires a bit more consideration.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0ea52e9f9065..5f68b968faa7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -400,7 +400,9 @@ incomplete_rcv:
 			cFYI(1, "call to reconnect done");
 			csocket = server->ssocket;
 			continue;
-		} else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
+		} else if (length == -ERESTARTSYS ||
+			   length == -EAGAIN ||
+			   length == -EINTR) {
 			msleep(1); /* minimum sleep to prevent looping
 				allowing socket to clear and app threads to set
 				tcpStatus CifsNeedReconnect if server hung */
@@ -422,10 +424,6 @@ incomplete_rcv:
 				   and so simply return error to mount */
 				break;
 			}
-			if (!try_to_freeze() && (length == -EINTR)) {
-				cFYI(1, "cifsd thread killed");
-				break;
-			}
 			cFYI(1, "Reconnect after unexpected peek error %d",
 				length);
 			cifs_reconnect(server);
@@ -522,8 +520,7 @@ incomplete_rcv:
 		     total_read += length) {
 			length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
 						pdu_length - total_read, 0);
-			if ((server->tcpStatus == CifsExiting) ||
-			    (length == -EINTR)) {
+			if (server->tcpStatus == CifsExiting) {
 				/* then will exit */
 				reconnect = 2;
 				break;
@@ -534,8 +531,9 @@ incomplete_rcv:
 				/* Now we will reread sock */
 				reconnect = 1;
 				break;
-			} else if ((length == -ERESTARTSYS) ||
-				   (length == -EAGAIN)) {
+			} else if (length == -ERESTARTSYS ||
+				   length == -EAGAIN ||
+				   length == -EINTR) {
 				msleep(1); /* minimum sleep to prevent looping,
 					      allowing socket to clear and app
 					      threads to set tcpStatus
-- 
cgit v1.2.2


From 7332f2a6217ee6925f83ef0e725013067ed316ba Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 12:00:49 -0400
Subject: cifs: eliminate some more premature cifsd exits

If the tcpStatus is still CifsNew, the main cifs_demultiplex_loop can
break out prematurely in some cases. This is wrong as we will almost
always have other structures with pointers to the TCP_Server_Info. If
the main loop breaks under any other condition other than tcpStatus ==
CifsExiting, then it'll face a use-after-free situation.

I don't see any reason to treat a CifsNew tcpStatus differently than
CifsGood. I believe we'll still want to attempt to reconnect in either
case. What should happen in those situations is that the MIDs get marked
as MID_RETRY_NEEDED. This will make CIFSSMBNegotiate return -EAGAIN, and
then the caller can retry the whole thing on a newly reconnected socket.
If that fails again in the same way, the caller of cifs_get_smb_ses
should tear down the TCP_Server_Info struct.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5f68b968faa7..5fde83f0c75e 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -416,14 +416,6 @@ incomplete_rcv:
 			} else
 				continue;
 		} else if (length <= 0) {
-			if (server->tcpStatus == CifsNew) {
-				cFYI(1, "tcp session abend after SMBnegprot");
-				/* some servers kill the TCP session rather than
-				   returning an SMB negprot error, in which
-				   case reconnecting here is not going to help,
-				   and so simply return error to mount */
-				break;
-			}
 			cFYI(1, "Reconnect after unexpected peek error %d",
 				length);
 			cifs_reconnect(server);
@@ -464,27 +456,18 @@ incomplete_rcv:
 			   an error on SMB negprot response */
 			cFYI(1, "Negative RFC1002 Session Response Error 0x%x)",
 				pdu_length);
-			if (server->tcpStatus == CifsNew) {
-				/* if nack on negprot (rather than
-				ret of smb negprot error) reconnecting
-				not going to help, ret error to mount */
-				break;
-			} else {
-				/* give server a second to
-				clean up before reconnect attempt */
-				msleep(1000);
-				/* always try 445 first on reconnect
-				since we get NACK on some if we ever
-				connected to port 139 (the NACK is
-				since we do not begin with RFC1001
-				session initialize frame) */
-				server->addr.sockAddr.sin_port =
-					htons(CIFS_PORT);
-				cifs_reconnect(server);
-				csocket = server->ssocket;
-				wake_up(&server->response_q);
-				continue;
-			}
+			/* give server a second to clean up  */
+			msleep(1000);
+			/* always try 445 first on reconnect since we get NACK
+			 * on some if we ever connected to port 139 (the NACK
+			 * is since we do not begin with RFC1001 session
+			 * initialize frame)
+			 */
+			server->addr.sockAddr.sin_port = htons(CIFS_PORT);
+			cifs_reconnect(server);
+			csocket = server->ssocket;
+			wake_up(&server->response_q);
+			continue;
 		} else if (temp != (char) 0) {
 			cERROR(1, "Unknown RFC 1002 frame");
 			cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
-- 
cgit v1.2.2


From 32670396e7fc6e4f37451a69339968985461a374 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 3 Sep 2010 12:00:50 -0400
Subject: cifs: prevent possible memory corruption in cifs_demultiplex_thread

cifs_demultiplex_thread sets the addr.sockAddr.sin_port without any
regard for the socket family. While it may be that the error in question
here never occurs on an IPv6 socket, it's probably best to be safe and
set the port properly if it ever does.

Break the port setting code out of cifs_fill_sockaddr and into a new
function, and call that from cifs_demultiplex_thread.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsproto.h |  3 ++-
 fs/cifs/connect.c   |  3 ++-
 fs/cifs/netmisc.c   | 22 +++++++++++++---------
 3 files changed, 17 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f399b16cb7d5..1d60c655e3e0 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			struct TCP_Server_Info *server);
 extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
+extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port);
 extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
-				unsigned short int port);
+				const unsigned short int port);
 extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
 			    const struct cifsTconInfo *, int /* length of
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5fde83f0c75e..67dad54fbfa1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -463,7 +463,8 @@ incomplete_rcv:
 			 * is since we do not begin with RFC1001 session
 			 * initialize frame)
 			 */
-			server->addr.sockAddr.sin_port = htons(CIFS_PORT);
+			cifs_set_port((struct sockaddr *)
+					&server->addr.sockAddr, CIFS_PORT);
 			cifs_reconnect(server);
 			csocket = server->ssocket;
 			wake_up(&server->response_q);
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index f97851119e6c..9aad47a2d62f 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
 }
 
 int
-cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
-		   const unsigned short int port)
+cifs_set_port(struct sockaddr *addr, const unsigned short int port)
 {
-	if (!cifs_convert_address(dst, src, len))
-		return 0;
-
-	switch (dst->sa_family) {
+	switch (addr->sa_family) {
 	case AF_INET:
-		((struct sockaddr_in *)dst)->sin_port = htons(port);
+		((struct sockaddr_in *)addr)->sin_port = htons(port);
 		break;
 	case AF_INET6:
-		((struct sockaddr_in6 *)dst)->sin6_port = htons(port);
+		((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
 		break;
 	default:
 		return 0;
 	}
-
 	return 1;
 }
 
+int
+cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len,
+		   const unsigned short int port)
+{
+	if (!cifs_convert_address(dst, src, len))
+		return 0;
+	return cifs_set_port(dst, port);
+}
+
 /*****************************************************************************
 convert a NT status code to a dos class/code
  *****************************************************************************/
-- 
cgit v1.2.2


From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001
From: Stefan Bader <stefan.bader@canonical.com>
Date: Tue, 31 Aug 2010 15:52:27 +0200
Subject: mm: Move vma_stack_continue into mm.h

So it can be used by all that need to check for that.

Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 439fc1f1c1c4..271afc48b9a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	/* We don't show the stack guard page in /proc/maps */
 	start = vma->vm_start;
 	if (vma->vm_flags & VM_GROWSDOWN)
-		start += PAGE_SIZE;
+		if (!vma_stack_continue(vma->vm_prev, vma->vm_start))
+			start += PAGE_SIZE;
 
 	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
 			start,
-- 
cgit v1.2.2


From 7a801ac6f5067539ceb5fad0fe90ec49fc156e47 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Thu, 9 Sep 2010 16:37:33 -0700
Subject: O_DIRECT: fix the splitting up of contiguous I/O

commit c2c6ca4 (direct-io: do not merge logically non-contiguous requests)
introduced a bug whereby all O_DIRECT I/Os were submitted a page at a time
to the block layer.  The problem is that the code expected
dio->block_in_file to correspond to the current page in the dio.  In fact,
it corresponds to the previous page submitted via submit_page_section.
This was purely an oversight, as the dio->cur_page_fs_offset field was
introduced for just this purpose.  This patch simply uses the correct
variable when calculating whether there is a mismatch between contiguous
logical blocks and contiguous physical blocks (as described in the
comments).

I also switched the if conditional following this check to an else if, to
ensure that we never call dio_bio_submit twice for the same dio (in
theory, this should not happen, anyway).

I've tested this by running blktrace and verifying that a 64KB I/O was
submitted as a single I/O.  I also ran the patched kernel through
xfstests' aio tests using xfs, ext4 (with 1k and 4k block sizes) and btrfs
and verified that there were no regressions as compared to an unpatched
kernel.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Josef Bacik <jbacik@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: <stable@kernel.org>		[2.6.35.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/direct-io.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 51f270b479b6..48d74c7391d1 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio)
 	int ret = 0;
 
 	if (dio->bio) {
-		loff_t cur_offset = dio->block_in_file << dio->blkbits;
+		loff_t cur_offset = dio->cur_page_fs_offset;
 		loff_t bio_next_offset = dio->logical_offset_in_bio +
 			dio->bio->bi_size;
 
@@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio)
 		 * Submit now if the underlying fs is about to perform a
 		 * metadata read
 		 */
-		if (dio->boundary)
+		else if (dio->boundary)
 			dio_bio_submit(dio);
 	}
 
-- 
cgit v1.2.2


From ed430fec756ad65f7cfba24f8ad17c3d5a403290 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 9 Sep 2010 16:37:36 -0700
Subject: proc: export uncached bit properly in /proc/kpageflags

Fix the left-over old ifdef for PG_uncached in /proc/kpageflags.  Now it's
used by x86, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/page.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 180cf5a0bd67..3b8b45660331 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_HWPOISON,	PG_hwpoison);
 #endif
 
-#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
+#ifdef CONFIG_ARCH_USES_PG_UNCACHED
 	u |= kpf_copy_bit(k, KPF_UNCACHED,	PG_uncached);
 #endif
 
-- 
cgit v1.2.2


From ee3aebdd8f5f8eac41c25c80ceee3d728f920f3b Mon Sep 17 00:00:00 2001
From: Jan Sembera <jsembera@suse.cz>
Date: Thu, 9 Sep 2010 16:37:54 -0700
Subject: binfmt_misc: fix binfmt_misc priority

Commit 74641f584da ("alpha: binfmt_aout fix") (May 2009) introduced a
regression - binfmt_misc is now consulted after binfmt_elf, which will
unfortunately break ia32el.  ia32 ELF binaries on ia64 used to be matched
using binfmt_misc and executed using wrapper.  As 32bit binaries are now
matched by binfmt_elf before bindmt_misc kicks in, the wrapper is ignored.

The fix increases precedence of binfmt_misc to the original state.

Signed-off-by: Jan Sembera <jsembera@suse.cz>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Richard Henderson <rth@twiddle.net
Cc: <stable@kernel.org>		[2.6.everything.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a7528b913936..fd0cc0bf9a40 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void)
 {
 	int err = register_filesystem(&bm_fs_type);
 	if (!err) {
-		err = register_binfmt(&misc_format);
+		err = insert_binfmt(&misc_format);
 		if (err)
 			unregister_filesystem(&bm_fs_type);
 	}
-- 
cgit v1.2.2


From 3ab04d5cf9736b7a4e9dfcf28285d8663b01aa0e Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@suse.de>
Date: Thu, 9 Sep 2010 16:38:12 -0700
Subject: vfs: take O_NONBLOCK out of the O_* uniqueness test

O_NONBLOCK on parisc has a dual value:

#define O_NONBLOCK	000200004 /* HPUX has separate NDELAY & NONBLOCK */

It is caught by the O_* bits uniqueness check and leads to a parisc
compile error.  The fix would be to take O_NONBLOCK out.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Cc: Jamie Lokier <jamie@shareable.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fcntl.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6769fd0f35b8..f8cc34f542c3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync);
 
 static int __init fcntl_init(void)
 {
-	/* please add new bits here to ensure allocation uniqueness */
-	BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+	/*
+	 * Please add new bits here to ensure allocation uniqueness.
+	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
+	 * is defined as O_NONBLOCK on some platforms and not on others.
+	 */
+	BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
 		O_RDONLY	| O_WRONLY	| O_RDWR	|
 		O_CREAT		| O_EXCL	| O_NOCTTY	|
-		O_TRUNC		| O_APPEND	| O_NONBLOCK	|
+		O_TRUNC		| O_APPEND	| /* O_NONBLOCK	| */
 		__O_SYNC	| O_DSYNC	| FASYNC	|
 		O_DIRECT	| O_LARGEFILE	| O_DIRECTORY	|
 		O_NOFOLLOW	| O_NOATIME	| O_CLOEXEC	|
-- 
cgit v1.2.2


From eee743fd7eac9f2ea69ad06d093dfb5a12538fe5 Mon Sep 17 00:00:00 2001
From: "Jorge Boncompte [DTI2]" <jorge@dti2.net>
Date: Thu, 9 Sep 2010 16:38:19 -0700
Subject: minix: fix regression in minix_mkdir()

Commit 9eed1fb721c ("minix: replace inode uid,gid,mode init with helper")
broke directory creation on minix filesystems.

Fix it by passing the needed mode flag to inode init helper.

Signed-off-by: Jorge Boncompte [DTI2] <jorge@dti2.net>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@kernel.org>		[2.6.35.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/minix/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index e20ee85955d1..f3f3578393a4 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
 
 	inode_inc_link_count(dir);
 
-	inode = minix_new_inode(dir, mode, &err);
+	inode = minix_new_inode(dir, S_IFDIR | mode, &err);
 	if (!inode)
 		goto out_dir;
 
-- 
cgit v1.2.2


From a122eb2fdfd78b58c6dd992d6f4b1aaef667eef9 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Date: Mon, 6 Sep 2010 18:24:57 -0400
Subject: xfs: prevent reading uninitialized stack memory

The XFS_IOC_FSGETXATTR ioctl allows unprivileged users to read 12
bytes of uninitialized stack memory, because the fsxattr struct
declared on the stack in xfs_ioc_fsgetxattr() does not alter (or zero)
the 12-byte fsx_pad member before copying it back to the user.  This
patch takes care of it.

Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4fec427b83ef..3b9e626f7cd1 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr(
 {
 	struct fsxattr		fa;
 
+	memset(&fa, 0, sizeof(struct fsxattr));
+
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	fa.fsx_xflags = xfs_ip2xflags(ip);
 	fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-- 
cgit v1.2.2


From 1b528181b2ffa14721fb28ad1bd539fe1732c583 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 7 Sep 2010 19:35:49 -0700
Subject: setup_arg_pages: diagnose excessive argument size

The CONFIG_STACK_GROWSDOWN variant of setup_arg_pages() does not
check the size of the argument/environment area on the stack.
When it is unworkably large, shift_arg_pages() hits its BUG_ON.
This is exploitable with a very large RLIMIT_STACK limit, to
create a crash pretty easily.

Check that the initial stack is not too large to make it possible
to map in any executable.  We're not checking that the actual
executable (or intepreter, for binfmt_elf) will fit.  So those
mappings might clobber part of the initial stack mapping.  But
that is just userland lossage that userland made happen, not a
kernel problem.

Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 2d9455282744..1b63237fc6dc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -594,6 +594,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
 #else
 	stack_top = arch_align_stack(stack_top);
 	stack_top = PAGE_ALIGN(stack_top);
+
+	if (unlikely(stack_top < mmap_min_addr) ||
+	    unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
+		return -ENOMEM;
+
 	stack_shift = vma->vm_end - stack_top;
 
 	bprm->p -= stack_shift;
-- 
cgit v1.2.2


From 7993bc1f4663c0db67bb8f0d98e6678145b387cd Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 7 Sep 2010 19:36:28 -0700
Subject: execve: improve interactivity with large arguments

This adds a preemption point during the copying of the argument and
environment strings for execve, in copy_strings().  There is already
a preemption point in the count() loop, so this doesn't add any new
points in the abstract sense.

When the total argument+environment strings are very large, the time
spent copying them can be much more than a normal user time slice.
So this change improves the interactivity of the rest of the system
when one process is doing an execve with very large arguments.

Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 1b63237fc6dc..6f2d777431a8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -419,6 +419,8 @@ static int copy_strings(int argc, const char __user *const __user *argv,
 		while (len > 0) {
 			int offset, bytes_to_copy;
 
+			cond_resched();
+
 			offset = pos % PAGE_SIZE;
 			if (offset == 0)
 				offset = PAGE_SIZE;
-- 
cgit v1.2.2


From 9aea5a65aa7a1af9a4236dfaeb0088f1624f9919 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 7 Sep 2010 19:37:06 -0700
Subject: execve: make responsive to SIGKILL with large arguments

An execve with a very large total of argument/environment strings
can take a really long time in the execve system call.  It runs
uninterruptibly to count and copy all the strings.  This change
makes it abort the exec quickly if sent a SIGKILL.

Note that this is the conservative change, to interrupt only for
SIGKILL, by using fatal_signal_pending().  It would be perfectly
correct semantics to let any signal interrupt the string-copying in
execve, i.e. use signal_pending() instead of fatal_signal_pending().
We'll save that change for later, since it could have user-visible
consequences, such as having a timer set too quickly make it so that
an execve can never complete, though it always happened to work before.

Signed-off-by: Roland McGrath <roland@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 6f2d777431a8..828dd2461d6b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max)
 			argv++;
 			if (i++ >= max)
 				return -E2BIG;
+
+			if (fatal_signal_pending(current))
+				return -ERESTARTNOHAND;
 			cond_resched();
 		}
 	}
@@ -419,6 +422,10 @@ static int copy_strings(int argc, const char __user *const __user *argv,
 		while (len > 0) {
 			int offset, bytes_to_copy;
 
+			if (fatal_signal_pending(current)) {
+				ret = -ERESTARTNOHAND;
+				goto out;
+			}
 			cond_resched();
 
 			offset = pos % PAGE_SIZE;
-- 
cgit v1.2.2


From 51749e47e191db8e588ad5cebea731caf7b705d7 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 8 Sep 2010 09:00:22 +0000
Subject: xfs: log IO completion workqueue is a high priority queue

The workqueue implementation in 2.6.36-rcX has changed, resulting
in the workqueues no longer having dedicated threads for work
processing. This has caused severe livelocks under heavy parallel
create workloads because the log IO completions have been getting
held up behind metadata IO completions.  Hence log commits would
stall, memory allocation would stall because pages could not be
cleaned, and lock contention on the AIL during inode IO completion
processing was being seen to slow everything down even further.

By making the log Io completion workqueue a high priority workqueue,
they are queued ahead of all data/metadata IO completions and
processed before the data/metadata completions. Hence the log never
gets stalled, and operations needed to clean memory can continue as
quickly as possible. This avoids the livelock conditions and allos
the system to keep running under heavy load as per normal.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
 fs/xfs/linux-2.6/xfs_buf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d72cf2bb054a..286e36e21dae 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1932,7 +1932,8 @@ xfs_buf_init(void)
 	if (!xfs_buf_zone)
 		goto out;
 
-	xfslogd_workqueue = create_workqueue("xfslogd");
+	xfslogd_workqueue = alloc_workqueue("xfslogd",
+					WQ_RESCUER | WQ_HIGHPRI, 1);
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-- 
cgit v1.2.2


From 5e64b0d9e86ffff8b299556341d85319117539e9 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 7 Sep 2010 13:30:05 +0800
Subject: ocfs2/lockdep: Move ip_xattr_sem out of ocfs2_xattr_get_nolock.

As the name shows, we shouldn't have any lock in
ocfs2_xattr_get_nolock. so lift ip_xattr_sem to the caller.
This should be safe for us since the only 2 callers are:
1. ocfs2_xattr_get which will lock the resources.
2. ocfs2_mknod which don't need this locking.

And this also resolves the following lockdep warning.

=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.35+ #5
-------------------------------------------------------
reflink/30027 is trying to acquire lock:
 (&oi->ip_alloc_sem){+.+.+.}, at: [<ffffffffa0673b67>] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2]

but task is already holding lock:
 (&oi->ip_xattr_sem){++++..}, at: [<ffffffffa0673b58>] ocfs2_reflink_ioctl+0x68b/0x1226 [ocfs2]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 (&oi->ip_xattr_sem){++++..}:
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82065a81>] lock_acquire+0xc6/0xed
       [<ffffffff82339650>] down_read+0x34/0x47
       [<ffffffffa0691cb8>] ocfs2_xattr_get_nolock+0xa0/0x4e6 [ocfs2]
       [<ffffffffa069d64f>] ocfs2_get_acl_nolock+0x5c/0x132 [ocfs2]
       [<ffffffffa069d9c7>] ocfs2_init_acl+0x60/0x243 [ocfs2]
       [<ffffffffa066499d>] ocfs2_mknod+0xae8/0xfea [ocfs2]
       [<ffffffffa0665041>] ocfs2_create+0x9d/0x105 [ocfs2]
       [<ffffffff820e1c83>] vfs_create+0x9b/0xf4
       [<ffffffff820e20bb>] do_last+0x2fd/0x5be
       [<ffffffff820e31c0>] do_filp_open+0x1fb/0x572
       [<ffffffff820d6cf6>] do_sys_open+0x5a/0xe7
       [<ffffffff820d6dac>] sys_open+0x1b/0x1d
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

-> #2 (jbd2_handle){+.+...}:
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82065a81>] lock_acquire+0xc6/0xed
       [<ffffffffa0604ff8>] start_this_handle+0x4a3/0x4bc [jbd2]
       [<ffffffffa06051d6>] jbd2__journal_start+0xba/0xee [jbd2]
       [<ffffffffa0605218>] jbd2_journal_start+0xe/0x10 [jbd2]
       [<ffffffffa065ca34>] ocfs2_start_trans+0xb7/0x19b [ocfs2]
       [<ffffffffa06645f3>] ocfs2_mknod+0x73e/0xfea [ocfs2]
       [<ffffffffa0665041>] ocfs2_create+0x9d/0x105 [ocfs2]
       [<ffffffff820e1c83>] vfs_create+0x9b/0xf4
       [<ffffffff820e20bb>] do_last+0x2fd/0x5be
       [<ffffffff820e31c0>] do_filp_open+0x1fb/0x572
       [<ffffffff820d6cf6>] do_sys_open+0x5a/0xe7
       [<ffffffff820d6dac>] sys_open+0x1b/0x1d
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

-> #1 (&journal->j_trans_barrier){.+.+..}:
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82064fa9>] lock_release_non_nested+0x1e5/0x24b
       [<ffffffff82065999>] lock_release+0x158/0x17a
       [<ffffffff823389f6>] __mutex_unlock_slowpath+0xbf/0x11b
       [<ffffffff82338a5b>] mutex_unlock+0x9/0xb
       [<ffffffffa0679673>] ocfs2_free_ac_resource+0x31/0x67 [ocfs2]
       [<ffffffffa067c6bc>] ocfs2_free_alloc_context+0x11/0x1d [ocfs2]
       [<ffffffffa0633de0>] ocfs2_write_begin_nolock+0x141e/0x159b [ocfs2]
       [<ffffffffa0635523>] ocfs2_write_begin+0x11e/0x1e7 [ocfs2]
       [<ffffffff820a1297>] generic_file_buffered_write+0x10c/0x210
       [<ffffffffa0653624>] ocfs2_file_aio_write+0x4cc/0x6d3 [ocfs2]
       [<ffffffff820d822d>] do_sync_write+0xc2/0x106
       [<ffffffff820d897b>] vfs_write+0xae/0x131
       [<ffffffff820d8e55>] sys_write+0x47/0x6f
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

-> #0 (&oi->ip_alloc_sem){+.+.+.}:
       [<ffffffff82063f92>] validate_chain+0x727/0xd68
       [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
       [<ffffffff82065a81>] lock_acquire+0xc6/0xed
       [<ffffffff82339694>] down_write+0x31/0x52
       [<ffffffffa0673b67>] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2]
       [<ffffffffa06599f6>] ocfs2_ioctl+0x61a/0x656 [ocfs2]
       [<ffffffff820e53ac>] vfs_ioctl+0x2a/0x9d
       [<ffffffff820e5903>] do_vfs_ioctl+0x45d/0x4ae
       [<ffffffff820e59ab>] sys_ioctl+0x57/0x7a
       [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/xattr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d03469f61801..06fa5e77c40e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;
 
-	down_read(&oi->ip_xattr_sem);
 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
 				    buffer_size, &xis);
 	if (ret == -ENODATA && di->i_xattr_loc)
 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
 					    buffer_size, &xbs);
-	up_read(&oi->ip_xattr_sem);
 
 	return ret;
 }
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode,
 		mlog_errno(ret);
 		return ret;
 	}
+	down_read(&OCFS2_I(inode)->ip_xattr_sem);
 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
 				     name, buffer, buffer_size);
+	up_read(&OCFS2_I(inode)->ip_xattr_sem);
 
 	ocfs2_inode_unlock(inode, 0);
 
-- 
cgit v1.2.2


From 07eaac9438b13ec0b863111698b91ccec8f3b8d4 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Tue, 7 Sep 2010 13:30:06 +0800
Subject: ocfs2: Fix lockdep warning in reflink.

This patch change mutex_lock to a new subclass and
add a new inode lock subclass for the target inode
which caused this lockdep warning.

=============================================
[ INFO: possible recursive locking detected ]
2.6.35+ #5
---------------------------------------------
reflink/11086 is trying to acquire lock:
 (Meta){+++++.}, at: [<ffffffffa06f9d65>] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]

but task is already holding lock:
 (Meta){+++++.}, at: [<ffffffffa06f9aa0>] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2]

other info that might help us debug this:
6 locks held by reflink/11086:
 #0:  (&sb->s_type->i_mutex_key#15/1){+.+.+.}, at: [<ffffffff820e09ec>] lookup_create+0x26/0x97
 #1:  (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [<ffffffffa06f99a0>] ocfs2_reflink_ioctl+0x4d3/0x1229 [ocfs2]
 #2:  (Meta){+++++.}, at: [<ffffffffa06f9aa0>] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2]
 #3:  (&oi->ip_xattr_sem){+.+.+.}, at: [<ffffffffa06f9b58>] ocfs2_reflink_ioctl+0x68b/0x1229 [ocfs2]
 #4:  (&oi->ip_alloc_sem){+.+.+.}, at: [<ffffffffa06f9b67>] ocfs2_reflink_ioctl+0x69a/0x1229 [ocfs2]
 #5:  (&sb->s_type->i_mutex_key#15/2){+.+...}, at: [<ffffffffa06f9d4f>] ocfs2_reflink_ioctl+0x882/0x1229 [ocfs2]

stack backtrace:
Pid: 11086, comm: reflink Not tainted 2.6.35+ #5
Call Trace:
 [<ffffffff82063dd9>] validate_chain+0x56e/0xd68
 [<ffffffff82062275>] ? mark_held_locks+0x49/0x69
 [<ffffffff82064d6d>] __lock_acquire+0x79a/0x7f1
 [<ffffffff82065a81>] lock_acquire+0xc6/0xed
 [<ffffffffa06f9d65>] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffffa06c9ade>] __ocfs2_cluster_lock+0x975/0xa0d [ocfs2]
 [<ffffffffa06f9d65>] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffffa06e107b>] ? ocfs2_wait_for_recovery+0x15/0x8a [ocfs2]
 [<ffffffffa06cb6ea>] ocfs2_inode_lock_full_nested+0x1ac/0xdc5 [ocfs2]
 [<ffffffffa06f9d65>] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffff820623a0>] ? trace_hardirqs_on_caller+0x10b/0x12f
 [<ffffffff82060193>] ? debug_mutex_free_waiter+0x4f/0x53
 [<ffffffffa06f9d65>] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2]
 [<ffffffffa06ce24a>] ? ocfs2_file_lock_res_init+0x66/0x78 [ocfs2]
 [<ffffffff820bb2d2>] ? might_fault+0x40/0x8d
 [<ffffffffa06df9f6>] ocfs2_ioctl+0x61a/0x656 [ocfs2]
 [<ffffffff820ee5d3>] ? mntput_no_expire+0x1d/0xb0
 [<ffffffff820e07b3>] ? path_put+0x2c/0x31
 [<ffffffff820e53ac>] vfs_ioctl+0x2a/0x9d
 [<ffffffff820e5903>] do_vfs_ioctl+0x45d/0x4ae
 [<ffffffff8233a7f6>] ? _raw_spin_unlock+0x26/0x2a
 [<ffffffff8200299c>] ? sysret_check+0x27/0x62
 [<ffffffff820e59ab>] sys_ioctl+0x57/0x7a
 [<ffffffff8200296b>] system_call_fastpath+0x16/0x1b

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlmglue.h      | 1 +
 fs/ocfs2/refcounttree.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index d1ce48e1b3d6..1d596d8c4a4a 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -84,6 +84,7 @@ enum {
 	OI_LS_PARENT,
 	OI_LS_RENAME1,
 	OI_LS_RENAME2,
+	OI_LS_REFLINK_TARGET,
 };
 
 int ocfs2_dlm_init(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 0afeda83120f..efdd75607406 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4201,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
 		goto out;
 	}
 
-	mutex_lock(&new_inode->i_mutex);
-	ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
+	mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+	ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
+				      OI_LS_REFLINK_TARGET);
 	if (ret) {
 		mlog_errno(ret);
 		goto out_unlock;
-- 
cgit v1.2.2


From 0f4da216b8c3c35c90ecd18e1899c6f125957c2b Mon Sep 17 00:00:00 2001
From: Tristan Ye <tristan.ye@oracle.com>
Date: Wed, 8 Sep 2010 17:12:38 +0800
Subject: Ocfs2: Re-access the journal after ocfs2_insert_extent() in dxdir
 codes.

In ocfs2_dx_dir_rebalance(), we need to rejournal_acess the blocks after
calling ocfs2_insert_extent() since growing an extent tree may trigger
ocfs2_extend_trans(), which makes previous journal_access meaningless.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dir.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index f04ebcfffc4a..c49f6de0e7ab 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 		goto out_commit;
 	}
 
+	cpos = split_hash;
+	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
+				       data_ac, meta_ac, new_dx_leaves,
+				       num_dx_leaves);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
 	for (i = 0; i < num_dx_leaves; i++) {
 		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
 					      orig_dx_leaves[i],
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
 			mlog_errno(ret);
 			goto out_commit;
 		}
-	}
 
-	cpos = split_hash;
-	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
-				       data_ac, meta_ac, new_dx_leaves,
-				       num_dx_leaves);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_commit;
+		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
+					      new_dx_leaves[i],
+					      OCFS2_JOURNAL_ACCESS_WRITE);
+		if (ret) {
+			mlog_errno(ret);
+			goto out_commit;
+		}
 	}
 
 	ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
-- 
cgit v1.2.2


From 228ac6357718df2d5c8d70210fa51b2225aab5ee Mon Sep 17 00:00:00 2001
From: Tristan Ye <tristan.ye@oracle.com>
Date: Fri, 10 Sep 2010 10:16:33 +0800
Subject: Ocfs2: Handle empty list in lockres_seq_start() for dlmdebug.c

This patch tries to handle the case in which list 'dlm->tracking_list' is
empty, to avoid accessing an invalid pointer. It fixes the following oops:

http://oss.oracle.com/bugzilla/show_bug.cgi?id=1287

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmdebug.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5efdd37dfe48..901ca52bf86b 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
 	spin_lock(&dlm->track_lock);
 	if (oldres)
 		track_list = &oldres->tracking;
-	else
+	else {
 		track_list = &dlm->tracking_list;
+		if (list_empty(track_list)) {
+			dl = NULL;
+			spin_unlock(&dlm->track_lock);
+			goto bail;
+		}
+	}
 
 	list_for_each_entry(res, track_list, tracking) {
 		if (&res->tracking == &dlm->tracking_list)
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
 	} else
 		dl = NULL;
 
+bail:
 	/* passed to seq_show */
 	return dl;
 }
-- 
cgit v1.2.2


From ca04d9c3ec721e474f00992efc1b1afb625507f5 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 26 Aug 2010 16:12:01 -0700
Subject: ceph: fix null pointer deref on anon root dentry release

When we release a root dentry, particularly after a splice, the parent
(actually our) inode was evaluating to NULL and was getting dereferenced
by ceph_snap().  This is reproduced by something as simple as

 mount -t ceph monhost:/a/b mnt
 mount -t ceph monhost:/a mnt2
 ls mnt2

A splice_dentry() would kill the old 'b' inode's root dentry, and we'd
crash while releasing it.

Fix by checking for both the ROOT and NULL cases explicitly.  We only need
to invalidate the parent dir when we have a correct parent to invalidate.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/dir.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6e4f43ff23ec..a1986eb52045 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1021,11 +1021,15 @@ out_touch:
 static void ceph_dentry_release(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
-	struct inode *parent_inode = dentry->d_parent->d_inode;
-	u64 snapid = ceph_snap(parent_inode);
+	struct inode *parent_inode = NULL;
+	u64 snapid = CEPH_NOSNAP;
 
+	if (!IS_ROOT(dentry)) {
+		parent_inode = dentry->d_parent->d_inode;
+		if (parent_inode)
+			snapid = ceph_snap(parent_inode);
+	}
 	dout("dentry_release %p parent %p\n", dentry, parent_inode);
-
 	if (parent_inode && snapid != CEPH_SNAPDIR) {
 		struct ceph_inode_info *ci = ceph_inode(parent_inode);
 
-- 
cgit v1.2.2


From 3d4401d9d0aef5c40706350685ddea3df6708496 Mon Sep 17 00:00:00 2001
From: Yehuda Sadeh <yehuda@hq.newdream.net>
Date: Fri, 3 Sep 2010 12:57:11 -0700
Subject: ceph: fix pagelist kunmap tail

A wrong parameter was passed to the kunmap.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/pagelist.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f47d364..46a368b6dce5 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
 
 #include "pagelist.h"
 
+static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
+{
+	struct page *page = list_entry(pl->head.prev, struct page,
+				       lru);
+	kunmap(page);
+}
+
 int ceph_pagelist_release(struct ceph_pagelist *pl)
 {
 	if (pl->mapped_tail)
-		kunmap(pl->mapped_tail);
+		ceph_pagelist_unmap_tail(pl);
+
 	while (!list_empty(&pl->head)) {
 		struct page *page = list_first_entry(&pl->head, struct page,
 						     lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
 	pl->room += PAGE_SIZE;
 	list_add_tail(&page->lru, &pl->head);
 	if (pl->mapped_tail)
-		kunmap(pl->mapped_tail);
+		ceph_pagelist_unmap_tail(pl);
 	pl->mapped_tail = kmap(page);
 	return 0;
 }
-- 
cgit v1.2.2


From 3612abbd5df6baa9ca3e0777c6c8646e202d3f66 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 7 Sep 2010 15:59:27 -0700
Subject: ceph: fix reconnect encoding for old servers

Fix the reconnect encoding to encode the cap record when the MDS does not
have the FLOCK capability (i.e., pre v0.22).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/mds_client.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f091b1351786..fad95f8f2608 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 						num_fcntl_locks,
 						num_flock_locks);
 		unlock_kernel();
+	} else {
+		err = ceph_pagelist_append(pagelist, &rec, reclen);
 	}
 
 out_free:
-- 
cgit v1.2.2


From a77d9f7dce7600058d56f0670ed29d77abffcde2 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Sat, 11 Sep 2010 10:55:25 -0700
Subject: ceph: fix file offset wrapping at 4GB on 32-bit archs

Cast the value before shifting so that we don't run out of bits with a
32-bit unsigned long.  This fixes wrapping of high file offsets into the
low 4GB of a file on disk, and the subsequent data corruption for large
files.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4cfce1ee31fa..50461b8c23a4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -766,7 +766,8 @@ get_more_pages:
 			/* ok */
 			if (locked_pages == 0) {
 				/* prepare async write request */
-				offset = page->index << PAGE_CACHE_SHIFT;
+				offset = (unsigned long long)page->index
+					<< PAGE_CACHE_SHIFT;
 				len = wsize;
 				req = ceph_osdc_new_request(&client->osdc,
 					    &ci->i_layout,
-- 
cgit v1.2.2


From b1bde04c6d9a120dec602cc8a70b8a7f21600883 Mon Sep 17 00:00:00 2001
From: Fabio Olive Leite <fleite@redhat.com>
Date: Sun, 12 Sep 2010 19:55:25 -0400
Subject: Remove incorrect do_vfs_lock message

The do_vfs_lock function on fs/nfs/file.c is only called if NLM is
not being used, via the -onolock mount option. Therefore it cannot
really be "out of sync with lock manager" when the local locking
function called returns an error, as there will be no corresponding
call to the NLM. For details, simply check the if/else on do_setlk
and do_unlk on fs/nfs/file.c.

Signed-Off-By: Fabio Olive Leite <fleite@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eb51bd6201da..05bf3c0dc751 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -723,10 +723,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 		default:
 			BUG();
 	}
-	if (res < 0)
-		dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
-			" - error %d!\n",
-				__func__, res);
 	return res;
 }
 
-- 
cgit v1.2.2


From b20d37ca9561711c6a3c4b859c2855f49565e061 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 12 Sep 2010 19:55:26 -0400
Subject: NFS: Fix a typo in nfs_sockaddr_match_ipaddr6

Reported-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4e7df2adb212..e7340729af89 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -275,7 +275,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
 	    sin1->sin6_scope_id != sin2->sin6_scope_id)
 		return 0;
 
-	return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
+	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
 }
 #else	/* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
 static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
-- 
cgit v1.2.2


From fbf3fdd2443965d9ba6fb4b5fecd1f6e0847218f Mon Sep 17 00:00:00 2001
From: Menyhart Zoltan <Zoltan.Menyhart@bull.net>
Date: Sun, 12 Sep 2010 19:55:26 -0400
Subject: statfs() gives ESTALE error

Hi,

An NFS client executes a statfs("file", &buff) call.
"file" exists / existed, the client has read / written it,
but it has already closed it.

user_path(pathname, &path) looks up "file" successfully in the
directory-cache  and restarts the aging timer of the directory-entry.
Even if "file" has already been removed from the server, because the
lookupcache=positive option I use, keeps the entries valid for a while.

nfs_statfs() returns ESTALE if "file" has already been removed from the
server.

If the user application repeats the statfs("file", &buff) call, we
are stuck: "file" remains young forever in the directory-cache.

Signed-off-by: Zoltan Menyhart  <Zoltan.Menyhart@bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@kernel.org
---
 fs/nfs/super.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ec3966e4706b..f4cbf0c306c6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 		goto out_err;
 
 	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
+	if (unlikely(error == -ESTALE)) {
+		struct dentry *pd_dentry;
 
+		pd_dentry = dget_parent(dentry);
+		if (pd_dentry != NULL) {
+			nfs_zap_caches(pd_dentry->d_inode);
+			dput(pd_dentry);
+		}
+	}
 	nfs_free_fattr(res.fattr);
 	if (error < 0)
 		goto out_err;
-- 
cgit v1.2.2


From 827e3457022d0bb0b1bb8a0eb88501876fe7dcf0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 12 Sep 2010 19:57:50 -0400
Subject: SUNRPC: Fix the NFSv4 and RPCSEC_GSS Kconfig dependencies
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NFSv4 client's callback server calls svc_gss_principal(), which
is defined in the auth_rpcgss.ko

The NFSv4 server has the same dependency, and in addition calls
svcauth_gss_flavor(), gss_mech_get_by_pseudoflavor(),
gss_pseudoflavor_to_service() and gss_mech_put() from the same module.

The module auth_rpcgss itself has no dependencies aside from sunrpc,
so we only need to select RPCSEC_GSS.

Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/Kconfig  | 1 +
 fs/nfsd/Kconfig | 1 +
 2 files changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 6c2aad49d731..f7e13db613cb 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -63,6 +63,7 @@ config NFS_V3_ACL
 config NFS_V4
 	bool "NFS client support for NFS version 4"
 	depends on NFS_FS
+	select SUNRPC_GSS
 	help
 	  This option enables support for version 4 of the NFS protocol
 	  (RFC 3530) in the kernel's NFS client.
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 95932f523aef..4264377552e2 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -69,6 +69,7 @@ config NFSD_V4
 	depends on NFSD && PROC_FS && EXPERIMENTAL
 	select NFSD_V3
 	select FS_POSIX_ACL
+	select SUNRPC_GSS
 	help
 	  This option enables support in your system's NFS server for
 	  version 4 of the NFS protocol (RFC 3530).
-- 
cgit v1.2.2


From 62b2be591a9b12c550308ef7718a31abfc815b50 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lionkov@gmail.com>
Date: Tue, 24 Aug 2010 18:13:59 +0000
Subject: fs/9p, net/9p: memory leak fixes

Four memory leak fixes in the 9P code.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c7c23eab9440..84159cf9c521 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1128,6 +1128,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
 		generic_fillattr(dentry->d_inode, stat);
 
+	p9stat_free(st);
 	kfree(st);
 	return 0;
 }
@@ -1489,6 +1490,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	retval = strnlen(buffer, buflen);
 done:
+	p9stat_free(st);
 	kfree(st);
 	return retval;
 }
-- 
cgit v1.2.2


From 5c25f347a7b00b2ebe0a55c4a3cfe4c3e1e8725e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 24 Aug 2010 10:30:49 +0000
Subject: fs/9p: Fix error handling in v9fs_get_sb

This was introduced by 7cadb63d58a932041afa3f957d5cbb6ce69dcee5

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_super.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f9311077de68..1d12ba0ed3db 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	fid = v9fs_session_init(v9ses, dev_name, data);
 	if (IS_ERR(fid)) {
 		retval = PTR_ERR(fid);
+		/*
+		 * we need to call session_close to tear down some
+		 * of the data structure setup by session_init
+		 */
 		goto close_session;
 	}
 
@@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		retval = -ENOMEM;
 		goto release_sb;
 	}
-
 	sb->s_root = root;
 
 	if (v9fs_proto_dotl(v9ses)) {
@@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
 		if (IS_ERR(st)) {
 			retval = PTR_ERR(st);
-			goto clunk_fid;
+			goto release_sb;
 		}
 
 		v9fs_stat2inode_dotl(st, root->d_inode);
@@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		st = p9_client_stat(fid);
 		if (IS_ERR(st)) {
 			retval = PTR_ERR(st);
-			goto clunk_fid;
+			goto release_sb;
 		}
 
 		root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
@@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 
 	v9fs_fid_add(root, fid);
 
-P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
+	P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
 	simple_set_mnt(mnt, sb);
 	return 0;
 
 clunk_fid:
 	p9_client_clunk(fid);
-
 close_session:
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
 	return retval;
-
 release_sb:
+	/*
+	 * we will do the session_close and root dentry release
+	 * in the below call. But we need to clunk fid, because we haven't
+	 * attached the fid to dentry so it won't get clunked
+	 * automatically.
+	 */
+	p9_client_clunk(fid);
 	deactivate_locked_super(sb);
 	return retval;
 }
-- 
cgit v1.2.2


From 62726a7ab3a6a3624256172af055ff0a38c6ffa2 Mon Sep 17 00:00:00 2001
From: jvrao <jvrao@linux.vnet.ibm.com>
Date: Wed, 25 Aug 2010 16:26:21 +0000
Subject: 9p: Check for NULL fid in v9fs_dir_release()

NULL fid should be handled in cases where we endup calling v9fs_dir_release()
before even we instantiate the fid in filp.

Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_dir.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 16c8a2a98c1b..899f168fd19c 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -292,9 +292,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 
 	fid = filp->private_data;
 	P9_DPRINTK(P9_DEBUG_VFS,
-			"inode: %p filp: %p fid: %d\n", inode, filp, fid->fid);
+			"v9fs_dir_release: inode: %p filp: %p fid: %d\n",
+			inode, filp, fid ? fid->fid : -1);
 	filemap_write_and_wait(inode->i_mapping);
-	p9_client_clunk(fid);
+	if (fid)
+		p9_client_clunk(fid);
 	return 0;
 }
 
-- 
cgit v1.2.2


From 3c30750ffafbc32af040b09f777b67aa2486b063 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 30 Aug 2010 16:04:35 +0000
Subject: fs/9p: Use the correct dentry operations

We should use the cached dentry operation only if caching mode is enabled

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 84159cf9c521..a6990bbf6056 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
 		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
-	dentry->d_op = &v9fs_cached_dentry_operations;
+	if (v9ses->cache)
+		dentry->d_op = &v9fs_cached_dentry_operations;
+	else
+		dentry->d_op = &v9fs_dentry_operations;
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
-- 
cgit v1.2.2


From 1d76e3135733a06aa12bb35891c05f306b27b2d6 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 30 Aug 2010 17:43:07 +0000
Subject: fs/9p: Don't use dotl version of mknod for dotu inode operations

We should not use dotlversion for the dotu inode operations

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index a6990bbf6056..9e670d527646 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1947,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.unlink = v9fs_vfs_unlink,
 	.mkdir = v9fs_vfs_mkdir,
 	.rmdir = v9fs_vfs_rmdir,
-	.mknod = v9fs_vfs_mknod_dotl,
+	.mknod = v9fs_vfs_mknod,
 	.rename = v9fs_vfs_rename,
 	.getattr = v9fs_vfs_getattr,
 	.setattr = v9fs_vfs_setattr,
-- 
cgit v1.2.2


From 467c525109d5d542d7d416b0c11bdd54610fe2f4 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Mon, 13 Sep 2010 11:39:20 -0700
Subject: ceph: fix dn offset during readdir_prepopulate

When adding the readdir results to the cache, ceph_set_dentry_offset was
clobbered our just-set offset.  This can cause the readdir result offsets
to get out of sync with the server.  Add an argument to the helper so
that it does not.

This bug was introduced by 1cd3935bedccf592d44343890251452a6dd74fc4.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/inode.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e7cca414da03..62377ec37edf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
  * the caller) if we fail.
  */
 static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
-				    bool *prehash)
+				    bool *prehash, bool set_offset)
 {
 	struct dentry *realdn;
 
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 	}
 	if ((!prehash || *prehash) && d_unhashed(dn))
 		d_rehash(dn);
-	ceph_set_dentry_offset(dn);
+	if (set_offset)
+		ceph_set_dentry_offset(dn);
 out:
 	return dn;
 }
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 				d_delete(dn);
 				goto done;
 			}
-			dn = splice_dentry(dn, in, &have_lease);
+			dn = splice_dentry(dn, in, &have_lease, true);
 			if (IS_ERR(dn)) {
 				err = PTR_ERR(dn);
 				goto done;
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 			goto done;
 		}
 		dout(" linking snapped dir %p to dn %p\n", in, dn);
-		dn = splice_dentry(dn, in, NULL);
+		dn = splice_dentry(dn, in, NULL, true);
 		if (IS_ERR(dn)) {
 			err = PTR_ERR(dn);
 			goto done;
@@ -1237,7 +1238,7 @@ retry_lookup:
 				err = PTR_ERR(in);
 				goto out;
 			}
-			dn = splice_dentry(dn, in, NULL);
+			dn = splice_dentry(dn, in, NULL, false);
 			if (IS_ERR(dn))
 				dn = NULL;
 		}
-- 
cgit v1.2.2


From 8bef9239ee1a42eb37d3f83bacf6a75f019c028d Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 14 Sep 2010 15:45:44 -0700
Subject: ceph: correctly set 'follows' in flushsnap messages

The 'follows' should match the seq for the snap context for the given snap
cap, which is the context under which we have been dirtying and writing
data and metadata.  The snapshot that _contains_ those updates thus
_follows_ that context's seq #.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/snap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4868b9dcac5a..9e836afba341 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -467,7 +467,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 		INIT_LIST_HEAD(&capsnap->ci_item);
 		INIT_LIST_HEAD(&capsnap->flushing_item);
 
-		capsnap->follows = snapc->seq - 1;
+		capsnap->follows = snapc->seq;
 		capsnap->issued = __ceph_caps_issued(ci, NULL);
 		capsnap->dirty = dirty;
 
-- 
cgit v1.2.2


From cfc0bf6640dfd0f43bf8bfec5a475284809baa4d Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 14 Sep 2010 15:50:59 -0700
Subject: ceph: stop sending FLUSHSNAPs when we hit a dirty capsnap

Stop sending FLUSHSNAP messages when we hit a capsnap that has dirty_pages
or is still writing.  We'll send the newer capsnaps only after the older
ones complete.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6680ae..9fbe9019155c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1227,7 +1227,7 @@ retry:
 		 * pages to be written out.
 		 */
 		if (capsnap->dirty_pages || capsnap->writing)
-			continue;
+			break;
 
 		/*
 		 * if cap writeback already occurred, we should have dropped
@@ -1276,8 +1276,8 @@ retry:
 			      &session->s_cap_snaps_flushing);
 		spin_unlock(&inode->i_lock);
 
-		dout("flush_snaps %p cap_snap %p follows %lld size %llu\n",
-		     inode, capsnap, next_follows, capsnap->size);
+		dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
+		     inode, capsnap, capsnap->follows, capsnap->flush_tid);
 		send_cap_msg(session, ceph_vino(inode).ino, 0,
 			     CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
 			     capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
-- 
cgit v1.2.2


From 460cf3411b858ad509d5255e0dfaf862a83c0299 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Tue, 14 Sep 2010 11:38:24 -0400
Subject: cifs: fix potential double put of TCP session reference

cifs_get_smb_ses must be called on a server pointer on which it holds an
active reference. It first does a search for an existing SMB session. If
it finds one, it'll put the server reference and then try to ensure that
the negprot is done, etc.

If it encounters an error at that point then it'll return an error.
There's a potential problem here though. When cifs_get_smb_ses returns
an error, the caller will also put the TCP server reference leading to a
double-put.

Fix this by having cifs_get_smb_ses only put the server reference if
it found an existing session that it could use and isn't returning an
error.

Cc: stable@kernel.org
Reviewed-by: Suresh Jayaraman <sjayaraman@suse.de>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 67dad54fbfa1..88c84a38bccb 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1706,9 +1706,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	if (ses) {
 		cFYI(1, "Existing smb sess found (status=%d)", ses->status);
 
-		/* existing SMB ses has a server reference already */
-		cifs_put_tcp_session(server);
-
 		mutex_lock(&ses->session_mutex);
 		rc = cifs_negotiate_protocol(xid, ses);
 		if (rc) {
@@ -1731,6 +1728,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 			}
 		}
 		mutex_unlock(&ses->session_mutex);
+
+		/* existing SMB ses has a server reference already */
+		cifs_put_tcp_session(server);
 		FreeXid(xid);
 		return ses;
 	}
-- 
cgit v1.2.2


From 75e1c70fc31490ef8a373ea2a4bea2524099b478 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Fri, 10 Sep 2010 14:16:00 -0700
Subject: aio: check for multiplication overflow in do_io_submit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tavis Ormandy pointed out that do_io_submit does not do proper bounds
checking on the passed-in iocb array:

       if (unlikely(nr < 0))
               return -EINVAL;

       if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(iocbpp)))))
               return -EFAULT;                      ^^^^^^^^^^^^^^^^^^

The attached patch checks for overflow, and if it is detected, the
number of iocbs submitted is scaled down to a number that will fit in
the long.  This is an ok thing to do, as sys_io_submit is documented as
returning the number of iocbs submitted, so callers should handle a
return value of less than the 'nr' argument passed in.

Reported-by: Tavis Ormandy <taviso@cmpxchg8b.com>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 3006b5bc33d6..1320b2a05fb2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1659,6 +1659,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
 	if (unlikely(nr < 0))
 		return -EINVAL;
 
+	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
+		nr = LONG_MAX/sizeof(*iocbpp);
+
 	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
 		return -EFAULT;
 
-- 
cgit v1.2.2


From ae00d4f37f4df56821331deb1028748110dd6dc9 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 16 Sep 2010 16:26:51 -0700
Subject: ceph: fix cap_snap and realm split

The cap_snap creation/queueing relies on both the current i_head_snapc
_and_ the i_snap_realm pointers being correct, so that the new cap_snap
can properly reference the old context and the new i_head_snapc can be
updated to reference the new snaprealm's context.  To fix this, we:

 - move inodes completely to the new (split) realm so that i_snap_realm
   is correct, and
 - generate the new snapc's _before_ queueing the cap_snaps in
   ceph_update_snap_trace().

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/addr.c  |  4 +--
 fs/ceph/snap.c  | 88 +++++++++++++++++++--------------------------------------
 fs/ceph/super.h |  2 ++
 3 files changed, 33 insertions(+), 61 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 50461b8c23a4..efbc604001c8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 	if (i_size < page_off + len)
 		len = i_size - page_off;
 
-	dout("writepage %p page %p index %lu on %llu~%u\n",
-	     inode, page, page->index, page_off, len);
+	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
+	     inode, page, page->index, page_off, len, snapc);
 
 	writeback_stat = atomic_long_inc_return(&client->writeback_count);
 	if (writeback_stat >
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 9e836afba341..9e6eef14b7df 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
 	INIT_LIST_HEAD(&realm->children);
 	INIT_LIST_HEAD(&realm->child_item);
 	INIT_LIST_HEAD(&realm->empty_item);
+	INIT_LIST_HEAD(&realm->dirty_item);
 	INIT_LIST_HEAD(&realm->inodes_with_caps);
 	spin_lock_init(&realm->inodes_with_caps_lock);
 	__insert_snap_realm(&mdsc->snap_realms, realm);
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
 	struct ceph_snap_realm *realm;
 	int invalidate = 0;
 	int err = -ENOMEM;
+	LIST_HEAD(dirty_realms);
 
 	dout("update_snap_trace deletion=%d\n", deletion);
 more:
@@ -626,24 +628,6 @@ more:
 		}
 	}
 
-	if (le64_to_cpu(ri->seq) > realm->seq) {
-		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
-		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
-		/*
-		 * if the realm seq has changed, queue a cap_snap for every
-		 * inode with open caps.  we do this _before_ we update
-		 * the realm info so that we prepare for writeback under the
-		 * _previous_ snap context.
-		 *
-		 * ...unless it's a snap deletion!
-		 */
-		if (!deletion)
-			queue_realm_cap_snaps(realm);
-	} else {
-		dout("update_snap_trace %llx %p seq %lld unchanged\n",
-		     realm->ino, realm, realm->seq);
-	}
-
 	/* ensure the parent is correct */
 	err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
 	if (err < 0)
@@ -651,6 +635,8 @@ more:
 	invalidate += err;
 
 	if (le64_to_cpu(ri->seq) > realm->seq) {
+		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
+		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
 		/* update realm parameters, snap lists */
 		realm->seq = le64_to_cpu(ri->seq);
 		realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
 		if (err < 0)
 			goto fail;
 
+		/* queue realm for cap_snap creation */
+		list_add(&realm->dirty_item, &dirty_realms);
+
 		invalidate = 1;
 	} else if (!realm->cached_context) {
+		dout("update_snap_trace %llx %p seq %lld new\n",
+		     realm->ino, realm, realm->seq);
 		invalidate = 1;
+	} else {
+		dout("update_snap_trace %llx %p seq %lld unchanged\n",
+		     realm->ino, realm, realm->seq);
 	}
 
 	dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
 	if (invalidate)
 		rebuild_snap_realms(realm);
 
+	/*
+	 * queue cap snaps _after_ we've built the new snap contexts,
+	 * so that i_head_snapc can be set appropriately.
+	 */
+	list_for_each_entry(realm, &dirty_realms, dirty_item) {
+		queue_realm_cap_snaps(realm);
+	}
+
 	__cleanup_empty_realms(mdsc);
 	return 0;
 
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			};
 			struct inode *inode = ceph_find_inode(sb, vino);
 			struct ceph_inode_info *ci;
+			struct ceph_snap_realm *oldrealm;
 
 			if (!inode)
 				continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			dout(" will move %p to split realm %llx %p\n",
 			     inode, realm->ino, realm);
 			/*
-			 * Remove the inode from the realm's inode
-			 * list, but don't add it to the new realm
-			 * yet.  We don't want the cap_snap to be
-			 * queued (again) by ceph_update_snap_trace()
-			 * below.  Queue it _now_, under the old context.
+			 * Move the inode to the new realm
 			 */
 			spin_lock(&realm->inodes_with_caps_lock);
 			list_del_init(&ci->i_snap_realm_item);
+			list_add(&ci->i_snap_realm_item,
+				 &realm->inodes_with_caps);
+			oldrealm = ci->i_snap_realm;
+			ci->i_snap_realm = realm;
 			spin_unlock(&realm->inodes_with_caps_lock);
 			spin_unlock(&inode->i_lock);
 
-			ceph_queue_cap_snap(ci);
+			ceph_get_snap_realm(mdsc, realm);
+			ceph_put_snap_realm(mdsc, oldrealm);
 
 			iput(inode);
 			continue;
@@ -880,43 +884,9 @@ skip_inode:
 	ceph_update_snap_trace(mdsc, p, e,
 			       op == CEPH_SNAP_OP_DESTROY);
 
-	if (op == CEPH_SNAP_OP_SPLIT) {
-		/*
-		 * ok, _now_ add the inodes into the new realm.
-		 */
-		for (i = 0; i < num_split_inos; i++) {
-			struct ceph_vino vino = {
-				.ino = le64_to_cpu(split_inos[i]),
-				.snap = CEPH_NOSNAP,
-			};
-			struct inode *inode = ceph_find_inode(sb, vino);
-			struct ceph_inode_info *ci;
-
-			if (!inode)
-				continue;
-			ci = ceph_inode(inode);
-			spin_lock(&inode->i_lock);
-			if (list_empty(&ci->i_snap_realm_item)) {
-				struct ceph_snap_realm *oldrealm =
-					ci->i_snap_realm;
-
-				dout(" moving %p to split realm %llx %p\n",
-				     inode, realm->ino, realm);
-				spin_lock(&realm->inodes_with_caps_lock);
-				list_add(&ci->i_snap_realm_item,
-					 &realm->inodes_with_caps);
-				ci->i_snap_realm = realm;
-				spin_unlock(&realm->inodes_with_caps_lock);
-				ceph_get_snap_realm(mdsc, realm);
-				ceph_put_snap_realm(mdsc, oldrealm);
-			}
-			spin_unlock(&inode->i_lock);
-			iput(inode);
-		}
-
+	if (op == CEPH_SNAP_OP_SPLIT)
 		/* we took a reference when we created the realm, above */
 		ceph_put_snap_realm(mdsc, realm);
-	}
 
 	__cleanup_empty_realms(mdsc);
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897ae5725..c80bfbe27b05 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
 
 	struct list_head empty_item;     /* if i have ref==0 */
 
+	struct list_head dirty_item;     /* if realm needs new context */
+
 	/* the current set of snaps for this realm */
 	struct ceph_snap_context *cached_context;
 
-- 
cgit v1.2.2


From 5f4874903df3562b9d5649fc1cf7b8c6bb238e42 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Thu, 9 Sep 2010 14:45:00 +0100
Subject: GFS2: gfs2_logd should be using interruptible waits

Looks like this crept in, in a recent update.

Reported-by:  Krzysztof Urbaniak <urban@bash.org.pl>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cde1248a6225..ac750bd31a6f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -932,7 +932,7 @@ int gfs2_logd(void *data)
 
 		do {
 			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
-					TASK_UNINTERRUPTIBLE);
+					TASK_INTERRUPTIBLE);
 			if (!gfs2_ail_flush_reqd(sdp) &&
 			    !gfs2_jrnl_flush_reqd(sdp) &&
 			    !kthread_should_stop())
-- 
cgit v1.2.2


From e835124c2be289515b918f2688ced4249e2de566 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 17 Sep 2010 08:03:08 -0700
Subject: ceph: only send one flushsnap per cap_snap per mds session

Sending multiple flushsnap messages is problematic because we ignore
the response if the tid doesn't match, and the server may only respond to
each one once.  It's also a waste.

So, skip cap_snaps that are already on the flushing list, unless the caller
tells us to resend (because we are reconnecting).

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c  | 19 +++++++++++++++----
 fs/ceph/snap.c  |  2 +-
 fs/ceph/super.h |  3 ++-
 3 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 9fbe9019155c..b01c316a8148 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
  * asynchronously back to the MDS once sync writes complete and dirty
  * data is written out.
  *
+ * Unless @again is true, skip cap_snaps that were already sent to
+ * the MDS (i.e., during this session).
+ *
  * Called under i_lock.  Takes s_mutex as needed.
  */
 void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			struct ceph_mds_session **psession)
+			struct ceph_mds_session **psession,
+			int again)
 		__releases(ci->vfs_inode->i_lock)
 		__acquires(ci->vfs_inode->i_lock)
 {
@@ -1240,6 +1244,13 @@ retry:
 			dout("no auth cap (migrating?), doing nothing\n");
 			goto out;
 		}
+
+		/* only flush each capsnap once */
+		if (!again && !list_empty(&capsnap->flushing_item)) {
+			dout("already flushed %p, skipping\n", capsnap);
+			continue;
+		}
+
 		mds = ci->i_auth_cap->session->s_mds;
 		mseq = ci->i_auth_cap->mseq;
 
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
 	struct inode *inode = &ci->vfs_inode;
 
 	spin_lock(&inode->i_lock);
-	__ceph_flush_snaps(ci, NULL);
+	__ceph_flush_snaps(ci, NULL, 0);
 	spin_unlock(&inode->i_lock);
 }
 
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 
 	/* flush snaps first time around only */
 	if (!list_empty(&ci->i_cap_snaps))
-		__ceph_flush_snaps(ci, &session);
+		__ceph_flush_snaps(ci, &session, 0);
 	goto retry_locked;
 retry:
 	spin_lock(&inode->i_lock);
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
 		if (cap && cap->session == session) {
 			dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
 			     cap, capsnap);
-			__ceph_flush_snaps(ci, &session);
+			__ceph_flush_snaps(ci, &session, 1);
 		} else {
 			pr_err("%p auth cap %p not mds%d ???\n", inode,
 			       cap, session->s_mds);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 9e6eef14b7df..190b6c4a6f2b 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -717,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
 		igrab(inode);
 		spin_unlock(&mdsc->snap_flush_lock);
 		spin_lock(&inode->i_lock);
-		__ceph_flush_snaps(ci, &session);
+		__ceph_flush_snaps(ci, &session, 0);
 		spin_unlock(&inode->i_lock);
 		iput(inode);
 		spin_lock(&mdsc->snap_flush_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c80bfbe27b05..b87638e84c4b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -828,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 				       struct ceph_snap_context *snapc);
 extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			       struct ceph_mds_session **psession);
+			       struct ceph_mds_session **psession,
+			       int again);
 extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 			    struct ceph_mds_session *session);
 extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
-- 
cgit v1.2.2


From a43fb73101eaf6db0b33d22c152b338ab8b3edbb Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 17 Sep 2010 09:54:08 -0700
Subject: ceph: check mapping to determine if FILE_CACHE cap is used

See if the i_data mapping has any pages to determine if the FILE_CACHE
capability is currently in use, instead of assuming it is any time the
rdcache_gen value is set (i.e., issued -> used).

This allows the MDS RECALL_STATE process work for inodes that have cached
pages.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/caps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b01c316a8148..73c153092f72 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
 		used |= CEPH_CAP_PIN;
 	if (ci->i_rd_ref)
 		used |= CEPH_CAP_FILE_RD;
-	if (ci->i_rdcache_ref || ci->i_rdcache_gen)
+	if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
 		used |= CEPH_CAP_FILE_CACHE;
 	if (ci->i_wr_ref)
 		used |= CEPH_CAP_FILE_WR;
-- 
cgit v1.2.2


From be4f104dfd3b5e3ae262bff607965cfc38027dec Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Fri, 17 Sep 2010 12:30:31 -0700
Subject: ceph: select CRYPTO

We select CRYPTO_AES, but not CRYPTO.

Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/ceph/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c1d27e..0fcd2640c23f 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
 	depends on INET && EXPERIMENTAL
 	select LIBCRC32C
 	select CRYPTO_AES
+	select CRYPTO
 	help
 	  Choose Y or M here to include support for mounting the
 	  experimental Ceph distributed file system.  Ceph is an extremely
-- 
cgit v1.2.2


From 50aff040363d31f87e94f38f1710973d99489951 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 21 Aug 2010 14:40:20 +0800
Subject: ocfs2/net: fix uninitialized ret in o2net_send_message_vec()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mmotm/fs/ocfs2/cluster/tcp.c: In function ‘o2net_send_message_vec’:
mmotm/fs/ocfs2/cluster/tcp.c:980:6: warning: ‘ret’ may be used uninitialized in this function

It seems a real bug introduced by commit 9af0b38ff3 (ocfs2/net:
Use wait_event() in o2net_send_message_vec()).

cc: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/cluster/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1361997cf205..cbe2f057cc28 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
 int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
 			   size_t caller_veclen, u8 target_node, int *status)
 {
-	int ret;
+	int ret = 0;
 	struct o2net_msg *msg = NULL;
 	size_t veclen, caller_bytes = 0;
 	struct kvec *vec = NULL;
-- 
cgit v1.2.2


From 112d421df2fddc0278584b084f4fcfedd144c5f4 Mon Sep 17 00:00:00 2001
From: Jan Harkes <jaharkes@cs.cmu.edu>
Date: Fri, 17 Sep 2010 23:26:01 -0400
Subject: Coda: mount hangs because of missed REQ_WRITE rename

Coda's REQ_* defines were renamed to avoid clashes with the block layer
(commit 4aeefdc69f7b: "coda: fixup clash with block layer REQ_*
defines").

However one was missed and response messages are no longer matched with
requests and waiting threads are no longer woken up.  This patch fixes
this.

Signed-off-by: Jan Harkes <jaharkes@cs.cmu.edu>
[ Also fixed up whitespace while at it  -Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/coda/psdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index de89645777c7..116af7546cf0 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -184,8 +184,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
 	}
 
 	/* adjust outsize. is this useful ?? */
-        req->uc_outSize = nbytes;	
-        req->uc_flags |= REQ_WRITE;
+	req->uc_outSize = nbytes;
+	req->uc_flags |= CODA_REQ_WRITE;
 	count = nbytes;
 
 	/* Convert filedescriptor into a file handle */
-- 
cgit v1.2.2


From 371d217ee1ff8b418b8f73fb2a34990f951ec2d4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 21 Sep 2010 11:49:01 +0200
Subject: char: Mark /dev/zero and /dev/kmem as not capable of writeback

These devices don't do any writeback but their device inodes still can get
dirty so mark bdi appropriately so that bdi code does the right thing and files
inodes to lists of bdi carrying the device inodes.

Cc: stable@kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/char_dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/char_dev.c b/fs/char_dev.c
index f80a4f25123c..143d393881cb 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = {
 #endif
 		/* permit direct mmap, for read, write or exec */
 		BDI_CAP_MAP_DIRECT |
-		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP),
+		BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP |
+		/* no writeback happens */
+		BDI_CAP_NO_ACCT_AND_WRITEBACK),
 };
 
 static struct kobj_map *cdev_map;
-- 
cgit v1.2.2


From 692ebd17c2905313fff3c504c249c6a0faad16ec Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 21 Sep 2010 11:51:01 +0200
Subject: bdi: Fix warnings in __mark_inode_dirty for /dev/zero and friends

Inodes of devices such as /dev/zero can get dirty for example via
utime(2) syscall or due to atime update. Backing device of such inodes
(zero_bdi, etc.) is however unable to handle dirty inodes and thus
__mark_inode_dirty complains.  In fact, inode should be rather dirtied
against backing device of the filesystem holding it. This is generally a
good rule except for filesystems such as 'bdev' or 'mtd_inodefs'. Inodes
in these pseudofilesystems are referenced from ordinary filesystem
inodes and carry mapping with real data of the device. Thus for these
inodes we have to use inode->i_mapping->backing_dev_info as we did so
far. We distinguish these filesystems by checking whether sb->s_bdi
points to a non-trivial backing device or not.

Example: Assume we have an ext3 filesystem on /dev/sda1 mounted on /.
There's a device inode A described by a path "/dev/sdb" on this
filesystem. This inode will be dirtied against backing device "8:0"
after this patch. bdev filesystem contains block device inode B coupled
with our inode A. When someone modifies a page of /dev/sdb, it's B that
gets dirtied and the dirtying happens against the backing device "8:16".
Thus both inodes get filed to a correct bdi list.

Cc: stable@kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 81e086d8aa57..5581122bd2c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,8 +52,6 @@ struct wb_writeback_work {
 #define CREATE_TRACE_POINTS
 #include <trace/events/writeback.h>
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
 /*
  * We don't actually have pdflush, but this one is exported though /proc...
  */
@@ -71,6 +69,27 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return test_bit(BDI_writeback_running, &bdi->state);
 }
 
+static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+
+	/*
+	 * For inodes on standard filesystems, we use superblock's bdi. For
+	 * inodes on virtual filesystems, we want to use inode mapping's bdi
+	 * because they can possibly point to something useful (think about
+	 * block_dev filesystem).
+	 */
+	if (sb->s_bdi && sb->s_bdi != &noop_backing_dev_info) {
+		/* Some device inodes could play dirty tricks. Catch them... */
+		WARN(bdi != sb->s_bdi && bdi_cap_writeback_dirty(bdi),
+			"Dirtiable inode bdi %s != sb bdi %s\n",
+			bdi->name, sb->s_bdi->name);
+		return sb->s_bdi;
+	}
+	return bdi;
+}
+
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
-- 
cgit v1.2.2


From 767b68e96993e29e3480d7ecdd9c4b84667c5762 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Wed, 22 Sep 2010 14:32:56 -0400
Subject: Prevent freeing uninitialized pointer in compat_do_readv_writev

In 32-bit compatibility mode, the error handling for
compat_do_readv_writev() may free an uninitialized pointer, potentially
leading to all sorts of ugly memory corruption.  This is reliably
triggerable by unprivileged users by invoking the readv()/writev()
syscalls with an invalid iovec pointer.  The below patch fixes this to
emulate the non-compat version.

Introduced by commit b83733639a49 ("compat: factor out
compat_rw_copy_check_uvector from compat_do_readv_writev")

Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Cc: stable@kernel.org (2.6.35)
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 718c7062aec1..0644a154672b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 {
 	compat_ssize_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov;
+	struct iovec *iov = iovstack;
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
-- 
cgit v1.2.2


From c227e69028473c7c7994a9b0a2cc0034f3f7e0fe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 22 Sep 2010 13:04:54 -0700
Subject: /proc/vmcore: fix seeking

Commit 73296bc611 ("procfs: Use generic_file_llseek in /proc/vmcore")
broke seeking on /proc/vmcore.  This changes it back to use default_llseek
in order to restore the original behaviour.

The problem with generic_file_llseek is that it only allows seeks up to
inode->i_sb->s_maxbytes, which is zero on procfs and some other virtual
file systems.  We should merge generic_file_llseek and default_llseek some
day and clean this up in a proper way, but for 2.6.35/36, reverting vmcore
is the safer solution.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Reported-by: CAI Qian <caiqian@redhat.com>
Tested-by: CAI Qian <caiqian@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/vmcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 91c817ff02c3..2367fb3f70bc 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
 
 static const struct file_operations proc_vmcore_operations = {
 	.read		= read_vmcore,
-	.llseek		= generic_file_llseek,
+	.llseek		= default_llseek,
 };
 
 static struct vmcore* __init get_new_element(void)
-- 
cgit v1.2.2


From a0c42bac79731276c9b2f28d54f9e658fcf843a2 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 22 Sep 2010 13:05:03 -0700
Subject: aio: do not return ERESTARTSYS as a result of AIO

OCFS2 can return ERESTARTSYS from its write function when the process is
signalled while waiting for a cluster lock (and the filesystem is mounted
with intr mount option).  Generally, it seems reasonable to allow
filesystems to return this error code from its IO functions.  As we must
not leak ERESTARTSYS (and similar error codes) to userspace as a result of
an AIO operation, we have to properly convert it to EINTR inside AIO code
(restarting the syscall isn't really an option because other AIO could
have been already submitted by the same io_submit syscall).

Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 1320b2a05fb2..250b0a73c8a8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	 */
 	ret = retry(iocb);
 
-	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
+	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
+		/*
+		 * There's no easy way to restart the syscall since other AIO's
+		 * may be already running. Just fail this IO with EINTR.
+		 */
+		if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+			     ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+			ret = -EINTR;
 		aio_complete(iocb, ret, 0);
+	}
 out:
 	spin_lock_irq(&ctx->ctx_lock);
 
-- 
cgit v1.2.2


From 1c2499ae87f828eabddf6483b0dfc11da1100c07 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Wed, 22 Sep 2010 13:05:06 -0700
Subject: /proc/pid/smaps: fix dirty pages accounting

Currently, /proc/<pid>/smaps has wrong dirty pages accounting.
Shared_Dirty and Private_Dirty output only pte dirty pages and ignore
PG_dirty page flag.  It is difference against documentation, but also
inconsistent against Referenced field.  (Referenced checks both pte and
page flags)

This patch fixes it.

Test program:

 large-array.c
 ---------------------------------------------------
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>

 char array[1*1024*1024*1024L];

 int main(void)
 {
         memset(array, 1, sizeof(array));
         pause();

         return 0;
 }
 ---------------------------------------------------

Test case:
 1. run ./large-array
 2. cat /proc/`pidof large-array`/smaps
 3. swapoff -a
 4. cat /proc/`pidof large-array`/smaps again

Test result:
 <before patch>

00601000-40601000 rw-p 00000000 00:00 0
Size:            1048576 kB
Rss:             1048576 kB
Pss:             1048576 kB
Shared_Clean:          0 kB
Shared_Dirty:          0 kB
Private_Clean:    218992 kB   <-- showed pages as clean incorrectly
Private_Dirty:    829584 kB
Referenced:       388364 kB
Swap:                  0 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB

 <after patch>

00601000-40601000 rw-p 00000000 00:00 0
Size:            1048576 kB
Rss:             1048576 kB
Pss:             1048576 kB
Shared_Clean:          0 kB
Shared_Dirty:          0 kB
Private_Clean:         0 kB
Private_Dirty:   1048576 kB  <-- fixed
Referenced:       388480 kB
Swap:                  0 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 271afc48b9a5..1dbca4e8cc16 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			mss->referenced += PAGE_SIZE;
 		mapcount = page_mapcount(page);
 		if (mapcount >= 2) {
-			if (pte_dirty(ptent))
+			if (pte_dirty(ptent) || PageDirty(page))
 				mss->shared_dirty += PAGE_SIZE;
 			else
 				mss->shared_clean += PAGE_SIZE;
 			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
 		} else {
-			if (pte_dirty(ptent))
+			if (pte_dirty(ptent) || PageDirty(page))
 				mss->private_dirty += PAGE_SIZE;
 			else
 				mss->private_clean += PAGE_SIZE;
-- 
cgit v1.2.2


From 12828061cdacfb1db3eb03fd71952d5ebc555bbb Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 13 Sep 2010 14:00:23 +0800
Subject: ocfs2: update ctime when changing the file's permission by setfacl

In commit 30e2bab, ext3 fixed it. So change it accordingly in ocfs2.

Steps to reproduce:
# touch aaa
# stat -c %Z aaa
1283760364
# setfacl -m  'u::x,g::x,o::x' aaa
# stat -c %Z aaa
1283760364

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/acl.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a76e0aa5cd3f..391915093fe1 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
 	}
 
 	inode->i_mode = new_mode;
+	inode->i_ctime = CURRENT_TIME;
 	di->i_mode = cpu_to_le16(inode->i_mode);
+	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 
 	ocfs2_journal_dirty(handle, di_bh);
 
-- 
cgit v1.2.2


From 47dea423799d98c53793237ab386a94976f305d5 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Mon, 13 Sep 2010 15:13:50 +0800
Subject: ocfs2: Use cpu_to_le16 for e_leaf_clusters in
 ocfs2_bg_discontig_add_extent.

e_leaf_clusters is a le16, so use cpu_to_le16 instead
of cpu_to_le32.

What's more, we change 'clusters' to unsigned int to
signify that the size of 'clusters' isn't important here.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/suballoc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8a286f54dca1..849c2f0e0a0e 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -357,7 +357,7 @@ out:
 static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
 					  struct ocfs2_group_desc *bg,
 					  struct ocfs2_chain_list *cl,
-					  u64 p_blkno, u32 clusters)
+					  u64 p_blkno, unsigned int clusters)
 {
 	struct ocfs2_extent_list *el = &bg->bg_list;
 	struct ocfs2_extent_rec *rec;
@@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
 	rec->e_blkno = cpu_to_le64(p_blkno);
 	rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
 				  le16_to_cpu(cl->cl_bpc));
-	rec->e_leaf_clusters = cpu_to_le32(clusters);
+	rec->e_leaf_clusters = cpu_to_le16(clusters);
 	le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
 	le16_add_cpu(&bg->bg_free_bits_count,
 		     clusters * le16_to_cpu(cl->cl_bpc));
-- 
cgit v1.2.2


From 4a452de4fdfe4dbb27e491904d8bfaf1262bdff4 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sun, 19 Sep 2010 13:42:28 +0800
Subject: ocfs2: Move 'wanted' into parens of ocfs2_resmap_resv_bits.

The first time I read the function ocfs2_resmap_resv_bits, I consider
about what 'wanted' will be used and consider about the comments.
Then I find it is only used if the reservation is empty. ;)

So we'd better move it to the parens so that it make the code more
readable, what's more, ocfs2_resmap_resv_bits is used so frequently
and we should save some cpus.

Acked-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/reservations.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index d8b6e4259b80..3e78db361bc7 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
 			   struct ocfs2_alloc_reservation *resv,
 			   int *cstart, int *clen)
 {
-	unsigned int wanted = *clen;
-
 	if (resv == NULL || ocfs2_resmap_disabled(resmap))
 		return -ENOSPC;
 
 	spin_lock(&resv_lock);
 
-	/*
-	 * We don't want to over-allocate for temporary
-	 * windows. Otherwise, we run the risk of fragmenting the
-	 * allocation space.
-	 */
-	wanted = ocfs2_resv_window_bits(resmap, resv);
-	if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
-		wanted = *clen;
-
 	if (ocfs2_resv_empty(resv)) {
-		mlog(0, "empty reservation, find new window\n");
+		/*
+		 * We don't want to over-allocate for temporary
+		 * windows. Otherwise, we run the risk of fragmenting the
+		 * allocation space.
+		 */
+		unsigned int wanted = ocfs2_resv_window_bits(resmap, resv);
 
+		if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
+			wanted = *clen;
+
+		mlog(0, "empty reservation, find new window\n");
 		/*
 		 * Try to get a window here. If it works, we must fall
 		 * through and test the bitmap . This avoids some
-- 
cgit v1.2.2


From 0000b862027d624ac564609b87c1aa4d14dd1e46 Mon Sep 17 00:00:00 2001
From: Tao Ma <tao.ma@oracle.com>
Date: Sun, 19 Sep 2010 13:42:29 +0800
Subject: ocfs2: Sync inode flags with ext2.

We sync our inode flags with ext2 and define them by hex
values. But actually in commit 3669567(4 years ago), all
these values are moved to include/linux/fs.h. So we'd
better also use them as what ext2 did. So sync our inode
flags with ext2 by using FS_*.

Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/ocfs2_fs.h    | 37 +++++++++++++++++++++++++------------
 fs/ocfs2/ocfs2_ioctl.h |  8 ++++----
 2 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 33f1c9a8258d..fa31d05e41b7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -235,18 +235,31 @@
 #define OCFS2_HAS_REFCOUNT_FL   (0x0010)
 
 /* Inode attributes, keep in sync with EXT2 */
-#define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
-#define OCFS2_UNRM_FL		(0x00000002)	/* Undelete */
-#define OCFS2_COMPR_FL		(0x00000004)	/* Compress file */
-#define OCFS2_SYNC_FL		(0x00000008)	/* Synchronous updates */
-#define OCFS2_IMMUTABLE_FL	(0x00000010)	/* Immutable file */
-#define OCFS2_APPEND_FL		(0x00000020)	/* writes to file may only append */
-#define OCFS2_NODUMP_FL		(0x00000040)	/* do not dump file */
-#define OCFS2_NOATIME_FL	(0x00000080)	/* do not update atime */
-#define OCFS2_DIRSYNC_FL	(0x00010000)	/* dirsync behaviour (directories only) */
-
-#define OCFS2_FL_VISIBLE	(0x000100FF)	/* User visible flags */
-#define OCFS2_FL_MODIFIABLE	(0x000100FF)	/* User modifiable flags */
+#define OCFS2_SECRM_FL			FS_SECRM_FL	/* Secure deletion */
+#define OCFS2_UNRM_FL			FS_UNRM_FL	/* Undelete */
+#define OCFS2_COMPR_FL			FS_COMPR_FL	/* Compress file */
+#define OCFS2_SYNC_FL			FS_SYNC_FL	/* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL		FS_IMMUTABLE_FL	/* Immutable file */
+#define OCFS2_APPEND_FL			FS_APPEND_FL	/* writes to file may only append */
+#define OCFS2_NODUMP_FL			FS_NODUMP_FL	/* do not dump file */
+#define OCFS2_NOATIME_FL		FS_NOATIME_FL	/* do not update atime */
+/* Reserved for compression usage... */
+#define OCFS2_DIRTY_FL			FS_DIRTY_FL
+#define OCFS2_COMPRBLK_FL		FS_COMPRBLK_FL	/* One or more compressed clusters */
+#define OCFS2_NOCOMP_FL			FS_NOCOMP_FL	/* Don't compress */
+#define OCFS2_ECOMPR_FL			FS_ECOMPR_FL	/* Compression error */
+/* End compression flags --- maybe not all used */
+#define OCFS2_BTREE_FL			FS_BTREE_FL	/* btree format dir */
+#define OCFS2_INDEX_FL			FS_INDEX_FL	/* hash-indexed directory */
+#define OCFS2_IMAGIC_FL			FS_IMAGIC_FL	/* AFS directory */
+#define OCFS2_JOURNAL_DATA_FL		FS_JOURNAL_DATA_FL /* Reserved for ext3 */
+#define OCFS2_NOTAIL_FL			FS_NOTAIL_FL	/* file tail should not be merged */
+#define OCFS2_DIRSYNC_FL		FS_DIRSYNC_FL	/* dirsync behaviour (directories only) */
+#define OCFS2_TOPDIR_FL			FS_TOPDIR_FL	/* Top of directory hierarchies*/
+#define OCFS2_RESERVED_FL		FS_RESERVED_FL	/* reserved for ext2 lib */
+
+#define OCFS2_FL_VISIBLE		FS_FL_USER_VISIBLE	/* User visible flags */
+#define OCFS2_FL_MODIFIABLE		FS_FL_USER_MODIFIABLE	/* User modifiable flags */
 
 /*
  * Extent record flags (e_node.leaf.flags)
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 2d3420af1a83..5d241505690b 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -23,10 +23,10 @@
 /*
  * ioctl commands
  */
-#define OCFS2_IOC_GETFLAGS	_IOR('f', 1, long)
-#define OCFS2_IOC_SETFLAGS	_IOW('f', 2, long)
-#define OCFS2_IOC32_GETFLAGS	_IOR('f', 1, int)
-#define OCFS2_IOC32_SETFLAGS	_IOW('f', 2, int)
+#define OCFS2_IOC_GETFLAGS	FS_IOC_GETFLAGS
+#define OCFS2_IOC_SETFLAGS	FS_IOC_SETFLAGS
+#define OCFS2_IOC32_GETFLAGS	FS_IOC32_GETFLAGS
+#define OCFS2_IOC32_SETFLAGS	FS_IOC32_SETFLAGS
 
 /*
  * Space reservation / allocation / free ioctls and argument structure
-- 
cgit v1.2.2


From 5dad6c39d156fbbde0b0ef170d9173feffdeb546 Mon Sep 17 00:00:00 2001
From: Srinivas Eeda <srinivas.eeda@oracle.com>
Date: Tue, 21 Sep 2010 16:27:26 -0700
Subject: o2dlm: force free mles during dlm exit

While umounting, a block mle doesn't get freed if dlm is shutdown after
master request is received but before assert master. This results in unclean
shutdown of dlm domain.

This patch frees all mles that lie around after other nodes were notified about
exiting the dlm and marking dlm state as leaving. Only block mles are expected
to be around, so we log ERROR for other mles but still free them.

Signed-off-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 fs/ocfs2/dlm/dlmcommon.h |  1 +
 fs/ocfs2/dlm/dlmdomain.c |  1 +
 fs/ocfs2/dlm/dlmmaster.c | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4b6ae2c13b47..765298908f1d 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
 			 struct dlm_lock_resource *res);
 void dlm_clean_master_list(struct dlm_ctxt *dlm,
 			   u8 dead_node);
+void dlm_force_free_mles(struct dlm_ctxt *dlm);
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
 int __dlm_lockres_unused(struct dlm_lock_resource *res);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 153abb5abef0..11a5c87fd7f7 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
 
 		dlm_mark_domain_leaving(dlm);
 		dlm_leave_domain(dlm);
+		dlm_force_free_mles(dlm);
 		dlm_complete_dlm_shutdown(dlm);
 	}
 	dlm_put(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ffb4c68dafa4..f564b0e5f80d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
 	wake_up(&res->wq);
 	wake_up(&dlm->migration_wq);
 }
+
+void dlm_force_free_mles(struct dlm_ctxt *dlm)
+{
+	int i;
+	struct hlist_head *bucket;
+	struct dlm_master_list_entry *mle;
+	struct hlist_node *tmp, *list;
+
+	/*
+	 * We notified all other nodes that we are exiting the domain and
+	 * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still
+	 * around we force free them and wake any processes that are waiting
+	 * on the mles
+	 */
+	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->master_lock);
+
+	BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING);
+	BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES));
+
+	for (i = 0; i < DLM_HASH_BUCKETS; i++) {
+		bucket = dlm_master_hash(dlm, i);
+		hlist_for_each_safe(list, tmp, bucket) {
+			mle = hlist_entry(list, struct dlm_master_list_entry,
+					  master_hash_node);
+			if (mle->type != DLM_MLE_BLOCK) {
+				mlog(ML_ERROR, "bad mle: %p\n", mle);
+				dlm_print_one_mle(mle);
+			}
+			atomic_set(&mle->woken, 1);
+			wake_up(&mle->wq);
+
+			__dlm_unlink_mle(dlm, mle);
+			__dlm_mle_detach_hb_events(dlm, mle);
+			__dlm_put_mle(mle);
+		}
+	}
+	spin_unlock(&dlm->master_lock);
+	spin_unlock(&dlm->spinlock);
+}
-- 
cgit v1.2.2


From e4eab08d6050ad04d960738f589724204fd1064c Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Fri, 20 Aug 2010 21:14:46 +0100
Subject: ARM: 6342/1: fix ASLR of PIE executables

Since commits 990cb8acf2 and cc92c28b2d, it is possible to have full
address space layout randomization (ASLR) on ARM.  Except that one small
change was missing for ASLR of PIE executables.

Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 fs/binfmt_elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 535e763ab1a6..6884e198e0c7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -800,7 +800,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 			 * default mmap base, as well as whatever program they
 			 * might try to exec.  This is because the brk will
 			 * follow the loader, and is not movable.  */
-#ifdef CONFIG_X86
+#if defined(CONFIG_X86) || defined(CONFIG_ARM)
 			load_bias = 0;
 #else
 			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-- 
cgit v1.2.2