From 4184ea7f908d95f329febc3665cf66da8568b467 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 10 Mar 2009 12:39:20 -0400
Subject: Btrfs: Fix locking around adding new space_info

Storage allocated to different raid levels in btrfs is tracked by
a btrfs_space_info structure, and all of the current space_infos are
collected into a list_head.

Most filesystems have 3 or 4 of these structs total, and the list is
only changed when new raid levels are added or at unmount time.

This commit adds rcu locking on the list head, and properly frees
things at unmount time.  It also clears the space_info->full flag
whenever new space is added to the FS.

The locking for the space info list goes like this:

reads: protected by rcu_read_lock()
writes: protected by the chunk_mutex

At unmount time we don't need special locking because all the readers
are gone.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/ctree.h       |  9 +++++++++
 fs/btrfs/extent-tree.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
 fs/btrfs/volumes.c     |  2 ++
 3 files changed, 53 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 82491ba8fa40..5e1d4e30e9d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -784,7 +784,14 @@ struct btrfs_fs_info {
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;
+
+	/*
+	 * the space_info list is almost entirely read only.  It only changes
+	 * when we add a new raid type to the FS, and that happens
+	 * very rarely.  RCU is used to protect it.
+	 */
 	struct list_head space_info;
+
 	spinlock_t delalloc_lock;
 	spinlock_t new_trans_lock;
 	u64 delalloc_bytes;
@@ -1797,6 +1804,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
+
 int btrfs_check_metadata_free_space(struct btrfs_root *root);
 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
 				u64 bytes);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9abf81f71c46..fefe83ad2059 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -20,6 +20,7 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/sort.h>
+#include <linux/rcupdate.h>
 #include "compat.h"
 #include "hash.h"
 #include "crc32c.h"
@@ -330,13 +331,33 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 {
 	struct list_head *head = &info->space_info;
 	struct btrfs_space_info *found;
-	list_for_each_entry(found, head, list) {
-		if (found->flags == flags)
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(found, head, list) {
+		if (found->flags == flags) {
+			rcu_read_unlock();
 			return found;
+		}
 	}
+	rcu_read_unlock();
 	return NULL;
 }
 
+/*
+ * after adding space to the filesystem, we need to clear the full flags
+ * on all the space infos.
+ */
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+{
+	struct list_head *head = &info->space_info;
+	struct btrfs_space_info *found;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(found, head, list)
+		found->full = 0;
+	rcu_read_unlock();
+}
+
 static u64 div_factor(u64 num, int factor)
 {
 	if (factor == 10)
@@ -1903,7 +1924,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (!found)
 		return -ENOMEM;
 
-	list_add(&found->list, &info->space_info);
 	INIT_LIST_HEAD(&found->block_groups);
 	init_rwsem(&found->groups_sem);
 	spin_lock_init(&found->lock);
@@ -1917,6 +1937,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->full = 0;
 	found->force_alloc = 0;
 	*space_info = found;
+	list_add_rcu(&found->list, &info->space_info);
 	return 0;
 }
 
@@ -6320,6 +6341,7 @@ out:
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
 	struct btrfs_block_group_cache *block_group;
+	struct btrfs_space_info *space_info;
 	struct rb_node *n;
 
 	spin_lock(&info->block_group_cache_lock);
@@ -6341,6 +6363,23 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 		spin_lock(&info->block_group_cache_lock);
 	}
 	spin_unlock(&info->block_group_cache_lock);
+
+	/* now that all the block groups are freed, go through and
+	 * free all the space_info structs.  This is only called during
+	 * the final stages of unmount, and so we know nobody is
+	 * using them.  We call synchronize_rcu() once before we start,
+	 * just to be on the safe side.
+	 */
+	synchronize_rcu();
+
+	while(!list_empty(&info->space_info)) {
+		space_info = list_entry(info->space_info.next,
+					struct btrfs_space_info,
+					list);
+
+		list_del(&space_info->list);
+		kfree(space_info);
+	}
 	return 0;
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1316139bf9e8..7aa3810d7f69 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1459,6 +1459,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
 	device->fs_devices->total_rw_bytes += diff;
 
 	device->total_bytes = new_size;
+	btrfs_clear_space_info_full(device->dev_root->fs_info);
+
 	return btrfs_update_device(trans, device);
 }
 
-- 
cgit v1.2.2


From 913d952eb573c3d1f7487e83b5590e13e7cae2bd Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Tue, 10 Mar 2009 13:17:18 -0400
Subject: Btrfs: Clear space_info full when adding new devices

The full flag on the space info structs tells the allocator not to try
and allocate more chunks because the devices in the FS are fully allocated.

When more devices are added, we need to clear the full flag so the allocator
knows it has more space available.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7aa3810d7f69..dd06e18e5aac 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1374,6 +1374,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 		ret = btrfs_add_device(trans, root, device);
 	}
 
+	/*
+	 * we've got more storage, clear any full flags on the space
+	 * infos
+	 */
+	btrfs_clear_space_info_full(root->fs_info);
+
 	unlock_chunks(root);
 	btrfs_commit_transaction(trans, root);
 
-- 
cgit v1.2.2


From 395a87bfefbc400011417e9eaae33169f9f036c0 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 10 Mar 2009 18:18:47 -0400
Subject: ext4: fix header check in ext4_ext_search_right() for deep extent
 trees.

The ext4_ext_search_right() function is confusing; it uses a
"depth" variable which is 0 at the root and maximum at the leaves,
but the on-disk metadata uses a "depth" (actually eh_depth) which
is opposite: maximum at the root, and 0 at the leaves.

The ext4_ext_check_header() function is given a depth and checks
the header against that depth; it expects the on-disk semantics,
but we are giving it the opposite in the while loop in this
function.  We should be giving it the on-disk notion of "depth"
which we can get from (p_depth - depth) - and if you look, the last
(more commonly hit) call to ext4_ext_check_header() does just this.

Sending in the wrong depth results in (incorrect) messages
about corruption:

EXT4-fs error (device sdb1): ext4_ext_search_right: bad header
in inode #2621457: unexpected eh_depth - magic f30a, entries 340,
max 340(0), depth 1(2)

http://bugzilla.kernel.org/show_bug.cgi?id=12821

Reported-by: David Dindorp <ddi@dubex.dk>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e2eab196875f..e0aa4fe4f596 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1122,7 +1122,8 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
 	struct ext4_extent_idx *ix;
 	struct ext4_extent *ex;
 	ext4_fsblk_t block;
-	int depth, ee_len;
+	int depth;	/* Note, NOT eh_depth; depth from top of tree */
+	int ee_len;
 
 	BUG_ON(path == NULL);
 	depth = path->p_depth;
@@ -1179,7 +1180,8 @@ got_index:
 		if (bh == NULL)
 			return -EIO;
 		eh = ext_block_hdr(bh);
-		if (ext4_ext_check_header(inode, eh, depth)) {
+		/* subtract from p_depth to get proper eh_depth */
+		if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
 			put_bh(bh);
 			return -EIO;
 		}
-- 
cgit v1.2.2


From 2842c3b5449f31470b61db716f1926b594fb6156 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 12 Mar 2009 12:20:01 -0400
Subject: ext4: Print the find_group_flex() warning only once

This is a short-term warning, and even printk_ratelimit() can result
in too much noise in system logs.  So only print it once as a warning.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ialloc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 627f8c3337a3..2d2b3585ee91 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -698,6 +698,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 	struct inode *ret;
 	ext4_group_t i;
 	int free = 0;
+	static int once = 1;
 	ext4_group_t flex_group;
 
 	/* Cannot create files in a deleted directory */
@@ -719,7 +720,8 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
 		ret2 = find_group_flex(sb, dir, &group);
 		if (ret2 == -1) {
 			ret2 = find_group_other(sb, dir, &group);
-			if (ret2 == 0 && printk_ratelimit())
+			if (ret2 == 0 && once)
+				once = 0;
 				printk(KERN_NOTICE "ext4: find_group_flex "
 				       "failed, fallback succeeded dir %lu\n",
 				       dir->i_ino);
-- 
cgit v1.2.2


From 8d03c7a0c550e7ab24cadcef5e66656bfadec8b9 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 14 Mar 2009 11:51:46 -0400
Subject: ext4: fix bogus BUG_ONs in mballoc code

Thiemo Nagel reported that:

# dd if=/dev/zero of=image.ext4 bs=1M count=2
# mkfs.ext4 -v -F -b 1024 -m 0 -g 512 -G 4 -I 128 -N 1 \
  -O large_file,dir_index,flex_bg,extent,sparse_super image.ext4
# mount -o loop image.ext4 mnt/
# dd if=/dev/zero of=mnt/file

oopsed, with a BUG_ON in ext4_mb_normalize_request because
size == EXT4_BLOCKS_PER_GROUP

It appears to me (esp. after talking to Andreas) that the BUG_ON
is bogus; a request of exactly EXT4_BLOCKS_PER_GROUP should
be allowed, though larger sizes do indicate a problem.

Fix that and another (apparently rare) codepath with a similar check.

Reported-by: Thiemo Nagel <thiemo.nagel@ph.tum.de>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4415beeb0b62..41f4348b62f5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1447,7 +1447,7 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
 	struct ext4_free_extent *gex = &ac->ac_g_ex;
 
 	BUG_ON(ex->fe_len <= 0);
-	BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+	BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
 
@@ -3292,7 +3292,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	}
 	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical);
-	BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 
 	/* now prepare goal request */
 
-- 
cgit v1.2.2


From ee6f779b9e0851e2f7da292a9f58e0095edf615a Mon Sep 17 00:00:00 2001
From: Zhang Le <r0bertz@gentoo.org>
Date: Mon, 16 Mar 2009 14:44:31 +0800
Subject: filp->f_pos not correctly updated in proc_task_readdir

filp->f_pos only gets updated at the end of the function. Thus the d_off of
the dirents in the middle will be 0, and this causes a problem in glibc's
readdir implementation, specifically an endless loop: when overflow occurs,
f_pos will be set to the next dirent to read, but that value will be 0 unless
the next one is the last one. So it will start over again and again.

There is a sample program in man 2 getdents. This is the output of the program
running on a multithread program's task dir before this patch is applied:

  $ ./a.out /proc/3807/task
  --------------- nread=128 ---------------
  i-node#  file type  d_reclen  d_off   d_name
    506442  directory    16          1  .
    506441  directory    16          0  ..
    506443  directory    16          0  3807
    506444  directory    16          0  3809
    506445  directory    16          0  3812
    506446  directory    16          0  3861
    506447  directory    16          0  3862
    506448  directory    16          8  3863

This is the output after this patch is applied

  $ ./a.out /proc/3807/task
  --------------- nread=128 ---------------
  i-node#  file type  d_reclen  d_off   d_name
    506442  directory    16          1  .
    506441  directory    16          2  ..
    506443  directory    16          3  3807
    506444  directory    16          4  3809
    506445  directory    16          5  3812
    506446  directory    16          6  3861
    506447  directory    16          7  3862
    506448  directory    16          8  3863

Signed-off-by: Zhang Le <r0bertz@gentoo.org>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0c9de19a1633..cc6ea2329e71 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3066,7 +3066,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 	int retval = -ENOENT;
 	ino_t ino;
 	int tid;
-	unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
 	struct pid_namespace *ns;
 
 	task = get_proc_task(inode);
@@ -3083,18 +3082,18 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 		goto out_no_task;
 	retval = 0;
 
-	switch (pos) {
+	switch (filp->f_pos) {
 	case 0:
 		ino = inode->i_ino;
-		if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
+		if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
 			goto out;
-		pos++;
+		filp->f_pos++;
 		/* fall through */
 	case 1:
 		ino = parent_ino(dentry);
-		if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
+		if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
 			goto out;
-		pos++;
+		filp->f_pos++;
 		/* fall through */
 	}
 
@@ -3104,9 +3103,9 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 	ns = filp->f_dentry->d_sb->s_fs_info;
 	tid = (int)filp->f_version;
 	filp->f_version = 0;
-	for (task = first_tid(leader, tid, pos - 2, ns);
+	for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
 	     task;
-	     task = next_tid(task), pos++) {
+	     task = next_tid(task), filp->f_pos++) {
 		tid = task_pid_nr_ns(task, ns);
 		if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
 			/* returning this tgid failed, save it as the first
@@ -3117,7 +3116,6 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 		}
 	}
 out:
-	filp->f_pos = pos;
 	put_task_struct(leader);
 out_no_task:
 	return retval;
-- 
cgit v1.2.2


From d33a1976fbee1ee321d6f014333d8f03a39d526c Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 16 Mar 2009 23:25:40 -0400
Subject: ext4: fix bb_prealloc_list corruption due to wrong group locking

This is for Red Hat bug 490026: EXT4 panic, list corruption in
ext4_mb_new_inode_pa

ext4_lock_group(sb, group) is supposed to protect this list for
each group, and a common code flow to remove a pa from it is like
this:

    ext4_get_group_no_and_offset(sb, pa->pa_pstart, &grp, NULL);
    ext4_lock_group(sb, grp);
    list_del(&pa->pa_group_list);
    ext4_unlock_group(sb, grp);

so it's critical that we get the right group number back for
this prealloc context, to lock the right group (the one
associated with this pa) and prevent concurrent list manipulation.

however, ext4_mb_put_pa() passes in (pa->pa_pstart - 1) with a
comment, "-1 is to protect from crossing allocation group".

This makes sense for the group_pa, where pa_pstart is advanced
by the length which has been used (in ext4_mb_release_context()),
and when the entire length has been used, pa_pstart has been
advanced to the first block of the next group.

However, for inode_pa, pa_pstart is never advanced; it's just
set once to the first block in the group and not moved after
that.  So in this case, if we subtract one in ext4_mb_put_pa(),
we are actually locking the *previous* group, and opening the
race with the other threads which do not subtract off the extra
block.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 41f4348b62f5..9f61e62f435f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3589,6 +3589,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 			struct super_block *sb, struct ext4_prealloc_space *pa)
 {
 	ext4_group_t grp;
+	ext4_fsblk_t grp_blk;
 
 	if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
 		return;
@@ -3603,8 +3604,12 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 	pa->pa_deleted = 1;
 	spin_unlock(&pa->pa_lock);
 
-	/* -1 is to protect from crossing allocation group */
-	ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
+	grp_blk = pa->pa_pstart;
+	/* If linear, pa_pstart may be in the next group when pa is used up */
+	if (pa->pa_linear)
+		grp_blk--;
+
+	ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
 
 	/*
 	 * possible race:
-- 
cgit v1.2.2


From ee568b25ee9e160b32d1aef73d8b2ee9c05d34db Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 17 Mar 2009 10:02:35 -0700
Subject: Avoid 64-bit "switch()" statements on 32-bit architectures

Commit ee6f779b9e0851e2f7da292a9f58e0095edf615a ("filp->f_pos not
correctly updated in proc_task_readdir") changed the proc code to use
filp->f_pos directly, rather than through a temporary variable.  In the
process, that caused the operations to be done on the full 64 bits, even
though the offset is never that big.

That's all fine and dandy per se, but for some unfathomable reason gcc
generates absolutely horrid code when using 64-bit values in switch()
statements.  To the point of actually calling out to gcc helper
functions like __cmpdi2 rather than just doing the trivial comparisons
directly the way gcc does for normal compares.  At which point we get
link failures, because we really don't want to support that kind of
crazy code.

Fix this by just casting the f_pos value to "unsigned long", which
is plenty big enough for /proc, and avoids the gcc code generation issue.

Reported-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Zhang Le <r0bertz@gentoo.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index cc6ea2329e71..beaa0ce3b82e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3082,7 +3082,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 		goto out_no_task;
 	retval = 0;
 
-	switch (filp->f_pos) {
+	switch ((unsigned long)filp->f_pos) {
 	case 0:
 		ino = inode->i_ino;
 		if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
-- 
cgit v1.2.2


From 84f09f46b4ee9e4e9b6381f8af31817516d2091b Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 4 Mar 2009 23:05:35 +0200
Subject: NFSD: provide encode routine for OP_OPENATTR

Although this operation is unsupported by our implementation
we still need to provide an encode routine for it to
merely encode its (error) status back in the compound reply.

Thanks to Bill Baker at sun.com for testing with the Sun
OpenSolaris' client, finding, and reporting this bug at
Connectathon 2009.

This bug was introduced in 2.6.27

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/nfsd/nfs4xdr.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f65953be39c0..9250067943d8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2596,6 +2596,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
 	[OP_LOOKUPP]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_NVERIFY]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_OPEN]		= (nfsd4_enc)nfsd4_encode_open,
+	[OP_OPENATTR]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_OPEN_CONFIRM]	= (nfsd4_enc)nfsd4_encode_open_confirm,
 	[OP_OPEN_DOWNGRADE]	= (nfsd4_enc)nfsd4_encode_open_downgrade,
 	[OP_PUTFH]		= (nfsd4_enc)nfsd4_encode_noop,
-- 
cgit v1.2.2


From a8e7d49aa7be728c4ae241a75a2a124cdcabc0c5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 19 Mar 2009 11:32:05 -0700
Subject: Fix race in create_empty_buffers() vs __set_page_dirty_buffers()

Nick Piggin noticed this (very unlikely) race between setting a page
dirty and creating the buffers for it - we need to hold the mapping
private_lock until we've set the page dirty bit in order to make sure
that create_empty_buffers() cannot build up a set of buffers without
the dirty bits set when the page is dirty.

I doubt anybody has ever hit this race (and it didn't solve the issue
Nick was looking at), but as Nick says: "Still, it does appear to solve
a real race, which we should close."

Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 9f697419ed8e..891e1c78e4f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -760,15 +760,9 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  * If warn is true, then emit a warning if the page is not uptodate and has
  * not been truncated.
  */
-static int __set_page_dirty(struct page *page,
+static void __set_page_dirty(struct page *page,
 		struct address_space *mapping, int warn)
 {
-	if (unlikely(!mapping))
-		return !TestSetPageDirty(page);
-
-	if (TestSetPageDirty(page))
-		return 0;
-
 	spin_lock_irq(&mapping->tree_lock);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
@@ -785,8 +779,6 @@ static int __set_page_dirty(struct page *page,
 	}
 	spin_unlock_irq(&mapping->tree_lock);
 	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
-	return 1;
 }
 
 /*
@@ -816,6 +808,7 @@ static int __set_page_dirty(struct page *page,
  */
 int __set_page_dirty_buffers(struct page *page)
 {
+	int newly_dirty;
 	struct address_space *mapping = page_mapping(page);
 
 	if (unlikely(!mapping))
@@ -831,9 +824,12 @@ int __set_page_dirty_buffers(struct page *page)
 			bh = bh->b_this_page;
 		} while (bh != head);
 	}
+	newly_dirty = !TestSetPageDirty(page);
 	spin_unlock(&mapping->private_lock);
 
-	return __set_page_dirty(page, mapping, 1);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 1);
+	return newly_dirty;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
 
@@ -1262,8 +1258,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
 			return;
 	}
 
-	if (!test_set_buffer_dirty(bh))
-		__set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+	if (!test_set_buffer_dirty(bh)) {
+		struct page *page = bh->b_page;
+		if (!TestSetPageDirty(page))
+			__set_page_dirty(page, page_mapping(page), 0);
+	}
 }
 
 /*
-- 
cgit v1.2.2


From 87c3a86e1c220121d0ced59d1a71e78ed9abc6dd Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Wed, 18 Mar 2009 17:04:19 -0700
Subject: eventfd: remove fput() call from possible IRQ context

Remove a source of fput() call from inside IRQ context.  Myself, like Eric,
wasn't able to reproduce an fput() call from IRQ context, but Jeff said he was
able to, with the attached test program.  Independently from this, the bug is
conceptually there, so we might be better off fixing it.  This patch adds an
optimization similar to the one we already do on ->ki_filp, on ->ki_eventfd.
Playing with ->f_count directly is not pretty in general, but the alternative
here would be to add a brand new delayed fput() infrastructure, that I'm not
sure is worth it.

Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Zach Brown <zach.brown@oracle.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 8fa77e233944..4a9d4d641fb9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -443,7 +443,7 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
 	req->private = NULL;
 	req->ki_iovec = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
-	req->ki_eventfd = ERR_PTR(-EINVAL);
+	req->ki_eventfd = NULL;
 
 	/* Check if the completion queue has enough free space to
 	 * accept an event from this io.
@@ -485,8 +485,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 {
 	assert_spin_locked(&ctx->ctx_lock);
 
-	if (!IS_ERR(req->ki_eventfd))
-		fput(req->ki_eventfd);
 	if (req->ki_dtor)
 		req->ki_dtor(req);
 	if (req->ki_iovec != &req->ki_inline_vec)
@@ -508,8 +506,11 @@ static void aio_fput_routine(struct work_struct *data)
 		list_del(&req->ki_list);
 		spin_unlock_irq(&fput_lock);
 
-		/* Complete the fput */
-		__fput(req->ki_filp);
+		/* Complete the fput(s) */
+		if (req->ki_filp != NULL)
+			__fput(req->ki_filp);
+		if (req->ki_eventfd != NULL)
+			__fput(req->ki_eventfd);
 
 		/* Link the iocb into the context's free list */
 		spin_lock_irq(&ctx->ctx_lock);
@@ -527,12 +528,14 @@ static void aio_fput_routine(struct work_struct *data)
  */
 static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 {
+	int schedule_putreq = 0;
+
 	dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
 		req, atomic_long_read(&req->ki_filp->f_count));
 
 	assert_spin_locked(&ctx->ctx_lock);
 
-	req->ki_users --;
+	req->ki_users--;
 	BUG_ON(req->ki_users < 0);
 	if (likely(req->ki_users))
 		return 0;
@@ -540,10 +543,23 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
 	req->ki_cancel = NULL;
 	req->ki_retry = NULL;
 
-	/* Must be done under the lock to serialise against cancellation.
-	 * Call this aio_fput as it duplicates fput via the fput_work.
+	/*
+	 * Try to optimize the aio and eventfd file* puts, by avoiding to
+	 * schedule work in case it is not __fput() time. In normal cases,
+	 * we would not be holding the last reference to the file*, so
+	 * this function will be executed w/out any aio kthread wakeup.
 	 */
-	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+	if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count)))
+		schedule_putreq++;
+	else
+		req->ki_filp = NULL;
+	if (req->ki_eventfd != NULL) {
+		if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
+			schedule_putreq++;
+		else
+			req->ki_eventfd = NULL;
+	}
+	if (unlikely(schedule_putreq)) {
 		get_ioctx(ctx);
 		spin_lock(&fput_lock);
 		list_add(&req->ki_list, &fput_head);
@@ -1009,7 +1025,7 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
 	 * eventfd. The eventfd_signal() function is safe to be called
 	 * from IRQ context.
 	 */
-	if (!IS_ERR(iocb->ki_eventfd))
+	if (iocb->ki_eventfd != NULL)
 		eventfd_signal(iocb->ki_eventfd, 1);
 
 put_rq:
@@ -1608,6 +1624,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
+			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
 	}
-- 
cgit v1.2.2


From 65c24491b4fef017c64e39ec64384fde5e05e0a0 Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 18 Mar 2009 17:04:21 -0700
Subject: aio: lookup_ioctx can return the wrong value when looking up a bogus
 context

The libaio test harness turned up a problem whereby lookup_ioctx on a
bogus io context was returning the 1 valid io context from the list
(harness/cases/3.p).

Because of that, an extra put_iocontext was done, and when the process
exited, it hit a BUG_ON in the put_iocontext macro called from exit_aio
(since we expect a users count of 1 and instead get 0).

The problem was introduced by "aio: make the lookup_ioctx() lockless"
(commit abf137dd7712132ee56d5b3143c2ff61a72a5faa).

Thanks to Zach for pointing out that hlist_for_each_entry_rcu will not
return with a NULL tpos at the end of the loop, even if the entry was
not found.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
Acked-by: Jens Axboe <jens.axboe@oracle.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 4a9d4d641fb9..76da12537956 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -587,7 +587,7 @@ int aio_put_req(struct kiocb *req)
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
 	struct mm_struct *mm = current->mm;
-	struct kioctx *ctx = NULL;
+	struct kioctx *ctx, *ret = NULL;
 	struct hlist_node *n;
 
 	rcu_read_lock();
@@ -595,12 +595,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
 		if (ctx->user_id == ctx_id && !ctx->dead) {
 			get_ioctx(ctx);
+			ret = ctx;
 			break;
 		}
 	}
 
 	rcu_read_unlock();
-	return ctx;
+	return ret;
 }
 
 /*
-- 
cgit v1.2.2


From 8faece5f906725c10e7a1f6caf84452abadbdc7b Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Fri, 20 Mar 2009 01:25:09 -0500
Subject: eCryptfs: Allocate a variable number of pages for file headers

When allocating the memory used to store the eCryptfs header contents, a
single, zeroed page was being allocated with get_zeroed_page().
However, the size of an eCryptfs header is either PAGE_CACHE_SIZE or
ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE (8192), whichever is larger, and is
stored in the file's private_data->crypt_stat->num_header_bytes_at_front
field.

ecryptfs_write_metadata_to_contents() was using
num_header_bytes_at_front to decide how many bytes should be written to
the lower filesystem for the file header.  Unfortunately, at least 8K
was being written from the page, despite the chance of the single,
zeroed page being smaller than 8K.  This resulted in random areas of
kernel memory being written between the 0x1000 and 0x1FFF bytes offsets
in the eCryptfs file headers if PAGE_SIZE was 4K.

This patch allocates a variable number of pages, calculated with
num_header_bytes_at_front, and passes the number of allocated pages
along to ecryptfs_write_metadata_to_contents().

Thanks to Florian Streibelt for reporting the data leak and working with
me to find the problem.  2.6.28 is the only kernel release with this
vulnerability.  Corresponds to CVE-2009-0787

Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Acked-by: Dustin Kirkland <kirkland@canonical.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Eugene Teo <eugeneteo@kernel.sg>
Cc: Greg KH <greg@kroah.com>
Cc: dann frazier <dannf@dannf.org>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: Florian Streibelt <florian@f-streibelt.de>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/crypto.c | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index bdca1f4b3a3e..75bee99de0f6 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1324,14 +1324,13 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t max,
 }
 
 static int
-ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
-				    struct dentry *ecryptfs_dentry,
-				    char *virt)
+ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
+				    char *virt, size_t virt_len)
 {
 	int rc;
 
 	rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
-				  0, crypt_stat->num_header_bytes_at_front);
+				  0, virt_len);
 	if (rc)
 		printk(KERN_ERR "%s: Error attempting to write header "
 		       "information to lower file; rc = [%d]\n", __func__,
@@ -1341,7 +1340,6 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
 
 static int
 ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
-				 struct ecryptfs_crypt_stat *crypt_stat,
 				 char *page_virt, size_t size)
 {
 	int rc;
@@ -1351,6 +1349,17 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
 	return rc;
 }
 
+static unsigned long ecryptfs_get_zeroed_pages(gfp_t gfp_mask,
+					       unsigned int order)
+{
+	struct page *page;
+
+	page = alloc_pages(gfp_mask | __GFP_ZERO, order);
+	if (page)
+		return (unsigned long) page_address(page);
+	return 0;
+}
+
 /**
  * ecryptfs_write_metadata
  * @ecryptfs_dentry: The eCryptfs dentry
@@ -1367,7 +1376,9 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
 {
 	struct ecryptfs_crypt_stat *crypt_stat =
 		&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+	unsigned int order;
 	char *virt;
+	size_t virt_len;
 	size_t size = 0;
 	int rc = 0;
 
@@ -1383,33 +1394,35 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
 		rc = -EINVAL;
 		goto out;
 	}
+	virt_len = crypt_stat->num_header_bytes_at_front;
+	order = get_order(virt_len);
 	/* Released in this function */
-	virt = (char *)get_zeroed_page(GFP_KERNEL);
+	virt = (char *)ecryptfs_get_zeroed_pages(GFP_KERNEL, order);
 	if (!virt) {
 		printk(KERN_ERR "%s: Out of memory\n", __func__);
 		rc = -ENOMEM;
 		goto out;
 	}
-	rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size,
-					 crypt_stat, ecryptfs_dentry);
+	rc = ecryptfs_write_headers_virt(virt, virt_len, &size, crypt_stat,
+					 ecryptfs_dentry);
 	if (unlikely(rc)) {
 		printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
 		       __func__, rc);
 		goto out_free;
 	}
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-		rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry,
-						      crypt_stat, virt, size);
+		rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, virt,
+						      size);
 	else
-		rc = ecryptfs_write_metadata_to_contents(crypt_stat,
-							 ecryptfs_dentry, virt);
+		rc = ecryptfs_write_metadata_to_contents(ecryptfs_dentry, virt,
+							 virt_len);
 	if (rc) {
 		printk(KERN_ERR "%s: Error writing metadata out to lower file; "
 		       "rc = [%d]\n", __func__, rc);
 		goto out_free;
 	}
 out_free:
-	free_page((unsigned long)virt);
+	free_pages((unsigned long)virt, order);
 out:
 	return rc;
 }
-- 
cgit v1.2.2


From 2aac0cf88681bfa092f731553bc7fbd23516be73 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Fri, 20 Mar 2009 02:23:57 -0500
Subject: eCryptfs: NULL crypt_stat dereference during lookup

If ecryptfs_encrypted_view or ecryptfs_xattr_metadata was specified as
a mount option, a NULL pointer dereference of crypt_stat was possible
during lookup.

This patch moves the crypt_stat assignment into
ecryptfs_lookup_and_interpose_lower(), ensuring that crypt_stat
will not be NULL before we attempt to dereference it.

Thanks to Dan Carpenter and his static analysis tool, smatch, for
finding this bug.

Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Acked-by: Dustin Kirkland <kirkland@canonical.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/crypto.c          | 10 ++++++----
 fs/ecryptfs/ecryptfs_kernel.h |  1 -
 fs/ecryptfs/inode.c           | 32 ++++++++++++--------------------
 3 files changed, 18 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 75bee99de0f6..8b65f289ee00 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -2221,17 +2221,19 @@ int ecryptfs_decode_and_decrypt_filename(char **plaintext_name,
 					 struct dentry *ecryptfs_dir_dentry,
 					 const char *name, size_t name_size)
 {
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+		&ecryptfs_superblock_to_private(
+			ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
 	char *decoded_name;
 	size_t decoded_name_size;
 	size_t packet_size;
 	int rc = 0;
 
-	if ((name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
+	if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
+	    && !(mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+	    && (name_size > ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE)
 	    && (strncmp(name, ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
 			ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE) == 0)) {
-		struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
-			&ecryptfs_superblock_to_private(
-				ecryptfs_dir_dentry->d_sb)->mount_crypt_stat;
 		const char *orig_name = name;
 		size_t orig_name_size = name_size;
 
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index eb2267eca1fe..ac749d4d644f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -620,7 +620,6 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
 		       u32 flags);
 int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 					struct dentry *lower_dentry,
-					struct ecryptfs_crypt_stat *crypt_stat,
 					struct inode *ecryptfs_dir_inode,
 					struct nameidata *ecryptfs_nd);
 int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5697899a168d..55b3145b8072 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -246,7 +246,6 @@ out:
  */
 int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 					struct dentry *lower_dentry,
-					struct ecryptfs_crypt_stat *crypt_stat,
 					struct inode *ecryptfs_dir_inode,
 					struct nameidata *ecryptfs_nd)
 {
@@ -254,6 +253,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 	struct vfsmount *lower_mnt;
 	struct inode *lower_inode;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+	struct ecryptfs_crypt_stat *crypt_stat;
 	char *page_virt = NULL;
 	u64 file_size;
 	int rc = 0;
@@ -314,6 +314,11 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 			goto out_free_kmem;
 		}
 	}
+	crypt_stat = &ecryptfs_inode_to_private(
+					ecryptfs_dentry->d_inode)->crypt_stat;
+	/* TODO: lock for crypt_stat comparison */
+	if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
+			ecryptfs_set_default_sizes(crypt_stat);
 	rc = ecryptfs_read_and_validate_header_region(page_virt,
 						      ecryptfs_dentry->d_inode);
 	if (rc) {
@@ -362,9 +367,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 {
 	char *encrypted_and_encoded_name = NULL;
 	size_t encrypted_and_encoded_name_size;
-	struct ecryptfs_crypt_stat *crypt_stat = NULL;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
-	struct ecryptfs_inode_info *inode_info;
 	struct dentry *lower_dir_dentry, *lower_dentry;
 	int rc = 0;
 
@@ -388,26 +391,15 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	}
 	if (lower_dentry->d_inode)
 		goto lookup_and_interpose;
-	inode_info =  ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
-	if (inode_info) {
-		crypt_stat = &inode_info->crypt_stat;
-		/* TODO: lock for crypt_stat comparison */
-		if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
-			ecryptfs_set_default_sizes(crypt_stat);
-	}
-	if (crypt_stat)
-		mount_crypt_stat = crypt_stat->mount_crypt_stat;
-	else
-		mount_crypt_stat = &ecryptfs_superblock_to_private(
-			ecryptfs_dentry->d_sb)->mount_crypt_stat;
-	if (!(crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
-	    && !(mount_crypt_stat && (mount_crypt_stat->flags
-				     & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
+	mount_crypt_stat = &ecryptfs_superblock_to_private(
+				ecryptfs_dentry->d_sb)->mount_crypt_stat;
+	if (!(mount_crypt_stat
+	    && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)))
 		goto lookup_and_interpose;
 	dput(lower_dentry);
 	rc = ecryptfs_encrypt_and_encode_filename(
 		&encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
-		crypt_stat, mount_crypt_stat, ecryptfs_dentry->d_name.name,
+		NULL, mount_crypt_stat, ecryptfs_dentry->d_name.name,
 		ecryptfs_dentry->d_name.len);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to encrypt and encode "
@@ -426,7 +418,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 	}
 lookup_and_interpose:
 	rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
-						 crypt_stat, ecryptfs_dir_inode,
+						 ecryptfs_dir_inode,
 						 ecryptfs_nd);
 	goto out;
 out_d_drop:
-- 
cgit v1.2.2


From f762dd68218665bb87d4e4a0eeac86fde7530293 Mon Sep 17 00:00:00 2001
From: Gertjan van Wingerde <gwingerde@gmail.com>
Date: Sat, 21 Mar 2009 23:18:57 +0100
Subject: Update my email address

Update all previous incarnations of my email address to the correct one.

Signed-off-by: Gertjan van Wingerde <gwingerde@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/minix/inode.c | 2 +-
 fs/ufs/super.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index d1d1eb84679d..618865b3128b 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
- *  Copyright (C) 1996  Gertjan van Wingerde    (gertjan@cs.vu.nl)
+ *  Copyright (C) 1996  Gertjan van Wingerde
  *	Minix V2 fs support.
  *
  *  Modified for 680x0 by Andreas Schwab
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index e65212dfb60e..261a1c2f22dd 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -41,7 +41,7 @@
  * Stefan Reinauer <stepan@home.culture.mipt.ru>
  *
  * Module usage counts added on 96/04/29 by
- * Gertjan van Wingerde <gertjan@cs.vu.nl>
+ * Gertjan van Wingerde <gwingerde@gmail.com>
  *
  * Clean swab support on 19970406 by
  * Francois-Rene Rideau <fare@tunes.org>
-- 
cgit v1.2.2