aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/9p/vfs_inode.c18
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/afs/callback.c2
-rw-r--r--fs/afs/inode.c2
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/aio.c4
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/binfmt_flat.c8
-rw-r--r--fs/block_dev.c16
-rw-r--r--fs/buffer.c13
-rw-r--r--fs/cifs/CHANGES5
-rw-r--r--fs/cifs/asn1.c14
-rw-r--r--fs/cifs/cifsfs.c21
-rw-r--r--fs/cifs/cifsglob.h3
-rw-r--r--fs/cifs/cifspdu.h23
-rw-r--r--fs/cifs/cifssmb.c12
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/cifs/dir.c4
-rw-r--r--fs/cifs/file.c7
-rw-r--r--fs/cifs/inode.c148
-rw-r--r--fs/cifs/misc.c3
-rw-r--r--fs/cifs/readdir.c77
-rw-r--r--fs/dcache.c68
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/miscdev.c2
-rw-r--r--fs/ecryptfs/read_write.c22
-rw-r--r--fs/exec.c9
-rw-r--r--fs/ext3/resize.c3
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/balloc.c71
-rw-r--r--fs/ext4/mballoc.c8
-rw-r--r--fs/ext4/resize.c6
-rw-r--r--fs/ext4/super.c40
-rw-r--r--fs/fat/file.c44
-rw-r--r--fs/fuse/inode.c11
-rw-r--r--fs/gfs2/bmap.c23
-rw-r--r--fs/gfs2/rgrp.c2
-rw-r--r--fs/jbd2/commit.c1
-rw-r--r--fs/jbd2/recovery.c12
-rw-r--r--fs/libfs.c46
-rw-r--r--fs/locks.c6
-rw-r--r--fs/namei.c26
-rw-r--r--fs/nfs/mount_clnt.c5
-rw-r--r--fs/nfs/super.c76
-rw-r--r--fs/nfs/write.c7
-rw-r--r--fs/ntfs/upcase.c5
-rw-r--r--fs/ocfs2/cluster/nodemanager.c74
-rw-r--r--fs/ocfs2/cluster/nodemanager.h4
-rw-r--r--fs/ocfs2/cluster/tcp.c28
-rw-r--r--fs/ocfs2/cluster/tcp.h12
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h32
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h12
-rw-r--r--fs/ocfs2/stack_o2cb.c41
-rw-r--r--fs/ocfs2/stack_user.c37
-rw-r--r--fs/ocfs2/stackglue.c119
-rw-r--r--fs/ocfs2/stackglue.h19
-rw-r--r--fs/open.c37
-rw-r--r--fs/pipe.c10
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c33
-rw-r--r--fs/proc/inode.c3
-rw-r--r--fs/proc/proc_misc.c8
-rw-r--r--fs/proc/task_mmu.c175
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/select.c2
-rw-r--r--fs/splice.c17
-rw-r--r--fs/udf/super.c57
-rw-r--r--fs/udf/udfdecl.h2
-rw-r--r--fs/utimes.c59
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c24
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h19
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c17
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h8
-rw-r--r--fs/xfs/xfs_inode.c9
-rw-r--r--fs/xfs/xfs_vnodeops.c112
-rw-r--r--fs/xfs/xfs_vnodeops.h3
81 files changed, 1067 insertions, 815 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index fd01d90cada5..57997fa14e69 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -51,4 +51,4 @@ int v9fs_dir_release(struct inode *inode, struct file *filp);
51int v9fs_file_open(struct inode *inode, struct file *file); 51int v9fs_file_open(struct inode *inode, struct file *file);
52void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat); 52void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
53void v9fs_dentry_release(struct dentry *); 53void v9fs_dentry_release(struct dentry *);
54int v9fs_uflags2omode(int uflags); 54int v9fs_uflags2omode(int uflags, int extended);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 0d55affe37d4..52944d2249a4 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -59,7 +59,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
59 59
60 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file); 60 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
61 v9ses = v9fs_inode2v9ses(inode); 61 v9ses = v9fs_inode2v9ses(inode);
62 omode = v9fs_uflags2omode(file->f_flags); 62 omode = v9fs_uflags2omode(file->f_flags, v9fs_extended(v9ses));
63 fid = file->private_data; 63 fid = file->private_data;
64 if (!fid) { 64 if (!fid) {
65 fid = v9fs_fid_clone(file->f_path.dentry); 65 fid = v9fs_fid_clone(file->f_path.dentry);
@@ -75,6 +75,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
75 inode->i_size = 0; 75 inode->i_size = 0;
76 inode->i_blocks = 0; 76 inode->i_blocks = 0;
77 } 77 }
78 if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses)))
79 generic_file_llseek(file, 0, SEEK_END);
78 } 80 }
79 81
80 file->private_data = fid; 82 file->private_data = fid;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 40fa807bd929..c95295c65045 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -132,10 +132,10 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
132/** 132/**
133 * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits 133 * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits
134 * @uflags: flags to convert 134 * @uflags: flags to convert
135 * 135 * @extended: if .u extensions are active
136 */ 136 */
137 137
138int v9fs_uflags2omode(int uflags) 138int v9fs_uflags2omode(int uflags, int extended)
139{ 139{
140 int ret; 140 int ret;
141 141
@@ -155,14 +155,16 @@ int v9fs_uflags2omode(int uflags)
155 break; 155 break;
156 } 156 }
157 157
158 if (uflags & O_EXCL)
159 ret |= P9_OEXCL;
160
161 if (uflags & O_TRUNC) 158 if (uflags & O_TRUNC)
162 ret |= P9_OTRUNC; 159 ret |= P9_OTRUNC;
163 160
164 if (uflags & O_APPEND) 161 if (extended) {
165 ret |= P9_OAPPEND; 162 if (uflags & O_EXCL)
163 ret |= P9_OEXCL;
164
165 if (uflags & O_APPEND)
166 ret |= P9_OAPPEND;
167 }
166 168
167 return ret; 169 return ret;
168} 170}
@@ -506,7 +508,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
506 flags = O_RDWR; 508 flags = O_RDWR;
507 509
508 fid = v9fs_create(v9ses, dir, dentry, NULL, perm, 510 fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
509 v9fs_uflags2omode(flags)); 511 v9fs_uflags2omode(flags, v9fs_extended(v9ses)));
510 if (IS_ERR(fid)) { 512 if (IS_ERR(fid)) {
511 err = PTR_ERR(fid); 513 err = PTR_ERR(fid);
512 fid = NULL; 514 fid = NULL;
diff --git a/fs/Kconfig b/fs/Kconfig
index cf12c403b8c7..2694648cbd1b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -830,7 +830,7 @@ config NTFS_FS
830 from the project web site. 830 from the project web site.
831 831
832 For more information see <file:Documentation/filesystems/ntfs.txt> 832 For more information see <file:Documentation/filesystems/ntfs.txt>
833 and <http://linux-ntfs.sourceforge.net/>. 833 and <http://www.linux-ntfs.org/>.
834 834
835 To compile this file system support as a module, choose M here: the 835 To compile this file system support as a module, choose M here: the
836 module will be called ntfs. 836 module will be called ntfs.
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 55e8ee1900a5..3263084eef9e 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC
42 42
43config BINFMT_FLAT 43config BINFMT_FLAT
44 bool "Kernel support for flat binaries" 44 bool "Kernel support for flat binaries"
45 depends on !MMU 45 depends on !MMU && (!FRV || BROKEN)
46 help 46 help
47 Support uClinux FLAT format binaries. 47 Support uClinux FLAT format binaries.
48 48
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index a78d5b236bb1..587ef5123cd8 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -8,7 +8,7 @@
8 * along with this program; if not, write to the Free Software 8 * along with this program; if not, write to the Free Software
9 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 9 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
10 * 10 *
11 * Authors: David Woodhouse <dwmw2@cambridge.redhat.com> 11 * Authors: David Woodhouse <dwmw2@infradead.org>
12 * David Howells <dhowells@redhat.com> 12 * David Howells <dhowells@redhat.com>
13 * 13 *
14 */ 14 */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 08db82e1343a..bb47217f6a18 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -8,7 +8,7 @@
8 * along with this program; if not, write to the Free Software 8 * along with this program; if not, write to the Free Software
9 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 9 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
10 * 10 *
11 * Authors: David Woodhouse <dwmw2@cambridge.redhat.com> 11 * Authors: David Woodhouse <dwmw2@infradead.org>
12 * David Howells <dhowells@redhat.com> 12 * David Howells <dhowells@redhat.com>
13 * 13 *
14 */ 14 */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 4b572b801d8d..7e3faeef6818 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -10,7 +10,7 @@
10 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 10 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
11 * 11 *
12 * Authors: David Howells <dhowells@redhat.com> 12 * Authors: David Howells <dhowells@redhat.com>
13 * David Woodhouse <dwmw2@redhat.com> 13 * David Woodhouse <dwmw2@infradead.org>
14 * 14 *
15 */ 15 */
16 16
diff --git a/fs/aio.c b/fs/aio.c
index b5253e77eb2f..0fb3117ddd93 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -591,10 +591,6 @@ static void use_mm(struct mm_struct *mm)
591 atomic_inc(&mm->mm_count); 591 atomic_inc(&mm->mm_count);
592 tsk->mm = mm; 592 tsk->mm = mm;
593 tsk->active_mm = mm; 593 tsk->active_mm = mm;
594 /*
595 * Note that on UML this *requires* PF_BORROWED_MM to be set, otherwise
596 * it won't work. Update it accordingly if you change it here
597 */
598 switch_mm(active_mm, mm, tsk); 594 switch_mm(active_mm, mm, tsk);
599 task_unlock(tsk); 595 task_unlock(tsk);
600 596
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0fa95b198e6e..d48ff5f370f4 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -16,7 +16,6 @@
16#include <linux/time.h> 16#include <linux/time.h>
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/mman.h> 18#include <linux/mman.h>
19#include <linux/a.out.h>
20#include <linux/errno.h> 19#include <linux/errno.h>
21#include <linux/signal.h> 20#include <linux/signal.h>
22#include <linux/binfmts.h> 21#include <linux/binfmts.h>
@@ -548,7 +547,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
548 struct { 547 struct {
549 struct elfhdr elf_ex; 548 struct elfhdr elf_ex;
550 struct elfhdr interp_elf_ex; 549 struct elfhdr interp_elf_ex;
551 struct exec interp_ex;
552 } *loc; 550 } *loc;
553 551
554 loc = kmalloc(sizeof(*loc), GFP_KERNEL); 552 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
@@ -680,7 +678,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
680 } 678 }
681 679
682 /* Get the exec headers */ 680 /* Get the exec headers */
683 loc->interp_ex = *((struct exec *)bprm->buf);
684 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf); 681 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
685 break; 682 break;
686 } 683 }
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index ddd35d873391..d051a32e6270 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -390,7 +390,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
390 } 390 }
391 391
392 /* expand the stack mapping to use up the entire allocation granule */ 392 /* expand the stack mapping to use up the entire allocation granule */
393 fullsize = ksize((char *) current->mm->start_brk); 393 fullsize = kobjsize((char *) current->mm->start_brk);
394 if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, 394 if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size,
395 fullsize, 0, 0))) 395 fullsize, 0, 0)))
396 stack_size = fullsize; 396 stack_size = fullsize;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 3b40d45a3a16..2cb1acda3a82 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -548,7 +548,7 @@ static int load_flat_file(struct linux_binprm * bprm,
548 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); 548 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
549 /* Remap to use all availabe slack region space */ 549 /* Remap to use all availabe slack region space */
550 if (realdatastart && (realdatastart < (unsigned long)-4096)) { 550 if (realdatastart && (realdatastart < (unsigned long)-4096)) {
551 reallen = ksize((void *)realdatastart); 551 reallen = kobjsize((void *)realdatastart);
552 if (reallen > len) { 552 if (reallen > len) {
553 realdatastart = do_mremap(realdatastart, len, 553 realdatastart = do_mremap(realdatastart, len,
554 reallen, MREMAP_FIXED, realdatastart); 554 reallen, MREMAP_FIXED, realdatastart);
@@ -600,7 +600,7 @@ static int load_flat_file(struct linux_binprm * bprm,
600 PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); 600 PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
601 /* Remap to use all availabe slack region space */ 601 /* Remap to use all availabe slack region space */
602 if (textpos && (textpos < (unsigned long) -4096)) { 602 if (textpos && (textpos < (unsigned long) -4096)) {
603 reallen = ksize((void *)textpos); 603 reallen = kobjsize((void *)textpos);
604 if (reallen > len) { 604 if (reallen > len) {
605 textpos = do_mremap(textpos, len, reallen, 605 textpos = do_mremap(textpos, len, reallen,
606 MREMAP_FIXED, textpos); 606 MREMAP_FIXED, textpos);
@@ -683,7 +683,7 @@ static int load_flat_file(struct linux_binprm * bprm,
683 */ 683 */
684 current->mm->start_brk = datapos + data_len + bss_len; 684 current->mm->start_brk = datapos + data_len + bss_len;
685 current->mm->brk = (current->mm->start_brk + 3) & ~3; 685 current->mm->brk = (current->mm->start_brk + 3) & ~3;
686 current->mm->context.end_brk = memp + ksize((void *) memp) - stack_len; 686 current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len;
687 } 687 }
688 688
689 if (flags & FLAT_FLAG_KTRACE) 689 if (flags & FLAT_FLAG_KTRACE)
@@ -790,7 +790,7 @@ static int load_flat_file(struct linux_binprm * bprm,
790 790
791 /* zero the BSS, BRK and stack areas */ 791 /* zero the BSS, BRK and stack areas */
792 memset((void*)(datapos + data_len), 0, bss_len + 792 memset((void*)(datapos + data_len), 0, bss_len +
793 (memp + ksize((void *) memp) - stack_len - /* end brk */ 793 (memp + kobjsize((void *) memp) - stack_len - /* end brk */
794 libinfo->lib_list[id].start_brk) + /* start brk */ 794 libinfo->lib_list[id].start_brk) + /* start brk */
795 stack_len); 795 stack_len);
796 796
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7d822fae7765..10d8a0aa871a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -12,6 +12,7 @@
12#include <linux/kmod.h> 12#include <linux/kmod.h>
13#include <linux/major.h> 13#include <linux/major.h>
14#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
15#include <linux/device_cgroup.h>
15#include <linux/highmem.h> 16#include <linux/highmem.h>
16#include <linux/blkdev.h> 17#include <linux/blkdev.h>
17#include <linux/module.h> 18#include <linux/module.h>
@@ -928,9 +929,22 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
928{ 929{
929 struct module *owner = NULL; 930 struct module *owner = NULL;
930 struct gendisk *disk; 931 struct gendisk *disk;
931 int ret = -ENXIO; 932 int ret;
932 int part; 933 int part;
934 int perm = 0;
935
936 if (file->f_mode & FMODE_READ)
937 perm |= MAY_READ;
938 if (file->f_mode & FMODE_WRITE)
939 perm |= MAY_WRITE;
940 /*
941 * hooks: /n/, see "layering violations".
942 */
943 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
944 if (ret != 0)
945 return ret;
933 946
947 ret = -ENXIO;
934 file->f_mapping = bdev->bd_inode->i_mapping; 948 file->f_mapping = bdev->bd_inode->i_mapping;
935 lock_kernel(); 949 lock_kernel();
936 disk = get_gendisk(bdev->bd_dev, &part); 950 disk = get_gendisk(bdev->bd_dev, &part);
diff --git a/fs/buffer.c b/fs/buffer.c
index a073f3f4f013..0f51c0f7c266 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -821,7 +821,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
821 * contents - it is a noop if I/O is still in 821 * contents - it is a noop if I/O is still in
822 * flight on potentially older contents. 822 * flight on potentially older contents.
823 */ 823 */
824 ll_rw_block(SWRITE, 1, &bh); 824 ll_rw_block(SWRITE_SYNC, 1, &bh);
825 brelse(bh); 825 brelse(bh);
826 spin_lock(lock); 826 spin_lock(lock);
827 } 827 }
@@ -2940,16 +2940,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2940 for (i = 0; i < nr; i++) { 2940 for (i = 0; i < nr; i++) {
2941 struct buffer_head *bh = bhs[i]; 2941 struct buffer_head *bh = bhs[i];
2942 2942
2943 if (rw == SWRITE) 2943 if (rw == SWRITE || rw == SWRITE_SYNC)
2944 lock_buffer(bh); 2944 lock_buffer(bh);
2945 else if (test_set_buffer_locked(bh)) 2945 else if (test_set_buffer_locked(bh))
2946 continue; 2946 continue;
2947 2947
2948 if (rw == WRITE || rw == SWRITE) { 2948 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
2949 if (test_clear_buffer_dirty(bh)) { 2949 if (test_clear_buffer_dirty(bh)) {
2950 bh->b_end_io = end_buffer_write_sync; 2950 bh->b_end_io = end_buffer_write_sync;
2951 get_bh(bh); 2951 get_bh(bh);
2952 submit_bh(WRITE, bh); 2952 if (rw == SWRITE_SYNC)
2953 submit_bh(WRITE_SYNC, bh);
2954 else
2955 submit_bh(WRITE, bh);
2953 continue; 2956 continue;
2954 } 2957 }
2955 } else { 2958 } else {
@@ -2978,7 +2981,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
2978 if (test_clear_buffer_dirty(bh)) { 2981 if (test_clear_buffer_dirty(bh)) {
2979 get_bh(bh); 2982 get_bh(bh);
2980 bh->b_end_io = end_buffer_write_sync; 2983 bh->b_end_io = end_buffer_write_sync;
2981 ret = submit_bh(WRITE, bh); 2984 ret = submit_bh(WRITE_SYNC, bh);
2982 wait_on_buffer(bh); 2985 wait_on_buffer(bh);
2983 if (buffer_eopnotsupp(bh)) { 2986 if (buffer_eopnotsupp(bh)) {
2984 clear_buffer_eopnotsupp(bh); 2987 clear_buffer_eopnotsupp(bh);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 28e3d5c5fcac..1f3465201fdf 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -2,6 +2,11 @@ Version 1.53
2------------ 2------------
3DFS support added (Microsoft Distributed File System client support needed 3DFS support added (Microsoft Distributed File System client support needed
4for referrals which enable a hierarchical name space among servers). 4for referrals which enable a hierarchical name space among servers).
5Disable temporary caching of mode bits to servers which do not support
6storing of mode (e.g. Windows servers, when client mounts without cifsacl
7mount option) and add new "dynperm" mount option to enable temporary caching
8of mode (enable old behavior). Fix hang on mount caused when server crashes
9tcp session during negotiate protocol.
5 10
6Version 1.52 11Version 1.52
7------------ 12------------
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index cb52cbbe45ff..f58e41d3ba48 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -186,6 +186,11 @@ asn1_length_decode(struct asn1_ctx *ctx, unsigned int *def, unsigned int *len)
186 } 186 }
187 } 187 }
188 } 188 }
189
190 /* don't trust len bigger than ctx buffer */
191 if (*len > ctx->end - ctx->pointer)
192 return 0;
193
189 return 1; 194 return 1;
190} 195}
191 196
@@ -203,6 +208,10 @@ asn1_header_decode(struct asn1_ctx *ctx,
203 if (!asn1_length_decode(ctx, &def, &len)) 208 if (!asn1_length_decode(ctx, &def, &len))
204 return 0; 209 return 0;
205 210
211 /* primitive shall be definite, indefinite shall be constructed */
212 if (*con == ASN1_PRI && !def)
213 return 0;
214
206 if (def) 215 if (def)
207 *eoc = ctx->pointer + len; 216 *eoc = ctx->pointer + len;
208 else 217 else
@@ -389,6 +398,11 @@ asn1_oid_decode(struct asn1_ctx *ctx,
389 unsigned long *optr; 398 unsigned long *optr;
390 399
391 size = eoc - ctx->pointer + 1; 400 size = eoc - ctx->pointer + 1;
401
402 /* first subid actually encodes first two subids */
403 if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
404 return 0;
405
392 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); 406 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
393 if (*oid == NULL) 407 if (*oid == NULL)
394 return 0; 408 return 0;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5df93fd6303f..86b4d5f405ae 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -97,9 +97,6 @@ cifs_read_super(struct super_block *sb, void *data,
97{ 97{
98 struct inode *inode; 98 struct inode *inode;
99 struct cifs_sb_info *cifs_sb; 99 struct cifs_sb_info *cifs_sb;
100#ifdef CONFIG_CIFS_DFS_UPCALL
101 int len;
102#endif
103 int rc = 0; 100 int rc = 0;
104 101
105 /* BB should we make this contingent on mount parm? */ 102 /* BB should we make this contingent on mount parm? */
@@ -117,15 +114,17 @@ cifs_read_super(struct super_block *sb, void *data,
117 * complex operation (mount), and in case of fail 114 * complex operation (mount), and in case of fail
118 * just exit instead of doing mount and attempting 115 * just exit instead of doing mount and attempting
119 * undo it if this copy fails?*/ 116 * undo it if this copy fails?*/
120 len = strlen(data); 117 if (data) {
121 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL); 118 int len = strlen(data);
122 if (cifs_sb->mountdata == NULL) { 119 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
123 kfree(sb->s_fs_info); 120 if (cifs_sb->mountdata == NULL) {
124 sb->s_fs_info = NULL; 121 kfree(sb->s_fs_info);
125 return -ENOMEM; 122 sb->s_fs_info = NULL;
123 return -ENOMEM;
124 }
125 strncpy(cifs_sb->mountdata, data, len + 1);
126 cifs_sb->mountdata[len] = '\0';
126 } 127 }
127 strncpy(cifs_sb->mountdata, data, len + 1);
128 cifs_sb->mountdata[len] = '\0';
129#endif 128#endif
130 129
131 rc = cifs_mount(sb, cifs_sb, data, devname); 130 rc = cifs_mount(sb, cifs_sb, data, devname);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 08914053242b..9cfcf326ead3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -333,7 +333,6 @@ struct cifsFileInfo {
333 bool messageMode:1; /* for pipes: message vs byte mode */ 333 bool messageMode:1; /* for pipes: message vs byte mode */
334 atomic_t wrtPending; /* handle in use - defer close */ 334 atomic_t wrtPending; /* handle in use - defer close */
335 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 335 struct semaphore fh_sem; /* prevents reopen race after dead ses*/
336 char *search_resume_name; /* BB removeme BB */
337 struct cifs_search_info srch_inf; 336 struct cifs_search_info srch_inf;
338}; 337};
339 338
@@ -626,7 +625,7 @@ GLOBAL_EXTERN atomic_t tcpSesAllocCount;
626GLOBAL_EXTERN atomic_t tcpSesReconnectCount; 625GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
627GLOBAL_EXTERN atomic_t tconInfoReconnectCount; 626GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
628 627
629/* Various Debug counters to remove someday (BB) */ 628/* Various Debug counters */
630GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */ 629GLOBAL_EXTERN atomic_t bufAllocCount; /* current number allocated */
631#ifdef CONFIG_CIFS_STATS2 630#ifdef CONFIG_CIFS_STATS2
632GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */ 631GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 65d58b4e6a61..0f327c224da3 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -79,6 +79,19 @@
79#define TRANS2_GET_DFS_REFERRAL 0x10 79#define TRANS2_GET_DFS_REFERRAL 0x10
80#define TRANS2_REPORT_DFS_INCOSISTENCY 0x11 80#define TRANS2_REPORT_DFS_INCOSISTENCY 0x11
81 81
82/* SMB Transact (Named Pipe) subcommand codes */
83#define TRANS_SET_NMPIPE_STATE 0x0001
84#define TRANS_RAW_READ_NMPIPE 0x0011
85#define TRANS_QUERY_NMPIPE_STATE 0x0021
86#define TRANS_QUERY_NMPIPE_INFO 0x0022
87#define TRANS_PEEK_NMPIPE 0x0023
88#define TRANS_TRANSACT_NMPIPE 0x0026
89#define TRANS_RAW_WRITE_NMPIPE 0x0031
90#define TRANS_READ_NMPIPE 0x0036
91#define TRANS_WRITE_NMPIPE 0x0037
92#define TRANS_WAIT_NMPIPE 0x0053
93#define TRANS_CALL_NMPIPE 0x0054
94
82/* NT Transact subcommand codes */ 95/* NT Transact subcommand codes */
83#define NT_TRANSACT_CREATE 0x01 96#define NT_TRANSACT_CREATE 0x01
84#define NT_TRANSACT_IOCTL 0x02 97#define NT_TRANSACT_IOCTL 0x02
@@ -328,12 +341,13 @@
328#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ 341#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */
329#define CREATE_NO_EA_KNOWLEDGE 0x00000200 342#define CREATE_NO_EA_KNOWLEDGE 0x00000200
330#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete 343#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete
331 open for recovery flag - should 344 "open for recovery" flag - should
332 be zero */ 345 be zero in any case */
346#define CREATE_OPEN_FOR_RECOVERY 0x00000400
333#define CREATE_RANDOM_ACCESS 0x00000800 347#define CREATE_RANDOM_ACCESS 0x00000800
334#define CREATE_DELETE_ON_CLOSE 0x00001000 348#define CREATE_DELETE_ON_CLOSE 0x00001000
335#define CREATE_OPEN_BY_ID 0x00002000 349#define CREATE_OPEN_BY_ID 0x00002000
336#define CREATE_OPEN_BACKUP_INTN 0x00004000 350#define CREATE_OPEN_BACKUP_INTENT 0x00004000
337#define CREATE_NO_COMPRESSION 0x00008000 351#define CREATE_NO_COMPRESSION 0x00008000
338#define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */ 352#define CREATE_RESERVE_OPFILTER 0x00100000 /* should be zero */
339#define OPEN_REPARSE_POINT 0x00200000 353#define OPEN_REPARSE_POINT 0x00200000
@@ -722,7 +736,6 @@ typedef struct smb_com_tconx_rsp_ext {
722#define SMB_CSC_CACHE_AUTO_REINT 0x0004 736#define SMB_CSC_CACHE_AUTO_REINT 0x0004
723#define SMB_CSC_CACHE_VDO 0x0008 737#define SMB_CSC_CACHE_VDO 0x0008
724#define SMB_CSC_NO_CACHING 0x000C 738#define SMB_CSC_NO_CACHING 0x000C
725
726#define SMB_UNIQUE_FILE_NAME 0x0010 739#define SMB_UNIQUE_FILE_NAME 0x0010
727#define SMB_EXTENDED_SIGNATURES 0x0020 740#define SMB_EXTENDED_SIGNATURES 0x0020
728 741
@@ -806,7 +819,7 @@ typedef struct smb_com_findclose_req {
806#define ICOUNT_MASK 0x00FF 819#define ICOUNT_MASK 0x00FF
807#define PIPE_READ_MODE 0x0100 820#define PIPE_READ_MODE 0x0100
808#define NAMED_PIPE_TYPE 0x0400 821#define NAMED_PIPE_TYPE 0x0400
809#define PIPE_END_POINT 0x0800 822#define PIPE_END_POINT 0x4000
810#define BLOCKING_NAMED_PIPE 0x8000 823#define BLOCKING_NAMED_PIPE 0x8000
811 824
812typedef struct smb_com_open_req { /* also handles create */ 825typedef struct smb_com_open_req { /* also handles create */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 9b8b4cfdf993..4511b708f0f3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1728,7 +1728,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1728{ 1728{
1729 int rc = 0; 1729 int rc = 0;
1730 LOCK_REQ *pSMB = NULL; 1730 LOCK_REQ *pSMB = NULL;
1731 LOCK_RSP *pSMBr = NULL; 1731/* LOCK_RSP *pSMBr = NULL; */ /* No response data other than rc to parse */
1732 int bytes_returned; 1732 int bytes_returned;
1733 int timeout = 0; 1733 int timeout = 0;
1734 __u16 count; 1734 __u16 count;
@@ -1739,8 +1739,6 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1739 if (rc) 1739 if (rc)
1740 return rc; 1740 return rc;
1741 1741
1742 pSMBr = (LOCK_RSP *)pSMB; /* BB removeme BB */
1743
1744 if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { 1742 if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) {
1745 timeout = CIFS_ASYNC_OP; /* no response expected */ 1743 timeout = CIFS_ASYNC_OP; /* no response expected */
1746 pSMB->Timeout = 0; 1744 pSMB->Timeout = 0;
@@ -1774,7 +1772,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1774 1772
1775 if (waitFlag) { 1773 if (waitFlag) {
1776 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, 1774 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
1777 (struct smb_hdr *) pSMBr, &bytes_returned); 1775 (struct smb_hdr *) pSMB, &bytes_returned);
1778 cifs_small_buf_release(pSMB); 1776 cifs_small_buf_release(pSMB);
1779 } else { 1777 } else {
1780 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB, 1778 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *)pSMB,
@@ -3927,9 +3925,9 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3927 } 3925 }
3928 3926
3929 ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); 3927 ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals);
3930 if (ref->VersionNumber != 3) { 3928 if (ref->VersionNumber != cpu_to_le16(3)) {
3931 cERROR(1, ("Referrals of V%d version are not supported," 3929 cERROR(1, ("Referrals of V%d version are not supported,"
3932 "should be V3", ref->VersionNumber)); 3930 "should be V3", le16_to_cpu(ref->VersionNumber)));
3933 rc = -EINVAL; 3931 rc = -EINVAL;
3934 goto parse_DFS_referrals_exit; 3932 goto parse_DFS_referrals_exit;
3935 } 3933 }
@@ -3977,7 +3975,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3977 if (rc) 3975 if (rc)
3978 goto parse_DFS_referrals_exit; 3976 goto parse_DFS_referrals_exit;
3979 3977
3980 ref += ref->Size; 3978 ref += le16_to_cpu(ref->Size);
3981 } 3979 }
3982 3980
3983parse_DFS_referrals_exit: 3981parse_DFS_referrals_exit:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 023434f72c15..e8fa46c7cff2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -653,6 +653,7 @@ multi_t2_fnd:
653 spin_lock(&GlobalMid_Lock); 653 spin_lock(&GlobalMid_Lock);
654 server->tcpStatus = CifsExiting; 654 server->tcpStatus = CifsExiting;
655 spin_unlock(&GlobalMid_Lock); 655 spin_unlock(&GlobalMid_Lock);
656 wake_up_all(&server->response_q);
656 657
657 /* don't exit until kthread_stop is called */ 658 /* don't exit until kthread_stop is called */
658 set_current_state(TASK_UNINTERRUPTIBLE); 659 set_current_state(TASK_UNINTERRUPTIBLE);
@@ -2120,6 +2121,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2120 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; 2121 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
2121 } 2122 }
2122 2123
2124 if ((volume_info.cifs_acl) && (volume_info.dynperm))
2125 cERROR(1, ("mount option dynperm ignored if cifsacl "
2126 "mount option supported"));
2127
2123 tcon = 2128 tcon =
2124 find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, 2129 find_unc(sin_server.sin_addr.s_addr, volume_info.UNC,
2125 volume_info.username); 2130 volume_info.username);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index f0b5b5f3dd2e..fb69c1fa85c9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -260,7 +260,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
260 buf, inode->i_sb, xid, 260 buf, inode->i_sb, xid,
261 &fileHandle); 261 &fileHandle);
262 if (newinode) { 262 if (newinode) {
263 newinode->i_mode = mode; 263 if (cifs_sb->mnt_cifs_flags &
264 CIFS_MOUNT_DYNPERM)
265 newinode->i_mode = mode;
264 if ((oplock & CIFS_CREATE_ACTION) && 266 if ((oplock & CIFS_CREATE_ACTION) &&
265 (cifs_sb->mnt_cifs_flags & 267 (cifs_sb->mnt_cifs_flags &
266 CIFS_MOUNT_SET_UID)) { 268 CIFS_MOUNT_SET_UID)) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8636cec2642c..0aac824371a5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -546,7 +546,6 @@ int cifs_close(struct inode *inode, struct file *file)
546 msleep(timeout); 546 msleep(timeout);
547 timeout *= 8; 547 timeout *= 8;
548 } 548 }
549 kfree(pSMBFile->search_resume_name);
550 kfree(file->private_data); 549 kfree(file->private_data);
551 file->private_data = NULL; 550 file->private_data = NULL;
552 } else 551 } else
@@ -605,12 +604,6 @@ int cifs_closedir(struct inode *inode, struct file *file)
605 else 604 else
606 cifs_buf_release(ptmp); 605 cifs_buf_release(ptmp);
607 } 606 }
608 ptmp = pCFileStruct->search_resume_name;
609 if (ptmp) {
610 cFYI(1, ("closedir free resume name"));
611 pCFileStruct->search_resume_name = NULL;
612 kfree(ptmp);
613 }
614 kfree(file->private_data); 607 kfree(file->private_data);
615 file->private_data = NULL; 608 file->private_data = NULL;
616 } 609 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 129dbfe4dca7..722be543ceec 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -418,6 +418,7 @@ int cifs_get_inode_info(struct inode **pinode,
418 char *buf = NULL; 418 char *buf = NULL;
419 bool adjustTZ = false; 419 bool adjustTZ = false;
420 bool is_dfs_referral = false; 420 bool is_dfs_referral = false;
421 umode_t default_mode;
421 422
422 pTcon = cifs_sb->tcon; 423 pTcon = cifs_sb->tcon;
423 cFYI(1, ("Getting info on %s", full_path)); 424 cFYI(1, ("Getting info on %s", full_path));
@@ -530,47 +531,42 @@ int cifs_get_inode_info(struct inode **pinode,
530 inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj; 531 inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
531 } 532 }
532 533
533 /* set default mode. will override for dirs below */ 534 /* get default inode mode */
534 if (atomic_read(&cifsInfo->inUse) == 0) 535 if (attr & ATTR_DIRECTORY)
535 /* new inode, can safely set these fields */ 536 default_mode = cifs_sb->mnt_dir_mode;
536 inode->i_mode = cifs_sb->mnt_file_mode; 537 else
537 else /* since we set the inode type below we need to mask off 538 default_mode = cifs_sb->mnt_file_mode;
538 to avoid strange results if type changes and both 539
539 get orred in */ 540 /* set permission bits */
540 inode->i_mode &= ~S_IFMT; 541 if (atomic_read(&cifsInfo->inUse) == 0 ||
541/* if (attr & ATTR_REPARSE) */ 542 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
542 /* We no longer handle these as symlinks because we could not 543 inode->i_mode = default_mode;
543 follow them due to the absolute path with drive letter */ 544 else {
544 if (attr & ATTR_DIRECTORY) { 545 /* just reenable write bits if !ATTR_READONLY */
545 /* override default perms since we do not do byte range locking 546 if ((inode->i_mode & S_IWUGO) == 0 &&
546 on dirs */ 547 (attr & ATTR_READONLY) == 0)
547 inode->i_mode = cifs_sb->mnt_dir_mode; 548 inode->i_mode |= (S_IWUGO & default_mode);
548 inode->i_mode |= S_IFDIR; 549 inode->i_mode &= ~S_IFMT;
549 } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && 550 }
550 (cifsInfo->cifsAttrs & ATTR_SYSTEM) && 551 /* clear write bits if ATTR_READONLY is set */
551 /* No need to le64 convert size of zero */ 552 if (attr & ATTR_READONLY)
552 (pfindData->EndOfFile == 0)) { 553 inode->i_mode &= ~S_IWUGO;
553 inode->i_mode = cifs_sb->mnt_file_mode; 554
554 inode->i_mode |= S_IFIFO; 555 /* set inode type */
555/* BB Finish for SFU style symlinks and devices */ 556 if ((attr & ATTR_SYSTEM) &&
556 } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && 557 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
557 (cifsInfo->cifsAttrs & ATTR_SYSTEM)) { 558 /* no need to fix endianness on 0 */
558 if (decode_sfu_inode(inode, le64_to_cpu(pfindData->EndOfFile), 559 if (pfindData->EndOfFile == 0)
559 full_path, cifs_sb, xid)) 560 inode->i_mode |= S_IFIFO;
560 cFYI(1, ("Unrecognized sfu inode type")); 561 else if (decode_sfu_inode(inode,
561 562 le64_to_cpu(pfindData->EndOfFile),
562 cFYI(1, ("sfu mode 0%o", inode->i_mode)); 563 full_path, cifs_sb, xid))
564 cFYI(1, ("unknown SFU file type\n"));
563 } else { 565 } else {
564 inode->i_mode |= S_IFREG; 566 if (attr & ATTR_DIRECTORY)
565 /* treat dos attribute of read-only as read-only mode eg 555 */ 567 inode->i_mode |= S_IFDIR;
566 if (cifsInfo->cifsAttrs & ATTR_READONLY) 568 else
567 inode->i_mode &= ~(S_IWUGO); 569 inode->i_mode |= S_IFREG;
568 else if ((inode->i_mode & S_IWUGO) == 0)
569 /* the ATTR_READONLY flag may have been */
570 /* changed on server -- set any w bits */
571 /* allowed by mnt_file_mode */
572 inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode);
573 /* BB add code to validate if device or weird share or device type? */
574 } 570 }
575 571
576 spin_lock(&inode->i_lock); 572 spin_lock(&inode->i_lock);
@@ -1019,8 +1015,11 @@ mkdir_get_info:
1019 CIFS_MOUNT_MAP_SPECIAL_CHR); 1015 CIFS_MOUNT_MAP_SPECIAL_CHR);
1020 } 1016 }
1021 if (direntry->d_inode) { 1017 if (direntry->d_inode) {
1022 direntry->d_inode->i_mode = mode; 1018 if (cifs_sb->mnt_cifs_flags &
1023 direntry->d_inode->i_mode |= S_IFDIR; 1019 CIFS_MOUNT_DYNPERM)
1020 direntry->d_inode->i_mode =
1021 (mode | S_IFDIR);
1022
1024 if (cifs_sb->mnt_cifs_flags & 1023 if (cifs_sb->mnt_cifs_flags &
1025 CIFS_MOUNT_SET_UID) { 1024 CIFS_MOUNT_SET_UID) {
1026 direntry->d_inode->i_uid = 1025 direntry->d_inode->i_uid =
@@ -1547,13 +1546,26 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1547 } else 1546 } else
1548 goto cifs_setattr_exit; 1547 goto cifs_setattr_exit;
1549 } 1548 }
1550 if (attrs->ia_valid & ATTR_UID) { 1549
1551 cFYI(1, ("UID changed to %d", attrs->ia_uid)); 1550 /*
1552 uid = attrs->ia_uid; 1551 * Without unix extensions we can't send ownership changes to the
1553 } 1552 * server, so silently ignore them. This is consistent with how
1554 if (attrs->ia_valid & ATTR_GID) { 1553 * local DOS/Windows filesystems behave (VFAT, NTFS, etc). With
1555 cFYI(1, ("GID changed to %d", attrs->ia_gid)); 1554 * CIFSACL support + proper Windows to Unix idmapping, we may be
1556 gid = attrs->ia_gid; 1555 * able to support this in the future.
1556 */
1557 if (!pTcon->unix_ext &&
1558 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
1559 attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
1560 } else {
1561 if (attrs->ia_valid & ATTR_UID) {
1562 cFYI(1, ("UID changed to %d", attrs->ia_uid));
1563 uid = attrs->ia_uid;
1564 }
1565 if (attrs->ia_valid & ATTR_GID) {
1566 cFYI(1, ("GID changed to %d", attrs->ia_gid));
1567 gid = attrs->ia_gid;
1568 }
1557 } 1569 }
1558 1570
1559 time_buf.Attributes = 0; 1571 time_buf.Attributes = 0;
@@ -1563,7 +1575,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1563 attrs->ia_valid &= ~ATTR_MODE; 1575 attrs->ia_valid &= ~ATTR_MODE;
1564 1576
1565 if (attrs->ia_valid & ATTR_MODE) { 1577 if (attrs->ia_valid & ATTR_MODE) {
1566 cFYI(1, ("Mode changed to 0x%x", attrs->ia_mode)); 1578 cFYI(1, ("Mode changed to 0%o", attrs->ia_mode));
1567 mode = attrs->ia_mode; 1579 mode = attrs->ia_mode;
1568 } 1580 }
1569 1581
@@ -1578,18 +1590,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1578#ifdef CONFIG_CIFS_EXPERIMENTAL 1590#ifdef CONFIG_CIFS_EXPERIMENTAL
1579 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 1591 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
1580 rc = mode_to_acl(inode, full_path, mode); 1592 rc = mode_to_acl(inode, full_path, mode);
1581 else if ((mode & S_IWUGO) == 0) { 1593 else
1582#else
1583 if ((mode & S_IWUGO) == 0) {
1584#endif 1594#endif
1585 /* not writeable */ 1595 if (((mode & S_IWUGO) == 0) &&
1586 if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 1596 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
1587 set_dosattr = true; 1597 set_dosattr = true;
1588 time_buf.Attributes = 1598 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs |
1589 cpu_to_le32(cifsInode->cifsAttrs | 1599 ATTR_READONLY);
1590 ATTR_READONLY); 1600 /* fix up mode if we're not using dynperm */
1591 } 1601 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
1592 } else if (cifsInode->cifsAttrs & ATTR_READONLY) { 1602 attrs->ia_mode = inode->i_mode & ~S_IWUGO;
1603 } else if ((mode & S_IWUGO) &&
1604 (cifsInode->cifsAttrs & ATTR_READONLY)) {
1593 /* If file is readonly on server, we would 1605 /* If file is readonly on server, we would
1594 not be able to write to it - so if any write 1606 not be able to write to it - so if any write
1595 bit is enabled for user or group or other we 1607 bit is enabled for user or group or other we
@@ -1600,6 +1612,20 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1600 /* Windows ignores set to zero */ 1612 /* Windows ignores set to zero */
1601 if (time_buf.Attributes == 0) 1613 if (time_buf.Attributes == 0)
1602 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); 1614 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
1615
1616 /* reset local inode permissions to normal */
1617 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
1618 attrs->ia_mode &= ~(S_IALLUGO);
1619 if (S_ISDIR(inode->i_mode))
1620 attrs->ia_mode |=
1621 cifs_sb->mnt_dir_mode;
1622 else
1623 attrs->ia_mode |=
1624 cifs_sb->mnt_file_mode;
1625 }
1626 } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
1627 /* ignore mode change - ATTR_READONLY hasn't changed */
1628 attrs->ia_valid &= ~ATTR_MODE;
1603 } 1629 }
1604 } 1630 }
1605 1631
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1d69b8014e0b..4b17f8fe3157 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -519,8 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
519 pnotify = (struct file_notify_information *) 519 pnotify = (struct file_notify_information *)
520 ((char *)&pSMBr->hdr.Protocol + data_offset); 520 ((char *)&pSMBr->hdr.Protocol + data_offset);
521 cFYI(1, ("dnotify on %s Action: 0x%x", 521 cFYI(1, ("dnotify on %s Action: 0x%x",
522 pnotify->FileName, 522 pnotify->FileName, pnotify->Action));
523 pnotify->Action)); /* BB removeme BB */
524 /* cifs_dump_mem("Rcvd notify Data: ",buf, 523 /* cifs_dump_mem("Rcvd notify Data: ",buf,
525 sizeof(struct smb_hdr)+60); */ 524 sizeof(struct smb_hdr)+60); */
526 return true; 525 return true;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 713c25110197..83f306954883 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -132,6 +132,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
132 __u32 attr; 132 __u32 attr;
133 __u64 allocation_size; 133 __u64 allocation_size;
134 __u64 end_of_file; 134 __u64 end_of_file;
135 umode_t default_mode;
135 136
136 /* save mtime and size */ 137 /* save mtime and size */
137 local_mtime = tmp_inode->i_mtime; 138 local_mtime = tmp_inode->i_mtime;
@@ -187,48 +188,54 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
187 if (atomic_read(&cifsInfo->inUse) == 0) { 188 if (atomic_read(&cifsInfo->inUse) == 0) {
188 tmp_inode->i_uid = cifs_sb->mnt_uid; 189 tmp_inode->i_uid = cifs_sb->mnt_uid;
189 tmp_inode->i_gid = cifs_sb->mnt_gid; 190 tmp_inode->i_gid = cifs_sb->mnt_gid;
190 /* set default mode. will override for dirs below */ 191 }
191 tmp_inode->i_mode = cifs_sb->mnt_file_mode; 192
192 } else { 193 if (attr & ATTR_DIRECTORY)
193 /* mask off the type bits since it gets set 194 default_mode = cifs_sb->mnt_dir_mode;
194 below and we do not want to get two type 195 else
195 bits set */ 196 default_mode = cifs_sb->mnt_file_mode;
197
198 /* set initial permissions */
199 if ((atomic_read(&cifsInfo->inUse) == 0) ||
200 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
201 tmp_inode->i_mode = default_mode;
202 else {
203 /* just reenable write bits if !ATTR_READONLY */
204 if ((tmp_inode->i_mode & S_IWUGO) == 0 &&
205 (attr & ATTR_READONLY) == 0)
206 tmp_inode->i_mode |= (S_IWUGO & default_mode);
207
196 tmp_inode->i_mode &= ~S_IFMT; 208 tmp_inode->i_mode &= ~S_IFMT;
197 } 209 }
198 210
199 if (attr & ATTR_DIRECTORY) { 211 /* clear write bits if ATTR_READONLY is set */
200 *pobject_type = DT_DIR; 212 if (attr & ATTR_READONLY)
201 /* override default perms since we do not lock dirs */ 213 tmp_inode->i_mode &= ~S_IWUGO;
202 if (atomic_read(&cifsInfo->inUse) == 0) 214
203 tmp_inode->i_mode = cifs_sb->mnt_dir_mode; 215 /* set inode type */
204 tmp_inode->i_mode |= S_IFDIR; 216 if ((attr & ATTR_SYSTEM) &&
205 } else if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) && 217 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
206 (attr & ATTR_SYSTEM)) {
207 if (end_of_file == 0) { 218 if (end_of_file == 0) {
208 *pobject_type = DT_FIFO;
209 tmp_inode->i_mode |= S_IFIFO; 219 tmp_inode->i_mode |= S_IFIFO;
220 *pobject_type = DT_FIFO;
210 } else { 221 } else {
211 /* rather than get the type here, we mark the 222 /*
212 inode as needing revalidate and get the real type 223 * trying to get the type can be slow, so just call
213 (blk vs chr vs. symlink) later ie in lookup */ 224 * this a regular file for now, and mark for reval
214 *pobject_type = DT_REG; 225 */
215 tmp_inode->i_mode |= S_IFREG; 226 tmp_inode->i_mode |= S_IFREG;
227 *pobject_type = DT_REG;
216 cifsInfo->time = 0; 228 cifsInfo->time = 0;
217 } 229 }
218/* we no longer mark these because we could not follow them */
219/* } else if (attr & ATTR_REPARSE) {
220 *pobject_type = DT_LNK;
221 tmp_inode->i_mode |= S_IFLNK; */
222 } else { 230 } else {
223 *pobject_type = DT_REG; 231 if (attr & ATTR_DIRECTORY) {
224 tmp_inode->i_mode |= S_IFREG; 232 tmp_inode->i_mode |= S_IFDIR;
225 if (attr & ATTR_READONLY) 233 *pobject_type = DT_DIR;
226 tmp_inode->i_mode &= ~(S_IWUGO); 234 } else {
227 else if ((tmp_inode->i_mode & S_IWUGO) == 0) 235 tmp_inode->i_mode |= S_IFREG;
228 /* the ATTR_READONLY flag may have been changed on */ 236 *pobject_type = DT_REG;
229 /* server -- set any w bits allowed by mnt_file_mode */ 237 }
230 tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode); 238 }
231 } /* could add code here - to validate if device or weird share type? */
232 239
233 /* can not fill in nlink here as in qpathinfo version and Unx search */ 240 /* can not fill in nlink here as in qpathinfo version and Unx search */
234 if (atomic_read(&cifsInfo->inUse) == 0) 241 if (atomic_read(&cifsInfo->inUse) == 0)
@@ -675,8 +682,6 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
675 cifsFile->invalidHandle = true; 682 cifsFile->invalidHandle = true;
676 CIFSFindClose(xid, pTcon, cifsFile->netfid); 683 CIFSFindClose(xid, pTcon, cifsFile->netfid);
677 } 684 }
678 kfree(cifsFile->search_resume_name);
679 cifsFile->search_resume_name = NULL;
680 if (cifsFile->srch_inf.ntwrk_buf_start) { 685 if (cifsFile->srch_inf.ntwrk_buf_start) {
681 cFYI(1, ("freeing SMB ff cache buf on search rewind")); 686 cFYI(1, ("freeing SMB ff cache buf on search rewind"));
682 if (cifsFile->srch_inf.smallBuf) 687 if (cifsFile->srch_inf.smallBuf)
@@ -1043,9 +1048,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
1043 } /* else { 1048 } /* else {
1044 cifsFile->invalidHandle = true; 1049 cifsFile->invalidHandle = true;
1045 CIFSFindClose(xid, pTcon, cifsFile->netfid); 1050 CIFSFindClose(xid, pTcon, cifsFile->netfid);
1046 } 1051 } */
1047 kfree(cifsFile->search_resume_name);
1048 cifsFile->search_resume_name = NULL; */
1049 1052
1050 rc = find_cifs_entry(xid, pTcon, file, 1053 rc = find_cifs_entry(xid, pTcon, file,
1051 &current_entry, &num_to_fill); 1054 &current_entry, &num_to_fill);
diff --git a/fs/dcache.c b/fs/dcache.c
index 3ee588d5f585..6068c25b393c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -17,6 +17,7 @@
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/fdtable.h>
20#include <linux/fs.h> 21#include <linux/fs.h>
21#include <linux/fsnotify.h> 22#include <linux/fsnotify.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
@@ -106,9 +107,10 @@ static void dentry_lru_remove(struct dentry *dentry)
106/* 107/*
107 * Release the dentry's inode, using the filesystem 108 * Release the dentry's inode, using the filesystem
108 * d_iput() operation if defined. 109 * d_iput() operation if defined.
109 * Called with dcache_lock and per dentry lock held, drops both.
110 */ 110 */
111static void dentry_iput(struct dentry * dentry) 111static void dentry_iput(struct dentry * dentry)
112 __releases(dentry->d_lock)
113 __releases(dcache_lock)
112{ 114{
113 struct inode *inode = dentry->d_inode; 115 struct inode *inode = dentry->d_inode;
114 if (inode) { 116 if (inode) {
@@ -132,12 +134,13 @@ static void dentry_iput(struct dentry * dentry)
132 * d_kill - kill dentry and return parent 134 * d_kill - kill dentry and return parent
133 * @dentry: dentry to kill 135 * @dentry: dentry to kill
134 * 136 *
135 * Called with dcache_lock and d_lock, releases both. The dentry must 137 * The dentry must already be unhashed and removed from the LRU.
136 * already be unhashed and removed from the LRU.
137 * 138 *
138 * If this is the root of the dentry tree, return NULL. 139 * If this is the root of the dentry tree, return NULL.
139 */ 140 */
140static struct dentry *d_kill(struct dentry *dentry) 141static struct dentry *d_kill(struct dentry *dentry)
142 __releases(dentry->d_lock)
143 __releases(dcache_lock)
141{ 144{
142 struct dentry *parent; 145 struct dentry *parent;
143 146
@@ -383,11 +386,11 @@ restart:
383 * Try to prune ancestors as well. This is necessary to prevent 386 * Try to prune ancestors as well. This is necessary to prevent
384 * quadratic behavior of shrink_dcache_parent(), but is also expected 387 * quadratic behavior of shrink_dcache_parent(), but is also expected
385 * to be beneficial in reducing dentry cache fragmentation. 388 * to be beneficial in reducing dentry cache fragmentation.
386 *
387 * Called with dcache_lock, drops it and then regains.
388 * Called with dentry->d_lock held, drops it.
389 */ 389 */
390static void prune_one_dentry(struct dentry * dentry) 390static void prune_one_dentry(struct dentry * dentry)
391 __releases(dentry->d_lock)
392 __releases(dcache_lock)
393 __acquires(dcache_lock)
391{ 394{
392 __d_drop(dentry); 395 __d_drop(dentry);
393 dentry = d_kill(dentry); 396 dentry = d_kill(dentry);
@@ -1604,10 +1607,9 @@ static int d_isparent(struct dentry *p1, struct dentry *p2)
1604 * 1607 *
1605 * Note: If ever the locking in lock_rename() changes, then please 1608 * Note: If ever the locking in lock_rename() changes, then please
1606 * remember to update this too... 1609 * remember to update this too...
1607 *
1608 * On return, dcache_lock will have been unlocked.
1609 */ 1610 */
1610static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) 1611static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1612 __releases(dcache_lock)
1611{ 1613{
1612 struct mutex *m1 = NULL, *m2 = NULL; 1614 struct mutex *m1 = NULL, *m2 = NULL;
1613 struct dentry *ret; 1615 struct dentry *ret;
@@ -1743,11 +1745,9 @@ out_nolock:
1743shouldnt_be_hashed: 1745shouldnt_be_hashed:
1744 spin_unlock(&dcache_lock); 1746 spin_unlock(&dcache_lock);
1745 BUG(); 1747 BUG();
1746 goto shouldnt_be_hashed;
1747} 1748}
1748 1749
1749static int prepend(char **buffer, int *buflen, const char *str, 1750static int prepend(char **buffer, int *buflen, const char *str, int namelen)
1750 int namelen)
1751{ 1751{
1752 *buflen -= namelen; 1752 *buflen -= namelen;
1753 if (*buflen < 0) 1753 if (*buflen < 0)
@@ -1757,8 +1757,13 @@ static int prepend(char **buffer, int *buflen, const char *str,
1757 return 0; 1757 return 0;
1758} 1758}
1759 1759
1760static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1761{
1762 return prepend(buffer, buflen, name->name, name->len);
1763}
1764
1760/** 1765/**
1761 * d_path - return the path of a dentry 1766 * __d_path - return the path of a dentry
1762 * @path: the dentry/vfsmount to report 1767 * @path: the dentry/vfsmount to report
1763 * @root: root vfsmnt/dentry (may be modified by this function) 1768 * @root: root vfsmnt/dentry (may be modified by this function)
1764 * @buffer: buffer to return value in 1769 * @buffer: buffer to return value in
@@ -1779,9 +1784,10 @@ char *__d_path(const struct path *path, struct path *root,
1779{ 1784{
1780 struct dentry *dentry = path->dentry; 1785 struct dentry *dentry = path->dentry;
1781 struct vfsmount *vfsmnt = path->mnt; 1786 struct vfsmount *vfsmnt = path->mnt;
1782 char * end = buffer+buflen; 1787 char *end = buffer + buflen;
1783 char * retval; 1788 char *retval;
1784 1789
1790 spin_lock(&vfsmount_lock);
1785 prepend(&end, &buflen, "\0", 1); 1791 prepend(&end, &buflen, "\0", 1);
1786 if (!IS_ROOT(dentry) && d_unhashed(dentry) && 1792 if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
1787 (prepend(&end, &buflen, " (deleted)", 10) != 0)) 1793 (prepend(&end, &buflen, " (deleted)", 10) != 0))
@@ -1800,38 +1806,37 @@ char *__d_path(const struct path *path, struct path *root,
1800 break; 1806 break;
1801 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 1807 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
1802 /* Global root? */ 1808 /* Global root? */
1803 spin_lock(&vfsmount_lock);
1804 if (vfsmnt->mnt_parent == vfsmnt) { 1809 if (vfsmnt->mnt_parent == vfsmnt) {
1805 spin_unlock(&vfsmount_lock);
1806 goto global_root; 1810 goto global_root;
1807 } 1811 }
1808 dentry = vfsmnt->mnt_mountpoint; 1812 dentry = vfsmnt->mnt_mountpoint;
1809 vfsmnt = vfsmnt->mnt_parent; 1813 vfsmnt = vfsmnt->mnt_parent;
1810 spin_unlock(&vfsmount_lock);
1811 continue; 1814 continue;
1812 } 1815 }
1813 parent = dentry->d_parent; 1816 parent = dentry->d_parent;
1814 prefetch(parent); 1817 prefetch(parent);
1815 if ((prepend(&end, &buflen, dentry->d_name.name, 1818 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
1816 dentry->d_name.len) != 0) ||
1817 (prepend(&end, &buflen, "/", 1) != 0)) 1819 (prepend(&end, &buflen, "/", 1) != 0))
1818 goto Elong; 1820 goto Elong;
1819 retval = end; 1821 retval = end;
1820 dentry = parent; 1822 dentry = parent;
1821 } 1823 }
1822 1824
1825out:
1826 spin_unlock(&vfsmount_lock);
1823 return retval; 1827 return retval;
1824 1828
1825global_root: 1829global_root:
1826 retval += 1; /* hit the slash */ 1830 retval += 1; /* hit the slash */
1827 if (prepend(&retval, &buflen, dentry->d_name.name, 1831 if (prepend_name(&retval, &buflen, &dentry->d_name) != 0)
1828 dentry->d_name.len) != 0)
1829 goto Elong; 1832 goto Elong;
1830 root->mnt = vfsmnt; 1833 root->mnt = vfsmnt;
1831 root->dentry = dentry; 1834 root->dentry = dentry;
1832 return retval; 1835 goto out;
1836
1833Elong: 1837Elong:
1834 return ERR_PTR(-ENAMETOOLONG); 1838 retval = ERR_PTR(-ENAMETOOLONG);
1839 goto out;
1835} 1840}
1836 1841
1837/** 1842/**
@@ -1845,9 +1850,9 @@ Elong:
1845 * 1850 *
1846 * Returns the buffer or an error code if the path was too long. 1851 * Returns the buffer or an error code if the path was too long.
1847 * 1852 *
1848 * "buflen" should be positive. Caller holds the dcache_lock. 1853 * "buflen" should be positive.
1849 */ 1854 */
1850char *d_path(struct path *path, char *buf, int buflen) 1855char *d_path(const struct path *path, char *buf, int buflen)
1851{ 1856{
1852 char *res; 1857 char *res;
1853 struct path root; 1858 struct path root;
@@ -1915,16 +1920,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
1915 retval = end-1; 1920 retval = end-1;
1916 *retval = '/'; 1921 *retval = '/';
1917 1922
1918 for (;;) { 1923 while (!IS_ROOT(dentry)) {
1919 struct dentry *parent; 1924 struct dentry *parent = dentry->d_parent;
1920 if (IS_ROOT(dentry))
1921 break;
1922 1925
1923 parent = dentry->d_parent;
1924 prefetch(parent); 1926 prefetch(parent);
1925 1927 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
1926 if ((prepend(&end, &buflen, dentry->d_name.name,
1927 dentry->d_name.len) != 0) ||
1928 (prepend(&end, &buflen, "/", 1) != 0)) 1928 (prepend(&end, &buflen, "/", 1) != 0))
1929 goto Elong; 1929 goto Elong;
1930 1930
@@ -1975,7 +1975,7 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
1975 error = -ENOENT; 1975 error = -ENOENT;
1976 /* Has the current directory has been unlinked? */ 1976 /* Has the current directory has been unlinked? */
1977 spin_lock(&dcache_lock); 1977 spin_lock(&dcache_lock);
1978 if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) { 1978 if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) {
1979 unsigned long len; 1979 unsigned long len;
1980 struct path tmp = root; 1980 struct path tmp = root;
1981 char * cwd; 1981 char * cwd;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index cd62d75b2cc0..e2832bc7869a 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1906,9 +1906,9 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
1906 goto out; 1906 goto out;
1907 } 1907 }
1908 } 1908 }
1909 mutex_unlock(&key_tfm_list_mutex);
1910 (*tfm) = key_tfm->key_tfm; 1909 (*tfm) = key_tfm->key_tfm;
1911 (*tfm_mutex) = &key_tfm->key_tfm_mutex; 1910 (*tfm_mutex) = &key_tfm->key_tfm_mutex;
1912out: 1911out:
1912 mutex_unlock(&key_tfm_list_mutex);
1913 return rc; 1913 return rc;
1914} 1914}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 951ee33a022d..c15c25745e05 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -660,8 +660,6 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
660int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, 660int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
661 struct ecryptfs_auth_tok **auth_tok, 661 struct ecryptfs_auth_tok **auth_tok,
662 char *sig); 662 char *sig);
663int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start,
664 int num_zeros);
665int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, 663int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
666 loff_t offset, size_t size); 664 loff_t offset, size_t size);
667int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, 665int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 50c994a249a5..09a4522f65e6 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -575,13 +575,11 @@ int ecryptfs_init_ecryptfs_miscdev(void)
575 int rc; 575 int rc;
576 576
577 atomic_set(&ecryptfs_num_miscdev_opens, 0); 577 atomic_set(&ecryptfs_num_miscdev_opens, 0);
578 mutex_lock(&ecryptfs_daemon_hash_mux);
579 rc = misc_register(&ecryptfs_miscdev); 578 rc = misc_register(&ecryptfs_miscdev);
580 if (rc) 579 if (rc)
581 printk(KERN_ERR "%s: Failed to register miscellaneous device " 580 printk(KERN_ERR "%s: Failed to register miscellaneous device "
582 "for communications with userspace daemons; rc = [%d]\n", 581 "for communications with userspace daemons; rc = [%d]\n",
583 __func__, rc); 582 __func__, rc);
584 mutex_unlock(&ecryptfs_daemon_hash_mux);
585 return rc; 583 return rc;
586} 584}
587 585
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index ebf55150be56..75c2ea9fee35 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -157,20 +157,6 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
157 ecryptfs_page_idx, rc); 157 ecryptfs_page_idx, rc);
158 goto out; 158 goto out;
159 } 159 }
160 if (start_offset_in_page) {
161 /* Read in the page from the lower
162 * into the eCryptfs inode page cache,
163 * decrypting */
164 rc = ecryptfs_decrypt_page(ecryptfs_page);
165 if (rc) {
166 printk(KERN_ERR "%s: Error decrypting "
167 "page; rc = [%d]\n",
168 __func__, rc);
169 ClearPageUptodate(ecryptfs_page);
170 page_cache_release(ecryptfs_page);
171 goto out;
172 }
173 }
174 ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); 160 ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
175 161
176 /* 162 /*
@@ -349,14 +335,6 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
349 ecryptfs_page_idx, rc); 335 ecryptfs_page_idx, rc);
350 goto out; 336 goto out;
351 } 337 }
352 rc = ecryptfs_decrypt_page(ecryptfs_page);
353 if (rc) {
354 printk(KERN_ERR "%s: Error decrypting "
355 "page; rc = [%d]\n", __func__, rc);
356 ClearPageUptodate(ecryptfs_page);
357 page_cache_release(ecryptfs_page);
358 goto out;
359 }
360 ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); 338 ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
361 memcpy((data + data_offset), 339 memcpy((data + data_offset),
362 ((char *)ecryptfs_page_virt + start_offset_in_page), 340 ((char *)ecryptfs_page_virt + start_offset_in_page),
diff --git a/fs/exec.c b/fs/exec.c
index 3c2ba7ce11d4..da94a6f05df3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,7 +26,6 @@
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mman.h> 28#include <linux/mman.h>
29#include <linux/a.out.h>
30#include <linux/stat.h> 29#include <linux/stat.h>
31#include <linux/fcntl.h> 30#include <linux/fcntl.h>
32#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
@@ -61,6 +60,11 @@
61#include <linux/kmod.h> 60#include <linux/kmod.h>
62#endif 61#endif
63 62
63#ifdef __alpha__
64/* for /sbin/loader handling in search_binary_handler() */
65#include <linux/a.out.h>
66#endif
67
64int core_uses_pid; 68int core_uses_pid;
65char core_pattern[CORENAME_MAX_SIZE] = "core"; 69char core_pattern[CORENAME_MAX_SIZE] = "core";
66int suid_dumpable = 0; 70int suid_dumpable = 0;
@@ -860,6 +864,7 @@ static int de_thread(struct task_struct *tsk)
860 864
861no_thread_group: 865no_thread_group:
862 exit_itimers(sig); 866 exit_itimers(sig);
867 flush_itimer_signals();
863 if (leader) 868 if (leader)
864 release_task(leader); 869 release_task(leader);
865 870
@@ -1154,7 +1159,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1154{ 1159{
1155 int try,retval; 1160 int try,retval;
1156 struct linux_binfmt *fmt; 1161 struct linux_binfmt *fmt;
1157#if defined(__alpha__) && defined(CONFIG_ARCH_SUPPORTS_AOUT) 1162#ifdef __alpha__
1158 /* handle /sbin/loader.. */ 1163 /* handle /sbin/loader.. */
1159 { 1164 {
1160 struct exec * eh = (struct exec *) bprm->buf; 1165 struct exec * eh = (struct exec *) bprm->buf;
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 28cfd0b40527..77278e947e94 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -580,7 +580,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
580 } 580 }
581 581
582 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; 582 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
583 data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; 583 data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count %
584 EXT3_ADDR_PER_BLOCK(sb));
584 end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); 585 end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
585 586
586 /* Get each reserved primary GDT block and verify it holds backups */ 587 /* Get each reserved primary GDT block and verify it holds backups */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index fe3119a71ada..2845425077e8 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2875,8 +2875,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
2875 blk++; 2875 blk++;
2876 } 2876 }
2877out: 2877out:
2878 if (len == towrite) 2878 if (len == towrite) {
2879 mutex_unlock(&inode->i_mutex);
2879 return err; 2880 return err;
2881 }
2880 if (inode->i_size < off+len-towrite) { 2882 if (inode->i_size < off+len-towrite) {
2881 i_size_write(inode, off+len-towrite); 2883 i_size_write(inode, off+len-towrite);
2882 EXT3_I(inode)->i_disksize = inode->i_size; 2884 EXT3_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 30494c5da843..9cc80b9cc8d8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
43 43
44} 44}
45 45
46static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
47 ext4_group_t block_group)
48{
49 ext4_group_t actual_group;
50 ext4_get_group_no_and_offset(sb, block, &actual_group, 0);
51 if (actual_group == block_group)
52 return 1;
53 return 0;
54}
55
56static int ext4_group_used_meta_blocks(struct super_block *sb,
57 ext4_group_t block_group)
58{
59 ext4_fsblk_t tmp;
60 struct ext4_sb_info *sbi = EXT4_SB(sb);
61 /* block bitmap, inode bitmap, and inode table blocks */
62 int used_blocks = sbi->s_itb_per_group + 2;
63
64 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
65 struct ext4_group_desc *gdp;
66 struct buffer_head *bh;
67
68 gdp = ext4_get_group_desc(sb, block_group, &bh);
69 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
70 block_group))
71 used_blocks--;
72
73 if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp),
74 block_group))
75 used_blocks--;
76
77 tmp = ext4_inode_table(sb, gdp);
78 for (; tmp < ext4_inode_table(sb, gdp) +
79 sbi->s_itb_per_group; tmp++) {
80 if (!ext4_block_in_group(sb, tmp, block_group))
81 used_blocks -= 1;
82 }
83 }
84 return used_blocks;
85}
46/* Initializes an uninitialized block bitmap if given, and returns the 86/* Initializes an uninitialized block bitmap if given, and returns the
47 * number of blocks free in the group. */ 87 * number of blocks free in the group. */
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 88unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
@@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
105 free_blocks = group_blocks - bit_max; 145 free_blocks = group_blocks - bit_max;
106 146
107 if (bh) { 147 if (bh) {
108 ext4_fsblk_t start; 148 ext4_fsblk_t start, tmp;
149 int flex_bg = 0;
109 150
110 for (bit = 0; bit < bit_max; bit++) 151 for (bit = 0; bit < bit_max; bit++)
111 ext4_set_bit(bit, bh->b_data); 152 ext4_set_bit(bit, bh->b_data);
112 153
113 start = ext4_group_first_block_no(sb, block_group); 154 start = ext4_group_first_block_no(sb, block_group);
114 155
115 /* Set bits for block and inode bitmaps, and inode table */ 156 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 157 EXT4_FEATURE_INCOMPAT_FLEX_BG))
117 ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); 158 flex_bg = 1;
118 for (bit = (ext4_inode_table(sb, gdp) - start),
119 bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
120 ext4_set_bit(bit, bh->b_data);
121 159
160 /* Set bits for block and inode bitmaps, and inode table */
161 tmp = ext4_block_bitmap(sb, gdp);
162 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
163 ext4_set_bit(tmp - start, bh->b_data);
164
165 tmp = ext4_inode_bitmap(sb, gdp);
166 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
167 ext4_set_bit(tmp - start, bh->b_data);
168
169 tmp = ext4_inode_table(sb, gdp);
170 for (; tmp < ext4_inode_table(sb, gdp) +
171 sbi->s_itb_per_group; tmp++) {
172 if (!flex_bg ||
173 ext4_block_in_group(sb, tmp, block_group))
174 ext4_set_bit(tmp - start, bh->b_data);
175 }
122 /* 176 /*
123 * Also if the number of blocks within the group is 177 * Also if the number of blocks within the group is
124 * less than the blocksize * 8 ( which is the size 178 * less than the blocksize * 8 ( which is the size
@@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
126 */ 180 */
127 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 181 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
128 } 182 }
129 183 return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
130 return free_blocks - sbi->s_itb_per_group - 2;
131} 184}
132 185
133 186
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 873ad9b3418c..c9900aade150 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2745 sbi = EXT4_SB(sb); 2745 sbi = EXT4_SB(sb);
2746 es = sbi->s_es; 2746 es = sbi->s_es;
2747 2747
2748 ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
2749 gdp->bg_free_blocks_count);
2750 2748
2751 err = -EIO; 2749 err = -EIO;
2752 bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); 2750 bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2762 if (!gdp) 2760 if (!gdp)
2763 goto out_err; 2761 goto out_err;
2764 2762
2763 ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
2764 gdp->bg_free_blocks_count);
2765
2765 err = ext4_journal_get_write_access(handle, gdp_bh); 2766 err = ext4_journal_get_write_access(handle, gdp_bh);
2766 if (err) 2767 if (err)
2767 goto out_err; 2768 goto out_err;
@@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3094static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, 3095static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3095 struct ext4_prealloc_space *pa) 3096 struct ext4_prealloc_space *pa)
3096{ 3097{
3097 unsigned len = ac->ac_o_ex.fe_len; 3098 unsigned int len = ac->ac_o_ex.fe_len;
3098
3099 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, 3099 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3100 &ac->ac_b_ex.fe_group, 3100 &ac->ac_b_ex.fe_group,
3101 &ac->ac_b_ex.fe_start); 3101 &ac->ac_b_ex.fe_start);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9f086a6a472b..9ff7b1c04239 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
563 } 563 }
564 564
565 blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; 565 blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count;
566 data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; 566 data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count %
567 EXT4_ADDR_PER_BLOCK(sb));
567 end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); 568 end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb);
568 569
569 /* Get each reserved primary GDT block and verify it holds backups */ 570 /* Get each reserved primary GDT block and verify it holds backups */
@@ -854,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
854 */ 855 */
855 856
856 /* Update group descriptor block for new group */ 857 /* Update group descriptor block for new group */
857 gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; 858 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
859 gdb_off * EXT4_DESC_SIZE(sb));
858 860
859 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 861 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
860 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 862 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 09d9359c8055..02bf24343979 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
671 unsigned long def_mount_opts; 671 unsigned long def_mount_opts;
672 struct super_block *sb = vfs->mnt_sb; 672 struct super_block *sb = vfs->mnt_sb;
673 struct ext4_sb_info *sbi = EXT4_SB(sb); 673 struct ext4_sb_info *sbi = EXT4_SB(sb);
674 journal_t *journal = sbi->s_journal;
674 struct ext4_super_block *es = sbi->s_es; 675 struct ext4_super_block *es = sbi->s_es;
675 676
676 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 677 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -729,8 +730,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
729 seq_printf(seq, ",commit=%u", 730 seq_printf(seq, ",commit=%u",
730 (unsigned) (sbi->s_commit_interval / HZ)); 731 (unsigned) (sbi->s_commit_interval / HZ));
731 } 732 }
732 if (test_opt(sb, BARRIER)) 733 /*
733 seq_puts(seq, ",barrier=1"); 734 * We're changing the default of barrier mount option, so
735 * let's always display its mount state so it's clear what its
736 * status is.
737 */
738 seq_puts(seq, ",barrier=");
739 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
740 if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
741 seq_puts(seq, ",journal_async_commit");
734 if (test_opt(sb, NOBH)) 742 if (test_opt(sb, NOBH))
735 seq_puts(seq, ",nobh"); 743 seq_puts(seq, ",nobh");
736 if (!test_opt(sb, EXTENTS)) 744 if (!test_opt(sb, EXTENTS))
@@ -1907,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1907 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1915 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1908 1916
1909 set_opt(sbi->s_mount_opt, RESERVATION); 1917 set_opt(sbi->s_mount_opt, RESERVATION);
1918 set_opt(sbi->s_mount_opt, BARRIER);
1910 1919
1911 /* 1920 /*
1912 * turn on extents feature by default in ext4 filesystem 1921 * turn on extents feature by default in ext4 filesystem
@@ -2189,6 +2198,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2189 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2198 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2190 if (ext4_load_journal(sb, es, journal_devnum)) 2199 if (ext4_load_journal(sb, es, journal_devnum))
2191 goto failed_mount3; 2200 goto failed_mount3;
2201 if (!(sb->s_flags & MS_RDONLY) &&
2202 EXT4_SB(sb)->s_journal->j_failed_commit) {
2203 printk(KERN_CRIT "EXT4-fs error (device %s): "
2204 "ext4_fill_super: Journal transaction "
2205 "%u is corrupt\n", sb->s_id,
2206 EXT4_SB(sb)->s_journal->j_failed_commit);
2207 if (test_opt (sb, ERRORS_RO)) {
2208 printk (KERN_CRIT
2209 "Mounting filesystem read-only\n");
2210 sb->s_flags |= MS_RDONLY;
2211 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2212 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2213 }
2214 if (test_opt(sb, ERRORS_PANIC)) {
2215 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2216 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2217 ext4_commit_super(sb, es, 1);
2218 printk(KERN_CRIT
2219 "EXT4-fs (device %s): mount failed\n",
2220 sb->s_id);
2221 goto failed_mount4;
2222 }
2223 }
2192 } else if (journal_inum) { 2224 } else if (journal_inum) {
2193 if (ext4_create_journal(sb, es, journal_inum)) 2225 if (ext4_create_journal(sb, es, journal_inum))
2194 goto failed_mount3; 2226 goto failed_mount3;
@@ -3305,8 +3337,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3305 blk++; 3337 blk++;
3306 } 3338 }
3307out: 3339out:
3308 if (len == towrite) 3340 if (len == towrite) {
3341 mutex_unlock(&inode->i_mutex);
3309 return err; 3342 return err;
3343 }
3310 if (inode->i_size < off+len-towrite) { 3344 if (inode->i_size < off+len-towrite) {
3311 i_size_write(inode, off+len-towrite); 3345 i_size_write(inode, off+len-towrite);
3312 EXT4_I(inode)->i_disksize = inode->i_size; 3346 EXT4_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 27cc1164ec36..771326b8047e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -257,26 +257,34 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
257} 257}
258EXPORT_SYMBOL_GPL(fat_getattr); 258EXPORT_SYMBOL_GPL(fat_getattr);
259 259
260static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode, 260static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
261 mode_t mode) 261 struct inode *inode, umode_t *mode_ptr)
262{ 262{
263 mode_t mask, req = mode & ~S_IFMT; 263 mode_t mask, perm;
264 264
265 if (S_ISREG(mode)) 265 /*
266 * Note, the basic check is already done by a caller of
267 * (attr->ia_mode & ~MSDOS_VALID_MODE)
268 */
269
270 if (S_ISREG(inode->i_mode))
266 mask = sbi->options.fs_fmask; 271 mask = sbi->options.fs_fmask;
267 else 272 else
268 mask = sbi->options.fs_dmask; 273 mask = sbi->options.fs_dmask;
269 274
275 perm = *mode_ptr & ~(S_IFMT | mask);
276
270 /* 277 /*
271 * Of the r and x bits, all (subject to umask) must be present. Of the 278 * Of the r and x bits, all (subject to umask) must be present. Of the
272 * w bits, either all (subject to umask) or none must be present. 279 * w bits, either all (subject to umask) or none must be present.
273 */ 280 */
274 req &= ~mask; 281 if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
275 if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
276 return -EPERM; 282 return -EPERM;
277 if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask))) 283 if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
278 return -EPERM; 284 return -EPERM;
279 285
286 *mode_ptr &= S_IFMT | perm;
287
280 return 0; 288 return 0;
281} 289}
282 290
@@ -299,7 +307,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
299{ 307{
300 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 308 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
301 struct inode *inode = dentry->d_inode; 309 struct inode *inode = dentry->d_inode;
302 int mask, error = 0; 310 int error = 0;
303 unsigned int ia_valid; 311 unsigned int ia_valid;
304 312
305 lock_kernel(); 313 lock_kernel();
@@ -332,12 +340,13 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
332 error = 0; 340 error = 0;
333 goto out; 341 goto out;
334 } 342 }
343
335 if (((attr->ia_valid & ATTR_UID) && 344 if (((attr->ia_valid & ATTR_UID) &&
336 (attr->ia_uid != sbi->options.fs_uid)) || 345 (attr->ia_uid != sbi->options.fs_uid)) ||
337 ((attr->ia_valid & ATTR_GID) && 346 ((attr->ia_valid & ATTR_GID) &&
338 (attr->ia_gid != sbi->options.fs_gid)) || 347 (attr->ia_gid != sbi->options.fs_gid)) ||
339 ((attr->ia_valid & ATTR_MODE) && 348 ((attr->ia_valid & ATTR_MODE) &&
340 fat_check_mode(sbi, inode, attr->ia_mode) < 0)) 349 (attr->ia_mode & ~MSDOS_VALID_MODE)))
341 error = -EPERM; 350 error = -EPERM;
342 351
343 if (error) { 352 if (error) {
@@ -346,15 +355,16 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
346 goto out; 355 goto out;
347 } 356 }
348 357
349 error = inode_setattr(inode, attr); 358 /*
350 if (error) 359 * We don't return -EPERM here. Yes, strange, but this is too
351 goto out; 360 * old behavior.
361 */
362 if (attr->ia_valid & ATTR_MODE) {
363 if (fat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0)
364 attr->ia_valid &= ~ATTR_MODE;
365 }
352 366
353 if (S_ISDIR(inode->i_mode)) 367 error = inode_setattr(inode, attr);
354 mask = sbi->options.fs_dmask;
355 else
356 mask = sbi->options.fs_fmask;
357 inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask);
358out: 368out:
359 unlock_kernel(); 369 unlock_kernel();
360 return error; 370 return error;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fb77e0962132..3141690558c8 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -488,7 +488,12 @@ static struct fuse_conn *new_conn(struct super_block *sb)
488 err = bdi_init(&fc->bdi); 488 err = bdi_init(&fc->bdi);
489 if (err) 489 if (err)
490 goto error_kfree; 490 goto error_kfree;
491 err = bdi_register_dev(&fc->bdi, fc->dev); 491 if (sb->s_bdev) {
492 err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
493 MAJOR(fc->dev), MINOR(fc->dev));
494 } else {
495 err = bdi_register_dev(&fc->bdi, fc->dev);
496 }
492 if (err) 497 if (err)
493 goto error_bdi_destroy; 498 goto error_bdi_destroy;
494 /* 499 /*
@@ -586,7 +591,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
586 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); 591 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
587 fc->minor = arg->minor; 592 fc->minor = arg->minor;
588 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 593 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
589 fc->max_write = min_t(unsigned, 4096, fc->max_write); 594 fc->max_write = max_t(unsigned, 4096, fc->max_write);
590 fc->conn_init = 1; 595 fc->conn_init = 1;
591 } 596 }
592 fuse_put_request(fc, req); 597 fuse_put_request(fc, req);
@@ -662,7 +667,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
662 fc->flags = d.flags; 667 fc->flags = d.flags;
663 fc->user_id = d.user_id; 668 fc->user_id = d.user_id;
664 fc->group_id = d.group_id; 669 fc->group_id = d.group_id;
665 fc->max_read = min_t(unsigned, 4096, d.max_read); 670 fc->max_read = max_t(unsigned, 4096, d.max_read);
666 671
667 /* Used by get_root_inode() */ 672 /* Used by get_root_inode() */
668 sb->s_fs_info = fc; 673 sb->s_fs_info = fc;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c19184f2e70e..bec76b1c2bb0 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
246 246
247} 247}
248 248
249static inline unsigned int zero_metapath_length(const struct metapath *mp, 249static inline unsigned int metapath_branch_start(const struct metapath *mp)
250 unsigned height)
251{ 250{
252 unsigned int i; 251 if (mp->mp_list[0] == 0)
253 for (i = 0; i < height - 1; i++) { 252 return 2;
254 if (mp->mp_list[i] != 0) 253 return 1;
255 return i;
256 }
257 return height;
258} 254}
259 255
260/** 256/**
@@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
436 struct gfs2_sbd *sdp = GFS2_SB(inode); 432 struct gfs2_sbd *sdp = GFS2_SB(inode);
437 struct buffer_head *dibh = mp->mp_bh[0]; 433 struct buffer_head *dibh = mp->mp_bh[0];
438 u64 bn, dblock = 0; 434 u64 bn, dblock = 0;
439 unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; 435 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
440 unsigned dblks = 0; 436 unsigned dblks = 0;
441 unsigned ptrs_per_blk; 437 unsigned ptrs_per_blk;
442 const unsigned end_of_metadata = height - 1; 438 const unsigned end_of_metadata = height - 1;
@@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
471 /* Building up tree height */ 467 /* Building up tree height */
472 state = ALLOC_GROW_HEIGHT; 468 state = ALLOC_GROW_HEIGHT;
473 iblks = height - ip->i_height; 469 iblks = height - ip->i_height;
474 zmpl = zero_metapath_length(mp, height); 470 branch_start = metapath_branch_start(mp);
475 iblks -= zmpl; 471 iblks += (height - branch_start);
476 iblks += height;
477 } 472 }
478 } 473 }
479 474
@@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
509 sizeof(struct gfs2_meta_header)); 504 sizeof(struct gfs2_meta_header));
510 *ptr = zero_bn; 505 *ptr = zero_bn;
511 state = ALLOC_GROW_DEPTH; 506 state = ALLOC_GROW_DEPTH;
512 for(i = zmpl; i < height; i++) { 507 for(i = branch_start; i < height; i++) {
513 if (mp->mp_bh[i] == NULL) 508 if (mp->mp_bh[i] == NULL)
514 break; 509 break;
515 brelse(mp->mp_bh[i]); 510 brelse(mp->mp_bh[i]);
516 mp->mp_bh[i] = NULL; 511 mp->mp_bh[i] = NULL;
517 } 512 }
518 i = zmpl; 513 i = branch_start;
519 } 514 }
520 if (n == 0) 515 if (n == 0)
521 break; 516 break;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6387523a3153..3401628d742b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -195,7 +195,7 @@ ulong_aligned:
195 depending on architecture. I've experimented with several ways 195 depending on architecture. I've experimented with several ways
196 of writing this section such as using an else before the goto 196 of writing this section such as using an else before the goto
197 but this one seems to be the fastest. */ 197 but this one seems to be the fastest. */
198 while ((unsigned char *)plong < end - 1) { 198 while ((unsigned char *)plong < end - sizeof(unsigned long)) {
199 prefetch(plong + 1); 199 prefetch(plong + 1);
200 if (((*plong) & LBITMASK) != lskipval) 200 if (((*plong) & LBITMASK) != lskipval)
201 break; 201 break;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 4d99685fdce4..a2ed72f7ceee 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -168,6 +168,7 @@ static int journal_submit_commit_record(journal_t *journal,
168 spin_unlock(&journal->j_state_lock); 168 spin_unlock(&journal->j_state_lock);
169 169
170 /* And try again, without the barrier */ 170 /* And try again, without the barrier */
171 lock_buffer(bh);
171 set_buffer_uptodate(bh); 172 set_buffer_uptodate(bh);
172 set_buffer_dirty(bh); 173 set_buffer_dirty(bh);
173 ret = submit_bh(WRITE, bh); 174 ret = submit_bh(WRITE, bh);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 5d0405a9e7ca..058f50f65b76 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
345 obh->b_size); 345 obh->b_size);
346 } 346 }
347 put_bh(obh);
347 } 348 }
348 return 0; 349 return 0;
349} 350}
@@ -610,9 +611,8 @@ static int do_one_pass(journal_t *journal,
610 chksum_err = chksum_seen = 0; 611 chksum_err = chksum_seen = 0;
611 612
612 if (info->end_transaction) { 613 if (info->end_transaction) {
613 printk(KERN_ERR "JBD: Transaction %u " 614 journal->j_failed_commit =
614 "found to be corrupt.\n", 615 info->end_transaction;
615 next_commit_ID - 1);
616 brelse(bh); 616 brelse(bh);
617 break; 617 break;
618 } 618 }
@@ -643,10 +643,8 @@ static int do_one_pass(journal_t *journal,
643 643
644 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 644 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ 645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
646 printk(KERN_ERR 646 journal->j_failed_commit =
647 "JBD: Transaction %u " 647 next_commit_ID;
648 "found to be corrupt.\n",
649 next_commit_ID);
650 brelse(bh); 648 brelse(bh);
651 break; 649 break;
652 } 650 }
diff --git a/fs/libfs.c b/fs/libfs.c
index b004dfadd891..baeb71ee1cde 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -512,6 +512,20 @@ void simple_release_fs(struct vfsmount **mount, int *count)
512 mntput(mnt); 512 mntput(mnt);
513} 513}
514 514
515/**
516 * simple_read_from_buffer - copy data from the buffer to user space
517 * @to: the user space buffer to read to
518 * @count: the maximum number of bytes to read
519 * @ppos: the current position in the buffer
520 * @from: the buffer to read from
521 * @available: the size of the buffer
522 *
523 * The simple_read_from_buffer() function reads up to @count bytes from the
524 * buffer @from at offset @ppos into the user space address starting at @to.
525 *
526 * On success, the number of bytes read is returned and the offset @ppos is
527 * advanced by this number, or negative value is returned on error.
528 **/
515ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, 529ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
516 const void *from, size_t available) 530 const void *from, size_t available)
517{ 531{
@@ -528,6 +542,37 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
528 return count; 542 return count;
529} 543}
530 544
545/**
546 * memory_read_from_buffer - copy data from the buffer
547 * @to: the kernel space buffer to read to
548 * @count: the maximum number of bytes to read
549 * @ppos: the current position in the buffer
550 * @from: the buffer to read from
551 * @available: the size of the buffer
552 *
553 * The memory_read_from_buffer() function reads up to @count bytes from the
554 * buffer @from at offset @ppos into the kernel space address starting at @to.
555 *
556 * On success, the number of bytes read is returned and the offset @ppos is
557 * advanced by this number, or negative value is returned on error.
558 **/
559ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
560 const void *from, size_t available)
561{
562 loff_t pos = *ppos;
563
564 if (pos < 0)
565 return -EINVAL;
566 if (pos >= available)
567 return 0;
568 if (count > available - pos)
569 count = available - pos;
570 memcpy(to, from + pos, count);
571 *ppos = pos + count;
572
573 return count;
574}
575
531/* 576/*
532 * Transaction based IO. 577 * Transaction based IO.
533 * The file expects a single write which triggers the transaction, and then 578 * The file expects a single write which triggers the transaction, and then
@@ -800,6 +845,7 @@ EXPORT_SYMBOL(simple_statfs);
800EXPORT_SYMBOL(simple_sync_file); 845EXPORT_SYMBOL(simple_sync_file);
801EXPORT_SYMBOL(simple_unlink); 846EXPORT_SYMBOL(simple_unlink);
802EXPORT_SYMBOL(simple_read_from_buffer); 847EXPORT_SYMBOL(simple_read_from_buffer);
848EXPORT_SYMBOL(memory_read_from_buffer);
803EXPORT_SYMBOL(simple_transaction_get); 849EXPORT_SYMBOL(simple_transaction_get);
804EXPORT_SYMBOL(simple_transaction_read); 850EXPORT_SYMBOL(simple_transaction_read);
805EXPORT_SYMBOL(simple_transaction_release); 851EXPORT_SYMBOL(simple_transaction_release);
diff --git a/fs/locks.c b/fs/locks.c
index 11dbf08651b7..dce8c747371c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
561 /* insert into file's list */ 561 /* insert into file's list */
562 fl->fl_next = *pos; 562 fl->fl_next = *pos;
563 *pos = fl; 563 *pos = fl;
564
565 if (fl->fl_ops && fl->fl_ops->fl_insert)
566 fl->fl_ops->fl_insert(fl);
567} 564}
568 565
569/* 566/*
@@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
586 fl->fl_fasync = NULL; 583 fl->fl_fasync = NULL;
587 } 584 }
588 585
589 if (fl->fl_ops && fl->fl_ops->fl_remove)
590 fl->fl_ops->fl_remove(fl);
591
592 if (fl->fl_nspid) { 586 if (fl->fl_nspid) {
593 put_pid(fl->fl_nspid); 587 put_pid(fl->fl_nspid);
594 fl->fl_nspid = NULL; 588 fl->fl_nspid = NULL;
diff --git a/fs/namei.c b/fs/namei.c
index c7e43536c49a..01e67dddcc3d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -581,15 +581,13 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd
581 int result; 581 int result;
582 582
583 /* make sure the stuff we saved doesn't go away */ 583 /* make sure the stuff we saved doesn't go away */
584 dget(save.dentry); 584 path_get(&save);
585 mntget(save.mnt);
586 585
587 result = __link_path_walk(name, nd); 586 result = __link_path_walk(name, nd);
588 if (result == -ESTALE) { 587 if (result == -ESTALE) {
589 /* nd->path had been dropped */ 588 /* nd->path had been dropped */
590 nd->path = save; 589 nd->path = save;
591 dget(nd->path.dentry); 590 path_get(&nd->path);
592 mntget(nd->path.mnt);
593 nd->flags |= LOOKUP_REVAL; 591 nd->flags |= LOOKUP_REVAL;
594 result = __link_path_walk(name, nd); 592 result = __link_path_walk(name, nd);
595 } 593 }
@@ -1216,8 +1214,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1216 nd->flags = flags; 1214 nd->flags = flags;
1217 nd->depth = 0; 1215 nd->depth = 0;
1218 1216
1219 nd->path.mnt = mntget(mnt); 1217 nd->path.dentry = dentry;
1220 nd->path.dentry = dget(dentry); 1218 nd->path.mnt = mnt;
1219 path_get(&nd->path);
1221 1220
1222 retval = path_walk(name, nd); 1221 retval = path_walk(name, nd);
1223 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1222 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
@@ -2857,16 +2856,17 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
2857{ 2856{
2858 struct nameidata nd; 2857 struct nameidata nd;
2859 void *cookie; 2858 void *cookie;
2859 int res;
2860 2860
2861 nd.depth = 0; 2861 nd.depth = 0;
2862 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); 2862 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
2863 if (!IS_ERR(cookie)) { 2863 if (IS_ERR(cookie))
2864 int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 2864 return PTR_ERR(cookie);
2865 if (dentry->d_inode->i_op->put_link) 2865
2866 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 2866 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
2867 cookie = ERR_PTR(res); 2867 if (dentry->d_inode->i_op->put_link)
2868 } 2868 dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
2869 return PTR_ERR(cookie); 2869 return res;
2870} 2870}
2871 2871
2872int vfs_follow_link(struct nameidata *nd, const char *link) 2872int vfs_follow_link(struct nameidata *nd, const char *link)
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 49c7cd0502cc..779d2eb649c5 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
130 struct mnt_fhstatus *res) 130 struct mnt_fhstatus *res)
131{ 131{
132 struct nfs_fh *fh = res->fh; 132 struct nfs_fh *fh = res->fh;
133 unsigned size;
133 134
134 if ((res->status = ntohl(*p++)) == 0) { 135 if ((res->status = ntohl(*p++)) == 0) {
135 int size = ntohl(*p++); 136 size = ntohl(*p++);
136 if (size <= NFS3_FHSIZE) { 137 if (size <= NFS3_FHSIZE && size != 0) {
137 fh->size = size; 138 fh->size = size;
138 memcpy(fh->data, p, size); 139 memcpy(fh->data, p, size);
139 } else 140 } else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2a4a024a4e7b..614efeed5437 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options,
1216{ 1216{
1217 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1217 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1218 1218
1219 memset(args, 0, sizeof(*args));
1220
1221 if (data == NULL) 1219 if (data == NULL)
1222 goto out_no_data; 1220 goto out_no_data;
1223 1221
@@ -1251,13 +1249,13 @@ static int nfs_validate_mount_data(void *options,
1251 case 5: 1249 case 5:
1252 memset(data->context, 0, sizeof(data->context)); 1250 memset(data->context, 0, sizeof(data->context));
1253 case 6: 1251 case 6:
1254 if (data->flags & NFS_MOUNT_VER3) 1252 if (data->flags & NFS_MOUNT_VER3) {
1253 if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
1254 goto out_invalid_fh;
1255 mntfh->size = data->root.size; 1255 mntfh->size = data->root.size;
1256 else 1256 } else
1257 mntfh->size = NFS2_FHSIZE; 1257 mntfh->size = NFS2_FHSIZE;
1258 1258
1259 if (mntfh->size > sizeof(mntfh->data))
1260 goto out_invalid_fh;
1261 1259
1262 memcpy(mntfh->data, data->root.data, mntfh->size); 1260 memcpy(mntfh->data, data->root.data, mntfh->size);
1263 if (mntfh->size < sizeof(mntfh->data)) 1261 if (mntfh->size < sizeof(mntfh->data))
@@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1585{ 1583{
1586 struct nfs_server *server = NULL; 1584 struct nfs_server *server = NULL;
1587 struct super_block *s; 1585 struct super_block *s;
1588 struct nfs_fh mntfh; 1586 struct nfs_parsed_mount_data *data;
1589 struct nfs_parsed_mount_data data; 1587 struct nfs_fh *mntfh;
1590 struct dentry *mntroot; 1588 struct dentry *mntroot;
1591 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 1589 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1592 struct nfs_sb_mountdata sb_mntdata = { 1590 struct nfs_sb_mountdata sb_mntdata = {
1593 .mntflags = flags, 1591 .mntflags = flags,
1594 }; 1592 };
1595 int error; 1593 int error = -ENOMEM;
1594
1595 data = kzalloc(sizeof(*data), GFP_KERNEL);
1596 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
1597 if (data == NULL || mntfh == NULL)
1598 goto out_free_fh;
1596 1599
1597 security_init_mnt_opts(&data.lsm_opts); 1600 security_init_mnt_opts(&data->lsm_opts);
1598 1601
1599 /* Validate the mount data */ 1602 /* Validate the mount data */
1600 error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); 1603 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
1601 if (error < 0) 1604 if (error < 0)
1602 goto out; 1605 goto out;
1603 1606
1604 /* Get a volume representation */ 1607 /* Get a volume representation */
1605 server = nfs_create_server(&data, &mntfh); 1608 server = nfs_create_server(data, mntfh);
1606 if (IS_ERR(server)) { 1609 if (IS_ERR(server)) {
1607 error = PTR_ERR(server); 1610 error = PTR_ERR(server);
1608 goto out; 1611 goto out;
@@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1630 1633
1631 if (!s->s_root) { 1634 if (!s->s_root) {
1632 /* initial superblock/root creation */ 1635 /* initial superblock/root creation */
1633 nfs_fill_super(s, &data); 1636 nfs_fill_super(s, data);
1634 } 1637 }
1635 1638
1636 mntroot = nfs_get_root(s, &mntfh); 1639 mntroot = nfs_get_root(s, mntfh);
1637 if (IS_ERR(mntroot)) { 1640 if (IS_ERR(mntroot)) {
1638 error = PTR_ERR(mntroot); 1641 error = PTR_ERR(mntroot);
1639 goto error_splat_super; 1642 goto error_splat_super;
1640 } 1643 }
1641 1644
1642 error = security_sb_set_mnt_opts(s, &data.lsm_opts); 1645 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
1643 if (error) 1646 if (error)
1644 goto error_splat_root; 1647 goto error_splat_root;
1645 1648
@@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1649 error = 0; 1652 error = 0;
1650 1653
1651out: 1654out:
1652 kfree(data.nfs_server.hostname); 1655 kfree(data->nfs_server.hostname);
1653 kfree(data.mount_server.hostname); 1656 kfree(data->mount_server.hostname);
1654 security_free_mnt_opts(&data.lsm_opts); 1657 security_free_mnt_opts(&data->lsm_opts);
1658out_free_fh:
1659 kfree(mntfh);
1660 kfree(data);
1655 return error; 1661 return error;
1656 1662
1657out_err_nosb: 1663out_err_nosb:
@@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options,
1800 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; 1806 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
1801 char *c; 1807 char *c;
1802 1808
1803 memset(args, 0, sizeof(*args));
1804
1805 if (data == NULL) 1809 if (data == NULL)
1806 goto out_no_data; 1810 goto out_no_data;
1807 1811
@@ -1959,26 +1963,31 @@ out_no_client_address:
1959static int nfs4_get_sb(struct file_system_type *fs_type, 1963static int nfs4_get_sb(struct file_system_type *fs_type,
1960 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1964 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1961{ 1965{
1962 struct nfs_parsed_mount_data data; 1966 struct nfs_parsed_mount_data *data;
1963 struct super_block *s; 1967 struct super_block *s;
1964 struct nfs_server *server; 1968 struct nfs_server *server;
1965 struct nfs_fh mntfh; 1969 struct nfs_fh *mntfh;
1966 struct dentry *mntroot; 1970 struct dentry *mntroot;
1967 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 1971 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1968 struct nfs_sb_mountdata sb_mntdata = { 1972 struct nfs_sb_mountdata sb_mntdata = {
1969 .mntflags = flags, 1973 .mntflags = flags,
1970 }; 1974 };
1971 int error; 1975 int error = -ENOMEM;
1972 1976
1973 security_init_mnt_opts(&data.lsm_opts); 1977 data = kzalloc(sizeof(*data), GFP_KERNEL);
1978 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
1979 if (data == NULL || mntfh == NULL)
1980 goto out_free_fh;
1981
1982 security_init_mnt_opts(&data->lsm_opts);
1974 1983
1975 /* Validate the mount data */ 1984 /* Validate the mount data */
1976 error = nfs4_validate_mount_data(raw_data, &data, dev_name); 1985 error = nfs4_validate_mount_data(raw_data, data, dev_name);
1977 if (error < 0) 1986 if (error < 0)
1978 goto out; 1987 goto out;
1979 1988
1980 /* Get a volume representation */ 1989 /* Get a volume representation */
1981 server = nfs4_create_server(&data, &mntfh); 1990 server = nfs4_create_server(data, mntfh);
1982 if (IS_ERR(server)) { 1991 if (IS_ERR(server)) {
1983 error = PTR_ERR(server); 1992 error = PTR_ERR(server);
1984 goto out; 1993 goto out;
@@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2009 nfs4_fill_super(s); 2018 nfs4_fill_super(s);
2010 } 2019 }
2011 2020
2012 mntroot = nfs4_get_root(s, &mntfh); 2021 mntroot = nfs4_get_root(s, mntfh);
2013 if (IS_ERR(mntroot)) { 2022 if (IS_ERR(mntroot)) {
2014 error = PTR_ERR(mntroot); 2023 error = PTR_ERR(mntroot);
2015 goto error_splat_super; 2024 goto error_splat_super;
2016 } 2025 }
2017 2026
2018 error = security_sb_set_mnt_opts(s, &data.lsm_opts); 2027 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2019 if (error) 2028 if (error)
2020 goto error_splat_root; 2029 goto error_splat_root;
2021 2030
@@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2025 error = 0; 2034 error = 0;
2026 2035
2027out: 2036out:
2028 kfree(data.client_address); 2037 kfree(data->client_address);
2029 kfree(data.nfs_server.export_path); 2038 kfree(data->nfs_server.export_path);
2030 kfree(data.nfs_server.hostname); 2039 kfree(data->nfs_server.hostname);
2031 security_free_mnt_opts(&data.lsm_opts); 2040 security_free_mnt_opts(&data->lsm_opts);
2041out_free_fh:
2042 kfree(mntfh);
2043 kfree(data);
2032 return error; 2044 return error;
2033 2045
2034out_free: 2046out_free:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6d8ace3e3259..f333848fd3be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page,
739 } 739 }
740 740
741 status = nfs_writepage_setup(ctx, page, offset, count); 741 status = nfs_writepage_setup(ctx, page, offset, count);
742 __set_page_dirty_nobuffers(page); 742 if (status < 0)
743 nfs_set_pageerror(page);
744 else
745 __set_page_dirty_nobuffers(page);
743 746
744 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", 747 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
745 status, (long long)i_size_read(inode)); 748 status, (long long)i_size_read(inode));
746 if (status < 0)
747 nfs_set_pageerror(page);
748 return status; 749 return status;
749} 750}
750 751
diff --git a/fs/ntfs/upcase.c b/fs/ntfs/upcase.c
index 9101807dc81a..e2f72ca98037 100644
--- a/fs/ntfs/upcase.c
+++ b/fs/ntfs/upcase.c
@@ -77,11 +77,10 @@ ntfschar *generate_default_upcase(void)
77 uc[i] = cpu_to_le16(i); 77 uc[i] = cpu_to_le16(i);
78 for (r = 0; uc_run_table[r][0]; r++) 78 for (r = 0; uc_run_table[r][0]; r++)
79 for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) 79 for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
80 uc[i] = cpu_to_le16(le16_to_cpu(uc[i]) + 80 le16_add_cpu(&uc[i], uc_run_table[r][2]);
81 uc_run_table[r][2]);
82 for (r = 0; uc_dup_table[r][0]; r++) 81 for (r = 0; uc_dup_table[r][0]; r++)
83 for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) 82 for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
84 uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1); 83 le16_add_cpu(&uc[i + 1], -1);
85 for (r = 0; uc_word_table[r][0]; r++) 84 for (r = 0; uc_word_table[r][0]; r++)
86 uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]); 85 uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]);
87 return uc; 86 return uc;
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index cf9401e8cd0b..cfdb08b484ed 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -21,7 +21,6 @@
21 21
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/sysctl.h>
25#include <linux/configfs.h> 24#include <linux/configfs.h>
26 25
27#include "tcp.h" 26#include "tcp.h"
@@ -36,65 +35,6 @@
36 * cluster references throughout where nodes are looked up */ 35 * cluster references throughout where nodes are looked up */
37struct o2nm_cluster *o2nm_single_cluster = NULL; 36struct o2nm_cluster *o2nm_single_cluster = NULL;
38 37
39#define OCFS2_MAX_HB_CTL_PATH 256
40static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
41
42static ctl_table ocfs2_nm_table[] = {
43 {
44 .ctl_name = 1,
45 .procname = "hb_ctl_path",
46 .data = ocfs2_hb_ctl_path,
47 .maxlen = OCFS2_MAX_HB_CTL_PATH,
48 .mode = 0644,
49 .proc_handler = &proc_dostring,
50 .strategy = &sysctl_string,
51 },
52 { .ctl_name = 0 }
53};
54
55static ctl_table ocfs2_mod_table[] = {
56 {
57 .ctl_name = FS_OCFS2_NM,
58 .procname = "nm",
59 .data = NULL,
60 .maxlen = 0,
61 .mode = 0555,
62 .child = ocfs2_nm_table
63 },
64 { .ctl_name = 0}
65};
66
67static ctl_table ocfs2_kern_table[] = {
68 {
69 .ctl_name = FS_OCFS2,
70 .procname = "ocfs2",
71 .data = NULL,
72 .maxlen = 0,
73 .mode = 0555,
74 .child = ocfs2_mod_table
75 },
76 { .ctl_name = 0}
77};
78
79static ctl_table ocfs2_root_table[] = {
80 {
81 .ctl_name = CTL_FS,
82 .procname = "fs",
83 .data = NULL,
84 .maxlen = 0,
85 .mode = 0555,
86 .child = ocfs2_kern_table
87 },
88 { .ctl_name = 0 }
89};
90
91static struct ctl_table_header *ocfs2_table_header = NULL;
92
93const char *o2nm_get_hb_ctl_path(void)
94{
95 return ocfs2_hb_ctl_path;
96}
97EXPORT_SYMBOL_GPL(o2nm_get_hb_ctl_path);
98 38
99struct o2nm_node *o2nm_get_node_by_num(u8 node_num) 39struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
100{ 40{
@@ -941,9 +881,6 @@ void o2nm_undepend_this_node(void)
941 881
942static void __exit exit_o2nm(void) 882static void __exit exit_o2nm(void)
943{ 883{
944 if (ocfs2_table_header)
945 unregister_sysctl_table(ocfs2_table_header);
946
947 /* XXX sync with hb callbacks and shut down hb? */ 884 /* XXX sync with hb callbacks and shut down hb? */
948 o2net_unregister_hb_callbacks(); 885 o2net_unregister_hb_callbacks();
949 configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); 886 configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys);
@@ -964,16 +901,9 @@ static int __init init_o2nm(void)
964 if (ret) 901 if (ret)
965 goto out; 902 goto out;
966 903
967 ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
968 if (!ocfs2_table_header) {
969 printk(KERN_ERR "nodemanager: unable to register sysctl\n");
970 ret = -ENOMEM; /* or something. */
971 goto out_o2net;
972 }
973
974 ret = o2net_register_hb_callbacks(); 904 ret = o2net_register_hb_callbacks();
975 if (ret) 905 if (ret)
976 goto out_sysctl; 906 goto out_o2net;
977 907
978 config_group_init(&o2nm_cluster_group.cs_subsys.su_group); 908 config_group_init(&o2nm_cluster_group.cs_subsys.su_group);
979 mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex); 909 mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex);
@@ -990,8 +920,6 @@ static int __init init_o2nm(void)
990 configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys); 920 configfs_unregister_subsystem(&o2nm_cluster_group.cs_subsys);
991out_callbacks: 921out_callbacks:
992 o2net_unregister_hb_callbacks(); 922 o2net_unregister_hb_callbacks();
993out_sysctl:
994 unregister_sysctl_table(ocfs2_table_header);
995out_o2net: 923out_o2net:
996 o2net_exit(); 924 o2net_exit();
997out: 925out:
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index 7c860361b8dd..c992ea0da4ad 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,10 +33,6 @@
33#include <linux/configfs.h> 33#include <linux/configfs.h>
34#include <linux/rbtree.h> 34#include <linux/rbtree.h>
35 35
36#define FS_OCFS2_NM 1
37
38const char *o2nm_get_hb_ctl_path(void);
39
40struct o2nm_node { 36struct o2nm_node {
41 spinlock_t nd_lock; 37 spinlock_t nd_lock;
42 struct config_item nd_item; 38 struct config_item nd_item;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1e44ad14881a..a27d61581bd6 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -142,53 +142,43 @@ static void o2net_idle_timer(unsigned long data);
142static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); 142static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); 143static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
144 144
145static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
146 u32 msgkey, struct task_struct *task, u8 node)
147{
148#ifdef CONFIG_DEBUG_FS 145#ifdef CONFIG_DEBUG_FS
146void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
147 u32 msgkey, struct task_struct *task, u8 node)
148{
149 INIT_LIST_HEAD(&nst->st_net_debug_item); 149 INIT_LIST_HEAD(&nst->st_net_debug_item);
150 nst->st_task = task; 150 nst->st_task = task;
151 nst->st_msg_type = msgtype; 151 nst->st_msg_type = msgtype;
152 nst->st_msg_key = msgkey; 152 nst->st_msg_key = msgkey;
153 nst->st_node = node; 153 nst->st_node = node;
154#endif
155} 154}
156 155
157static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) 156void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
158{ 157{
159#ifdef CONFIG_DEBUG_FS
160 do_gettimeofday(&nst->st_sock_time); 158 do_gettimeofday(&nst->st_sock_time);
161#endif
162} 159}
163 160
164static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) 161void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
165{ 162{
166#ifdef CONFIG_DEBUG_FS
167 do_gettimeofday(&nst->st_send_time); 163 do_gettimeofday(&nst->st_send_time);
168#endif
169} 164}
170 165
171static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) 166void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
172{ 167{
173#ifdef CONFIG_DEBUG_FS
174 do_gettimeofday(&nst->st_status_time); 168 do_gettimeofday(&nst->st_status_time);
175#endif
176} 169}
177 170
178static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, 171void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
179 struct o2net_sock_container *sc) 172 struct o2net_sock_container *sc)
180{ 173{
181#ifdef CONFIG_DEBUG_FS
182 nst->st_sc = sc; 174 nst->st_sc = sc;
183#endif
184} 175}
185 176
186static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) 177void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id)
187{ 178{
188#ifdef CONFIG_DEBUG_FS
189 nst->st_id = msg_id; 179 nst->st_id = msg_id;
190#endif
191} 180}
181#endif /* CONFIG_DEBUG_FS */
192 182
193static inline int o2net_reconnect_delay(void) 183static inline int o2net_reconnect_delay(void)
194{ 184{
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index a705d5d19036..fd6179eb26d4 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -128,23 +128,23 @@ void o2net_debug_del_nst(struct o2net_send_tracking *nst);
128void o2net_debug_add_sc(struct o2net_sock_container *sc); 128void o2net_debug_add_sc(struct o2net_sock_container *sc);
129void o2net_debug_del_sc(struct o2net_sock_container *sc); 129void o2net_debug_del_sc(struct o2net_sock_container *sc);
130#else 130#else
131static int o2net_debugfs_init(void) 131static inline int o2net_debugfs_init(void)
132{ 132{
133 return 0; 133 return 0;
134} 134}
135static void o2net_debugfs_exit(void) 135static inline void o2net_debugfs_exit(void)
136{ 136{
137} 137}
138static void o2net_debug_add_nst(struct o2net_send_tracking *nst) 138static inline void o2net_debug_add_nst(struct o2net_send_tracking *nst)
139{ 139{
140} 140}
141static void o2net_debug_del_nst(struct o2net_send_tracking *nst) 141static inline void o2net_debug_del_nst(struct o2net_send_tracking *nst)
142{ 142{
143} 143}
144static void o2net_debug_add_sc(struct o2net_sock_container *sc) 144static inline void o2net_debug_add_sc(struct o2net_sock_container *sc)
145{ 145{
146} 146}
147static void o2net_debug_del_sc(struct o2net_sock_container *sc) 147static inline void o2net_debug_del_sc(struct o2net_sock_container *sc)
148{ 148{
149} 149}
150#endif /* CONFIG_DEBUG_FS */ 150#endif /* CONFIG_DEBUG_FS */
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 8d58cfe410b1..18307ff81b77 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -224,10 +224,42 @@ struct o2net_send_tracking {
224 struct timeval st_send_time; 224 struct timeval st_send_time;
225 struct timeval st_status_time; 225 struct timeval st_status_time;
226}; 226};
227
228void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
229 u32 msgkey, struct task_struct *task, u8 node);
230void o2net_set_nst_sock_time(struct o2net_send_tracking *nst);
231void o2net_set_nst_send_time(struct o2net_send_tracking *nst);
232void o2net_set_nst_status_time(struct o2net_send_tracking *nst);
233void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
234 struct o2net_sock_container *sc);
235void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id);
236
227#else 237#else
228struct o2net_send_tracking { 238struct o2net_send_tracking {
229 u32 dummy; 239 u32 dummy;
230}; 240};
241
242static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype,
243 u32 msgkey, struct task_struct *task, u8 node)
244{
245}
246static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst)
247{
248}
249static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst)
250{
251}
252static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst)
253{
254}
255static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst,
256 struct o2net_sock_container *sc)
257{
258}
259static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst,
260 u32 msg_id)
261{
262}
231#endif /* CONFIG_DEBUG_FS */ 263#endif /* CONFIG_DEBUG_FS */
232 264
233#endif /* O2CLUSTER_TCP_INTERNAL_H */ 265#endif /* O2CLUSTER_TCP_INTERNAL_H */
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h
index d34a62a3a625..8c686d22f9c7 100644
--- a/fs/ocfs2/dlm/dlmdebug.h
+++ b/fs/ocfs2/dlm/dlmdebug.h
@@ -60,25 +60,25 @@ void dlm_destroy_debugfs_root(void);
60 60
61#else 61#else
62 62
63static int dlm_debug_init(struct dlm_ctxt *dlm) 63static inline int dlm_debug_init(struct dlm_ctxt *dlm)
64{ 64{
65 return 0; 65 return 0;
66} 66}
67static void dlm_debug_shutdown(struct dlm_ctxt *dlm) 67static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
68{ 68{
69} 69}
70static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) 70static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
71{ 71{
72 return 0; 72 return 0;
73} 73}
74static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) 74static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
75{ 75{
76} 76}
77static int dlm_create_debugfs_root(void) 77static inline int dlm_create_debugfs_root(void)
78{ 78{
79 return 0; 79 return 0;
80} 80}
81static void dlm_destroy_debugfs_root(void) 81static inline void dlm_destroy_debugfs_root(void)
82{ 82{
83} 83}
84 84
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index bbd1667aa7d3..fcd120f1493a 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -317,8 +317,7 @@ out:
317 return rc; 317 return rc;
318} 318}
319 319
320static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, 320static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn)
321 int hangup_pending)
322{ 321{
323 struct dlm_ctxt *dlm = conn->cc_lockspace; 322 struct dlm_ctxt *dlm = conn->cc_lockspace;
324 struct o2dlm_private *priv = conn->cc_private; 323 struct o2dlm_private *priv = conn->cc_private;
@@ -333,43 +332,6 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn,
333 return 0; 332 return 0;
334} 333}
335 334
336static void o2hb_stop(const char *group)
337{
338 int ret;
339 char *argv[5], *envp[3];
340
341 argv[0] = (char *)o2nm_get_hb_ctl_path();
342 argv[1] = "-K";
343 argv[2] = "-u";
344 argv[3] = (char *)group;
345 argv[4] = NULL;
346
347 mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]);
348
349 /* minimal command environment taken from cpu_run_sbin_hotplug */
350 envp[0] = "HOME=/";
351 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
352 envp[2] = NULL;
353
354 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
355 if (ret < 0)
356 mlog_errno(ret);
357}
358
359/*
360 * Hangup is a hack for tools compatibility. Older ocfs2-tools software
361 * expects the filesystem to call "ocfs2_hb_ctl" during unmount. This
362 * happens regardless of whether the DLM got started, so we can't do it
363 * in ocfs2_cluster_disconnect(). We bring the o2hb_stop() function into
364 * the glue and provide a "hangup" API for super.c to call.
365 *
366 * Other stacks will eventually provide a NULL ->hangup() pointer.
367 */
368static void o2cb_cluster_hangup(const char *group, int grouplen)
369{
370 o2hb_stop(group);
371}
372
373static int o2cb_cluster_this_node(unsigned int *node) 335static int o2cb_cluster_this_node(unsigned int *node)
374{ 336{
375 int node_num; 337 int node_num;
@@ -388,7 +350,6 @@ static int o2cb_cluster_this_node(unsigned int *node)
388static struct ocfs2_stack_operations o2cb_stack_ops = { 350static struct ocfs2_stack_operations o2cb_stack_ops = {
389 .connect = o2cb_cluster_connect, 351 .connect = o2cb_cluster_connect,
390 .disconnect = o2cb_cluster_disconnect, 352 .disconnect = o2cb_cluster_disconnect,
391 .hangup = o2cb_cluster_hangup,
392 .this_node = o2cb_cluster_this_node, 353 .this_node = o2cb_cluster_this_node,
393 .dlm_lock = o2cb_dlm_lock, 354 .dlm_lock = o2cb_dlm_lock,
394 .dlm_unlock = o2cb_dlm_unlock, 355 .dlm_unlock = o2cb_dlm_unlock,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index b503772cd0ec..c021280dd462 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -61,7 +61,7 @@
61 * negotiated by the client. The client negotiates based on the maximum 61 * negotiated by the client. The client negotiates based on the maximum
62 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major 62 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
63 * number from the "SETV" message must match 63 * number from the "SETV" message must match
64 * user_stack.sp_proto->lp_max_version.pv_major, and the minor number 64 * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number
65 * must be less than or equal to ...->lp_max_version.pv_minor. 65 * must be less than or equal to ...->lp_max_version.pv_minor.
66 * 66 *
67 * Once this information has been set, mounts will be allowed. From this 67 * Once this information has been set, mounts will be allowed. From this
@@ -153,7 +153,7 @@ union ocfs2_control_message {
153 struct ocfs2_control_message_down u_down; 153 struct ocfs2_control_message_down u_down;
154}; 154};
155 155
156static struct ocfs2_stack_plugin user_stack; 156static struct ocfs2_stack_plugin ocfs2_user_plugin;
157 157
158static atomic_t ocfs2_control_opened; 158static atomic_t ocfs2_control_opened;
159static int ocfs2_control_this_node = -1; 159static int ocfs2_control_this_node = -1;
@@ -399,7 +399,7 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
399 char *ptr = NULL; 399 char *ptr = NULL;
400 struct ocfs2_control_private *p = file->private_data; 400 struct ocfs2_control_private *p = file->private_data;
401 struct ocfs2_protocol_version *max = 401 struct ocfs2_protocol_version *max =
402 &user_stack.sp_proto->lp_max_version; 402 &ocfs2_user_plugin.sp_proto->lp_max_version;
403 403
404 if (ocfs2_control_get_handshake_state(file) != 404 if (ocfs2_control_get_handshake_state(file) !=
405 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 405 OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
@@ -680,7 +680,7 @@ static void fsdlm_lock_ast_wrapper(void *astarg)
680 struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); 680 struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg);
681 int status = lksb->sb_status; 681 int status = lksb->sb_status;
682 682
683 BUG_ON(user_stack.sp_proto == NULL); 683 BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
684 684
685 /* 685 /*
686 * For now we're punting on the issue of other non-standard errors 686 * For now we're punting on the issue of other non-standard errors
@@ -693,16 +693,16 @@ static void fsdlm_lock_ast_wrapper(void *astarg)
693 */ 693 */
694 694
695 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) 695 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL)
696 user_stack.sp_proto->lp_unlock_ast(astarg, 0); 696 ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0);
697 else 697 else
698 user_stack.sp_proto->lp_lock_ast(astarg); 698 ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg);
699} 699}
700 700
701static void fsdlm_blocking_ast_wrapper(void *astarg, int level) 701static void fsdlm_blocking_ast_wrapper(void *astarg, int level)
702{ 702{
703 BUG_ON(user_stack.sp_proto == NULL); 703 BUG_ON(ocfs2_user_plugin.sp_proto == NULL);
704 704
705 user_stack.sp_proto->lp_blocking_ast(astarg, level); 705 ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level);
706} 706}
707 707
708static int user_dlm_lock(struct ocfs2_cluster_connection *conn, 708static int user_dlm_lock(struct ocfs2_cluster_connection *conn,
@@ -816,8 +816,7 @@ out:
816 return rc; 816 return rc;
817} 817}
818 818
819static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn, 819static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
820 int hangup_pending)
821{ 820{
822 dlm_release_lockspace(conn->cc_lockspace, 2); 821 dlm_release_lockspace(conn->cc_lockspace, 2);
823 conn->cc_lockspace = NULL; 822 conn->cc_lockspace = NULL;
@@ -838,7 +837,7 @@ static int user_cluster_this_node(unsigned int *this_node)
838 return 0; 837 return 0;
839} 838}
840 839
841static struct ocfs2_stack_operations user_stack_ops = { 840static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
842 .connect = user_cluster_connect, 841 .connect = user_cluster_connect,
843 .disconnect = user_cluster_disconnect, 842 .disconnect = user_cluster_disconnect,
844 .this_node = user_cluster_this_node, 843 .this_node = user_cluster_this_node,
@@ -849,20 +848,20 @@ static struct ocfs2_stack_operations user_stack_ops = {
849 .dump_lksb = user_dlm_dump_lksb, 848 .dump_lksb = user_dlm_dump_lksb,
850}; 849};
851 850
852static struct ocfs2_stack_plugin user_stack = { 851static struct ocfs2_stack_plugin ocfs2_user_plugin = {
853 .sp_name = "user", 852 .sp_name = "user",
854 .sp_ops = &user_stack_ops, 853 .sp_ops = &ocfs2_user_plugin_ops,
855 .sp_owner = THIS_MODULE, 854 .sp_owner = THIS_MODULE,
856}; 855};
857 856
858 857
859static int __init user_stack_init(void) 858static int __init ocfs2_user_plugin_init(void)
860{ 859{
861 int rc; 860 int rc;
862 861
863 rc = ocfs2_control_init(); 862 rc = ocfs2_control_init();
864 if (!rc) { 863 if (!rc) {
865 rc = ocfs2_stack_glue_register(&user_stack); 864 rc = ocfs2_stack_glue_register(&ocfs2_user_plugin);
866 if (rc) 865 if (rc)
867 ocfs2_control_exit(); 866 ocfs2_control_exit();
868 } 867 }
@@ -870,14 +869,14 @@ static int __init user_stack_init(void)
870 return rc; 869 return rc;
871} 870}
872 871
873static void __exit user_stack_exit(void) 872static void __exit ocfs2_user_plugin_exit(void)
874{ 873{
875 ocfs2_stack_glue_unregister(&user_stack); 874 ocfs2_stack_glue_unregister(&ocfs2_user_plugin);
876 ocfs2_control_exit(); 875 ocfs2_control_exit();
877} 876}
878 877
879MODULE_AUTHOR("Oracle"); 878MODULE_AUTHOR("Oracle");
880MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); 879MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
881MODULE_LICENSE("GPL"); 880MODULE_LICENSE("GPL");
882module_init(user_stack_init); 881module_init(ocfs2_user_plugin_init);
883module_exit(user_stack_exit); 882module_exit(ocfs2_user_plugin_exit);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 119f60cea9cc..10e149ae5e3a 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/kobject.h> 27#include <linux/kobject.h>
28#include <linux/sysfs.h> 28#include <linux/sysfs.h>
29#include <linux/sysctl.h>
29 30
30#include "ocfs2_fs.h" 31#include "ocfs2_fs.h"
31 32
@@ -33,11 +34,13 @@
33 34
34#define OCFS2_STACK_PLUGIN_O2CB "o2cb" 35#define OCFS2_STACK_PLUGIN_O2CB "o2cb"
35#define OCFS2_STACK_PLUGIN_USER "user" 36#define OCFS2_STACK_PLUGIN_USER "user"
37#define OCFS2_MAX_HB_CTL_PATH 256
36 38
37static struct ocfs2_locking_protocol *lproto; 39static struct ocfs2_locking_protocol *lproto;
38static DEFINE_SPINLOCK(ocfs2_stack_lock); 40static DEFINE_SPINLOCK(ocfs2_stack_lock);
39static LIST_HEAD(ocfs2_stack_list); 41static LIST_HEAD(ocfs2_stack_list);
40static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; 42static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1];
43static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
41 44
42/* 45/*
43 * The stack currently in use. If not null, active_stack->sp_count > 0, 46 * The stack currently in use. If not null, active_stack->sp_count > 0,
@@ -349,7 +352,7 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
349 352
350 BUG_ON(conn == NULL); 353 BUG_ON(conn == NULL);
351 354
352 ret = active_stack->sp_ops->disconnect(conn, hangup_pending); 355 ret = active_stack->sp_ops->disconnect(conn);
353 356
354 /* XXX Should we free it anyway? */ 357 /* XXX Should we free it anyway? */
355 if (!ret) { 358 if (!ret) {
@@ -362,13 +365,48 @@ int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
362} 365}
363EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); 366EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect);
364 367
368/*
369 * Leave the group for this filesystem. This is executed by a userspace
370 * program (stored in ocfs2_hb_ctl_path).
371 */
372static void ocfs2_leave_group(const char *group)
373{
374 int ret;
375 char *argv[5], *envp[3];
376
377 argv[0] = ocfs2_hb_ctl_path;
378 argv[1] = "-K";
379 argv[2] = "-u";
380 argv[3] = (char *)group;
381 argv[4] = NULL;
382
383 /* minimal command environment taken from cpu_run_sbin_hotplug */
384 envp[0] = "HOME=/";
385 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
386 envp[2] = NULL;
387
388 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
389 if (ret < 0) {
390 printk(KERN_ERR
391 "ocfs2: Error %d running user helper "
392 "\"%s %s %s %s\"\n",
393 ret, argv[0], argv[1], argv[2], argv[3]);
394 }
395}
396
397/*
398 * Hangup is a required post-umount. ocfs2-tools software expects the
399 * filesystem to call "ocfs2_hb_ctl" during unmount. This happens
400 * regardless of whether the DLM got started, so we can't do it
401 * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does
402 * the actual work.
403 */
365void ocfs2_cluster_hangup(const char *group, int grouplen) 404void ocfs2_cluster_hangup(const char *group, int grouplen)
366{ 405{
367 BUG_ON(group == NULL); 406 BUG_ON(group == NULL);
368 BUG_ON(group[grouplen] != '\0'); 407 BUG_ON(group[grouplen] != '\0');
369 408
370 if (active_stack->sp_ops->hangup) 409 ocfs2_leave_group(group);
371 active_stack->sp_ops->hangup(group, grouplen);
372 410
373 /* cluster_disconnect() was called with hangup_pending==1 */ 411 /* cluster_disconnect() was called with hangup_pending==1 */
374 ocfs2_stack_driver_put(); 412 ocfs2_stack_driver_put();
@@ -548,10 +586,83 @@ error:
548 return ret; 586 return ret;
549} 587}
550 588
589/*
590 * Sysctl bits
591 *
592 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't
593 * make as much sense in a multiple cluster stack world, but it's safer
594 * and easier to preserve the name.
595 */
596
597#define FS_OCFS2_NM 1
598
599static ctl_table ocfs2_nm_table[] = {
600 {
601 .ctl_name = 1,
602 .procname = "hb_ctl_path",
603 .data = ocfs2_hb_ctl_path,
604 .maxlen = OCFS2_MAX_HB_CTL_PATH,
605 .mode = 0644,
606 .proc_handler = &proc_dostring,
607 .strategy = &sysctl_string,
608 },
609 { .ctl_name = 0 }
610};
611
612static ctl_table ocfs2_mod_table[] = {
613 {
614 .ctl_name = FS_OCFS2_NM,
615 .procname = "nm",
616 .data = NULL,
617 .maxlen = 0,
618 .mode = 0555,
619 .child = ocfs2_nm_table
620 },
621 { .ctl_name = 0}
622};
623
624static ctl_table ocfs2_kern_table[] = {
625 {
626 .ctl_name = FS_OCFS2,
627 .procname = "ocfs2",
628 .data = NULL,
629 .maxlen = 0,
630 .mode = 0555,
631 .child = ocfs2_mod_table
632 },
633 { .ctl_name = 0}
634};
635
636static ctl_table ocfs2_root_table[] = {
637 {
638 .ctl_name = CTL_FS,
639 .procname = "fs",
640 .data = NULL,
641 .maxlen = 0,
642 .mode = 0555,
643 .child = ocfs2_kern_table
644 },
645 { .ctl_name = 0 }
646};
647
648static struct ctl_table_header *ocfs2_table_header = NULL;
649
650
651/*
652 * Initialization
653 */
654
551static int __init ocfs2_stack_glue_init(void) 655static int __init ocfs2_stack_glue_init(void)
552{ 656{
553 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); 657 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
554 658
659 ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
660 if (!ocfs2_table_header) {
661 printk(KERN_ERR
662 "ocfs2 stack glue: unable to register sysctl\n");
663 return -ENOMEM; /* or something. */
664 }
665
555 return ocfs2_sysfs_init(); 666 return ocfs2_sysfs_init();
556} 667}
557 668
@@ -559,6 +670,8 @@ static void __exit ocfs2_stack_glue_exit(void)
559{ 670{
560 lproto = NULL; 671 lproto = NULL;
561 ocfs2_sysfs_exit(); 672 ocfs2_sysfs_exit();
673 if (ocfs2_table_header)
674 unregister_sysctl_table(ocfs2_table_header);
562} 675}
563 676
564MODULE_AUTHOR("Oracle"); 677MODULE_AUTHOR("Oracle");
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 005e4f170e0f..db56281dd1be 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -134,22 +134,10 @@ struct ocfs2_stack_operations {
134 * be freed. Thus, a stack must not return from ->disconnect() 134 * be freed. Thus, a stack must not return from ->disconnect()
135 * until it will no longer reference the conn pointer. 135 * until it will no longer reference the conn pointer.
136 * 136 *
137 * If hangup_pending is zero, ocfs2_cluster_disconnect() will also 137 * Once this call returns, the stack glue will be dropping this
138 * be dropping the reference on the module. 138 * connection's reference on the module.
139 */ 139 */
140 int (*disconnect)(struct ocfs2_cluster_connection *conn, 140 int (*disconnect)(struct ocfs2_cluster_connection *conn);
141 int hangup_pending);
142
143 /*
144 * ocfs2_cluster_hangup() exists for compatibility with older
145 * ocfs2 tools. Only the classic stack really needs it. As such
146 * ->hangup() is not required of all stacks. See the comment by
147 * ocfs2_cluster_hangup() for more details.
148 *
149 * Note that ocfs2_cluster_hangup() can only be called if
150 * hangup_pending was passed to ocfs2_cluster_disconnect().
151 */
152 void (*hangup)(const char *group, int grouplen);
153 141
154 /* 142 /*
155 * ->this_node() returns the cluster's unique identifier for the 143 * ->this_node() returns the cluster's unique identifier for the
@@ -258,4 +246,5 @@ void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto)
258/* Used by stack plugins */ 246/* Used by stack plugins */
259int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); 247int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
260void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); 248void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
249
261#endif /* STACKGLUE_H */ 250#endif /* STACKGLUE_H */
diff --git a/fs/open.c b/fs/open.c
index a1450086e92f..a99ad09c3197 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -16,6 +16,7 @@
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/backing-dev.h> 17#include <linux/backing-dev.h>
18#include <linux/capability.h> 18#include <linux/capability.h>
19#include <linux/securebits.h>
19#include <linux/security.h> 20#include <linux/security.h>
20#include <linux/mount.h> 21#include <linux/mount.h>
21#include <linux/vfs.h> 22#include <linux/vfs.h>
@@ -425,7 +426,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
425{ 426{
426 struct nameidata nd; 427 struct nameidata nd;
427 int old_fsuid, old_fsgid; 428 int old_fsuid, old_fsgid;
428 kernel_cap_t old_cap; 429 kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */
429 int res; 430 int res;
430 431
431 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ 432 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
@@ -433,23 +434,27 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
433 434
434 old_fsuid = current->fsuid; 435 old_fsuid = current->fsuid;
435 old_fsgid = current->fsgid; 436 old_fsgid = current->fsgid;
436 old_cap = current->cap_effective;
437 437
438 current->fsuid = current->uid; 438 current->fsuid = current->uid;
439 current->fsgid = current->gid; 439 current->fsgid = current->gid;
440 440
441 /* 441 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
442 * Clear the capabilities if we switch to a non-root user 442 /*
443 * 443 * Clear the capabilities if we switch to a non-root user
444 * FIXME: There is a race here against sys_capset. The 444 */
445 * capabilities can change yet we will restore the old 445#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
446 * value below. We should hold task_capabilities_lock, 446 /*
447 * but we cannot because user_path_walk can sleep. 447 * FIXME: There is a race here against sys_capset. The
448 */ 448 * capabilities can change yet we will restore the old
449 if (current->uid) 449 * value below. We should hold task_capabilities_lock,
450 cap_clear(current->cap_effective); 450 * but we cannot because user_path_walk can sleep.
451 else 451 */
452 current->cap_effective = current->cap_permitted; 452#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
453 if (current->uid)
454 old_cap = cap_set_effective(__cap_empty_set);
455 else
456 old_cap = cap_set_effective(current->cap_permitted);
457 }
453 458
454 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 459 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
455 if (res) 460 if (res)
@@ -478,7 +483,9 @@ out_path_release:
478out: 483out:
479 current->fsuid = old_fsuid; 484 current->fsuid = old_fsuid;
480 current->fsgid = old_fsgid; 485 current->fsgid = old_fsgid;
481 current->cap_effective = old_cap; 486
487 if (!issecure(SECURE_NO_SETUID_FIXUP))
488 cap_set_effective(old_cap);
482 489
483 return res; 490 return res;
484} 491}
diff --git a/fs/pipe.c b/fs/pipe.c
index ec228bc9f882..700f4e0d9572 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1003,8 +1003,7 @@ struct file *create_write_pipe(void)
1003void free_write_pipe(struct file *f) 1003void free_write_pipe(struct file *f)
1004{ 1004{
1005 free_pipe_info(f->f_dentry->d_inode); 1005 free_pipe_info(f->f_dentry->d_inode);
1006 dput(f->f_path.dentry); 1006 path_put(&f->f_path);
1007 mntput(f->f_path.mnt);
1008 put_filp(f); 1007 put_filp(f);
1009} 1008}
1010 1009
@@ -1015,8 +1014,8 @@ struct file *create_read_pipe(struct file *wrf)
1015 return ERR_PTR(-ENFILE); 1014 return ERR_PTR(-ENFILE);
1016 1015
1017 /* Grab pipe from the writer */ 1016 /* Grab pipe from the writer */
1018 f->f_path.mnt = mntget(wrf->f_path.mnt); 1017 f->f_path = wrf->f_path;
1019 f->f_path.dentry = dget(wrf->f_path.dentry); 1018 path_get(&wrf->f_path);
1020 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; 1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1021 1020
1022 f->f_pos = 0; 1021 f->f_pos = 0;
@@ -1068,8 +1067,7 @@ int do_pipe(int *fd)
1068 err_fdr: 1067 err_fdr:
1069 put_unused_fd(fdr); 1068 put_unused_fd(fdr);
1070 err_read_pipe: 1069 err_read_pipe:
1071 dput(fr->f_dentry); 1070 path_put(&fr->f_path);
1072 mntput(fr->f_vfsmnt);
1073 put_filp(fr); 1071 put_filp(fr);
1074 err_write_pipe: 1072 err_write_pipe:
1075 free_write_pipe(fw); 1073 free_write_pipe(fw);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9e3b8c33c24b..797d775e0354 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -288,7 +288,7 @@ static void render_cap_t(struct seq_file *m, const char *header,
288 seq_printf(m, "%s", header); 288 seq_printf(m, "%s", header);
289 CAP_FOR_EACH_U32(__capi) { 289 CAP_FOR_EACH_U32(__capi) {
290 seq_printf(m, "%08x", 290 seq_printf(m, "%08x",
291 a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); 291 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
292 } 292 }
293 seq_printf(m, "\n"); 293 seq_printf(m, "\n");
294} 294}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c447e0743a3c..3b455371e7ff 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -127,6 +127,25 @@ struct pid_entry {
127 NULL, &proc_single_file_operations, \ 127 NULL, &proc_single_file_operations, \
128 { .proc_show = &proc_##OTYPE } ) 128 { .proc_show = &proc_##OTYPE } )
129 129
130/*
131 * Count the number of hardlinks for the pid_entry table, excluding the .
132 * and .. links.
133 */
134static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
135 unsigned int n)
136{
137 unsigned int i;
138 unsigned int count;
139
140 count = 0;
141 for (i = 0; i < n; ++i) {
142 if (S_ISDIR(entries[i].mode))
143 ++count;
144 }
145
146 return count;
147}
148
130int maps_protect; 149int maps_protect;
131EXPORT_SYMBOL(maps_protect); 150EXPORT_SYMBOL(maps_protect);
132 151
@@ -2585,10 +2604,9 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
2585 inode->i_op = &proc_tgid_base_inode_operations; 2604 inode->i_op = &proc_tgid_base_inode_operations;
2586 inode->i_fop = &proc_tgid_base_operations; 2605 inode->i_fop = &proc_tgid_base_operations;
2587 inode->i_flags|=S_IMMUTABLE; 2606 inode->i_flags|=S_IMMUTABLE;
2588 inode->i_nlink = 5; 2607
2589#ifdef CONFIG_SECURITY 2608 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
2590 inode->i_nlink += 1; 2609 ARRAY_SIZE(tgid_base_stuff));
2591#endif
2592 2610
2593 dentry->d_op = &pid_dentry_operations; 2611 dentry->d_op = &pid_dentry_operations;
2594 2612
@@ -2816,10 +2834,9 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
2816 inode->i_op = &proc_tid_base_inode_operations; 2834 inode->i_op = &proc_tid_base_inode_operations;
2817 inode->i_fop = &proc_tid_base_operations; 2835 inode->i_fop = &proc_tid_base_operations;
2818 inode->i_flags|=S_IMMUTABLE; 2836 inode->i_flags|=S_IMMUTABLE;
2819 inode->i_nlink = 4; 2837
2820#ifdef CONFIG_SECURITY 2838 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
2821 inode->i_nlink += 1; 2839 ARRAY_SIZE(tid_base_stuff));
2822#endif
2823 2840
2824 dentry->d_op = &pid_dentry_operations; 2841 dentry->d_op = &pid_dentry_operations;
2825 2842
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6f4e8dc97da1..b08d10017911 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -425,7 +425,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
425 } 425 }
426 } 426 }
427 unlock_new_inode(inode); 427 unlock_new_inode(inode);
428 } 428 } else
429 module_put(de->owner);
429 return inode; 430 return inode;
430 431
431out_ino: 432out_ino:
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 74a323d2b850..7e277f2ad466 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -139,7 +139,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
139#define K(x) ((x) << (PAGE_SHIFT - 10)) 139#define K(x) ((x) << (PAGE_SHIFT - 10))
140 si_meminfo(&i); 140 si_meminfo(&i);
141 si_swapinfo(&i); 141 si_swapinfo(&i);
142 committed = atomic_read(&vm_committed_space); 142 committed = atomic_long_read(&vm_committed_space);
143 allowed = ((totalram_pages - hugetlb_total_pages()) 143 allowed = ((totalram_pages - hugetlb_total_pages())
144 * sysctl_overcommit_ratio / 100) + total_swap_pages; 144 * sysctl_overcommit_ratio / 100) + total_swap_pages;
145 145
@@ -716,7 +716,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
716 pfn = src / KPMSIZE; 716 pfn = src / KPMSIZE;
717 count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); 717 count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
718 if (src & KPMMASK || count & KPMMASK) 718 if (src & KPMMASK || count & KPMMASK)
719 return -EIO; 719 return -EINVAL;
720 720
721 while (count > 0) { 721 while (count > 0) {
722 ppage = NULL; 722 ppage = NULL;
@@ -726,7 +726,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
726 if (!ppage) 726 if (!ppage)
727 pcount = 0; 727 pcount = 0;
728 else 728 else
729 pcount = atomic_read(&ppage->_count); 729 pcount = page_mapcount(ppage);
730 730
731 if (put_user(pcount, out++)) { 731 if (put_user(pcount, out++)) {
732 ret = -EFAULT; 732 ret = -EFAULT;
@@ -782,7 +782,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
782 pfn = src / KPMSIZE; 782 pfn = src / KPMSIZE;
783 count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); 783 count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
784 if (src & KPMMASK || count & KPMMASK) 784 if (src & KPMMASK || count & KPMMASK)
785 return -EIO; 785 return -EINVAL;
786 786
787 while (count > 0) { 787 while (count > 0) {
788 ppage = NULL; 788 ppage = NULL;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 88717c0f941b..c492449f3b45 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -315,9 +315,9 @@ struct mem_size_stats {
315}; 315};
316 316
317static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 317static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
318 void *private) 318 struct mm_walk *walk)
319{ 319{
320 struct mem_size_stats *mss = private; 320 struct mem_size_stats *mss = walk->private;
321 struct vm_area_struct *vma = mss->vma; 321 struct vm_area_struct *vma = mss->vma;
322 pte_t *pte, ptent; 322 pte_t *pte, ptent;
323 spinlock_t *ptl; 323 spinlock_t *ptl;
@@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
365 return 0; 365 return 0;
366} 366}
367 367
368static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
369
370static int show_smap(struct seq_file *m, void *v) 368static int show_smap(struct seq_file *m, void *v)
371{ 369{
372 struct vm_area_struct *vma = v; 370 struct vm_area_struct *vma = v;
373 struct mem_size_stats mss; 371 struct mem_size_stats mss;
374 int ret; 372 int ret;
373 struct mm_walk smaps_walk = {
374 .pmd_entry = smaps_pte_range,
375 .mm = vma->vm_mm,
376 .private = &mss,
377 };
375 378
376 memset(&mss, 0, sizeof mss); 379 memset(&mss, 0, sizeof mss);
377 mss.vma = vma; 380 mss.vma = vma;
378 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 381 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
379 walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, 382 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
380 &smaps_walk, &mss);
381 383
382 ret = show_map(m, v); 384 ret = show_map(m, v);
383 if (ret) 385 if (ret)
@@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = {
426}; 428};
427 429
428static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, 430static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
429 unsigned long end, void *private) 431 unsigned long end, struct mm_walk *walk)
430{ 432{
431 struct vm_area_struct *vma = private; 433 struct vm_area_struct *vma = walk->private;
432 pte_t *pte, ptent; 434 pte_t *pte, ptent;
433 spinlock_t *ptl; 435 spinlock_t *ptl;
434 struct page *page; 436 struct page *page;
@@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
452 return 0; 454 return 0;
453} 455}
454 456
455static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
456
457static ssize_t clear_refs_write(struct file *file, const char __user *buf, 457static ssize_t clear_refs_write(struct file *file, const char __user *buf,
458 size_t count, loff_t *ppos) 458 size_t count, loff_t *ppos)
459{ 459{
@@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
476 return -ESRCH; 476 return -ESRCH;
477 mm = get_task_mm(task); 477 mm = get_task_mm(task);
478 if (mm) { 478 if (mm) {
479 struct mm_walk clear_refs_walk = {
480 .pmd_entry = clear_refs_pte_range,
481 .mm = mm,
482 };
479 down_read(&mm->mmap_sem); 483 down_read(&mm->mmap_sem);
480 for (vma = mm->mmap; vma; vma = vma->vm_next) 484 for (vma = mm->mmap; vma; vma = vma->vm_next) {
485 clear_refs_walk.private = vma;
481 if (!is_vm_hugetlb_page(vma)) 486 if (!is_vm_hugetlb_page(vma))
482 walk_page_range(mm, vma->vm_start, vma->vm_end, 487 walk_page_range(vma->vm_start, vma->vm_end,
483 &clear_refs_walk, vma); 488 &clear_refs_walk);
489 }
484 flush_tlb_mm(mm); 490 flush_tlb_mm(mm);
485 up_read(&mm->mmap_sem); 491 up_read(&mm->mmap_sem);
486 mmput(mm); 492 mmput(mm);
@@ -496,7 +502,7 @@ const struct file_operations proc_clear_refs_operations = {
496}; 502};
497 503
498struct pagemapread { 504struct pagemapread {
499 char __user *out, *end; 505 u64 __user *out, *end;
500}; 506};
501 507
502#define PM_ENTRY_BYTES sizeof(u64) 508#define PM_ENTRY_BYTES sizeof(u64)
@@ -519,28 +525,18 @@ struct pagemapread {
519static int add_to_pagemap(unsigned long addr, u64 pfn, 525static int add_to_pagemap(unsigned long addr, u64 pfn,
520 struct pagemapread *pm) 526 struct pagemapread *pm)
521{ 527{
522 /*
523 * Make sure there's room in the buffer for an
524 * entire entry. Otherwise, only copy part of
525 * the pfn.
526 */
527 if (pm->out + PM_ENTRY_BYTES >= pm->end) {
528 if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
529 return -EFAULT;
530 pm->out = pm->end;
531 return PM_END_OF_BUFFER;
532 }
533
534 if (put_user(pfn, pm->out)) 528 if (put_user(pfn, pm->out))
535 return -EFAULT; 529 return -EFAULT;
536 pm->out += PM_ENTRY_BYTES; 530 pm->out++;
531 if (pm->out >= pm->end)
532 return PM_END_OF_BUFFER;
537 return 0; 533 return 0;
538} 534}
539 535
540static int pagemap_pte_hole(unsigned long start, unsigned long end, 536static int pagemap_pte_hole(unsigned long start, unsigned long end,
541 void *private) 537 struct mm_walk *walk)
542{ 538{
543 struct pagemapread *pm = private; 539 struct pagemapread *pm = walk->private;
544 unsigned long addr; 540 unsigned long addr;
545 int err = 0; 541 int err = 0;
546 for (addr = start; addr < end; addr += PAGE_SIZE) { 542 for (addr = start; addr < end; addr += PAGE_SIZE) {
@@ -557,24 +553,45 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
557 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 553 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
558} 554}
559 555
556static unsigned long pte_to_pagemap_entry(pte_t pte)
557{
558 unsigned long pme = 0;
559 if (is_swap_pte(pte))
560 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
561 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
562 else if (pte_present(pte))
563 pme = PM_PFRAME(pte_pfn(pte))
564 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
565 return pme;
566}
567
560static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 568static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
561 void *private) 569 struct mm_walk *walk)
562{ 570{
563 struct pagemapread *pm = private; 571 struct vm_area_struct *vma;
572 struct pagemapread *pm = walk->private;
564 pte_t *pte; 573 pte_t *pte;
565 int err = 0; 574 int err = 0;
566 575
576 /* find the first VMA at or above 'addr' */
577 vma = find_vma(walk->mm, addr);
567 for (; addr != end; addr += PAGE_SIZE) { 578 for (; addr != end; addr += PAGE_SIZE) {
568 u64 pfn = PM_NOT_PRESENT; 579 u64 pfn = PM_NOT_PRESENT;
569 pte = pte_offset_map(pmd, addr); 580
570 if (is_swap_pte(*pte)) 581 /* check to see if we've left 'vma' behind
571 pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte)) 582 * and need a new, higher one */
572 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 583 if (vma && (addr >= vma->vm_end))
573 else if (pte_present(*pte)) 584 vma = find_vma(walk->mm, addr);
574 pfn = PM_PFRAME(pte_pfn(*pte)) 585
575 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; 586 /* check that 'vma' actually covers this address,
576 /* unmap so we're not in atomic when we copy to userspace */ 587 * and that it isn't a huge page vma */
577 pte_unmap(pte); 588 if (vma && (vma->vm_start <= addr) &&
589 !is_vm_hugetlb_page(vma)) {
590 pte = pte_offset_map(pmd, addr);
591 pfn = pte_to_pagemap_entry(*pte);
592 /* unmap before userspace copy */
593 pte_unmap(pte);
594 }
578 err = add_to_pagemap(addr, pfn, pm); 595 err = add_to_pagemap(addr, pfn, pm);
579 if (err) 596 if (err)
580 return err; 597 return err;
@@ -585,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
585 return err; 602 return err;
586} 603}
587 604
588static struct mm_walk pagemap_walk = {
589 .pmd_entry = pagemap_pte_range,
590 .pte_hole = pagemap_pte_hole
591};
592
593/* 605/*
594 * /proc/pid/pagemap - an array mapping virtual pages to pfns 606 * /proc/pid/pagemap - an array mapping virtual pages to pfns
595 * 607 *
@@ -624,6 +636,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
624 struct pagemapread pm; 636 struct pagemapread pm;
625 int pagecount; 637 int pagecount;
626 int ret = -ESRCH; 638 int ret = -ESRCH;
639 struct mm_walk pagemap_walk;
640 unsigned long src;
641 unsigned long svpfn;
642 unsigned long start_vaddr;
643 unsigned long end_vaddr;
627 644
628 if (!task) 645 if (!task)
629 goto out; 646 goto out;
@@ -634,7 +651,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
634 651
635 ret = -EINVAL; 652 ret = -EINVAL;
636 /* file position must be aligned */ 653 /* file position must be aligned */
637 if (*ppos % PM_ENTRY_BYTES) 654 if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
638 goto out_task; 655 goto out_task;
639 656
640 ret = 0; 657 ret = 0;
@@ -642,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
642 if (!mm) 659 if (!mm)
643 goto out_task; 660 goto out_task;
644 661
645 ret = -ENOMEM; 662
646 uaddr = (unsigned long)buf & PAGE_MASK; 663 uaddr = (unsigned long)buf & PAGE_MASK;
647 uend = (unsigned long)(buf + count); 664 uend = (unsigned long)(buf + count);
648 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; 665 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
649 pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); 666 ret = 0;
667 if (pagecount == 0)
668 goto out_mm;
669 pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
670 ret = -ENOMEM;
650 if (!pages) 671 if (!pages)
651 goto out_mm; 672 goto out_mm;
652 673
@@ -664,36 +685,36 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
664 goto out_pages; 685 goto out_pages;
665 } 686 }
666 687
667 pm.out = buf; 688 pm.out = (u64 *)buf;
668 pm.end = buf + count; 689 pm.end = (u64 *)(buf + count);
669 690
670 if (!ptrace_may_attach(task)) { 691 pagemap_walk.pmd_entry = pagemap_pte_range;
671 ret = -EIO; 692 pagemap_walk.pte_hole = pagemap_pte_hole;
672 } else { 693 pagemap_walk.mm = mm;
673 unsigned long src = *ppos; 694 pagemap_walk.private = &pm;
674 unsigned long svpfn = src / PM_ENTRY_BYTES; 695
675 unsigned long start_vaddr = svpfn << PAGE_SHIFT; 696 src = *ppos;
676 unsigned long end_vaddr = TASK_SIZE_OF(task); 697 svpfn = src / PM_ENTRY_BYTES;
677 698 start_vaddr = svpfn << PAGE_SHIFT;
678 /* watch out for wraparound */ 699 end_vaddr = TASK_SIZE_OF(task);
679 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) 700
680 start_vaddr = end_vaddr; 701 /* watch out for wraparound */
681 702 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
682 /* 703 start_vaddr = end_vaddr;
683 * The odds are that this will stop walking way 704
684 * before end_vaddr, because the length of the 705 /*
685 * user buffer is tracked in "pm", and the walk 706 * The odds are that this will stop walking way
686 * will stop when we hit the end of the buffer. 707 * before end_vaddr, because the length of the
687 */ 708 * user buffer is tracked in "pm", and the walk
688 ret = walk_page_range(mm, start_vaddr, end_vaddr, 709 * will stop when we hit the end of the buffer.
689 &pagemap_walk, &pm); 710 */
690 if (ret == PM_END_OF_BUFFER) 711 ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
691 ret = 0; 712 if (ret == PM_END_OF_BUFFER)
692 /* don't need mmap_sem for these, but this looks cleaner */ 713 ret = 0;
693 *ppos += pm.out - buf; 714 /* don't need mmap_sem for these, but this looks cleaner */
694 if (!ret) 715 *ppos += (char *)pm.out - buf;
695 ret = pm.out - buf; 716 if (!ret)
696 } 717 ret = (char *)pm.out - buf;
697 718
698out_pages: 719out_pages:
699 for (; pagecount; pagecount--) { 720 for (; pagecount; pagecount--) {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ed424d708e69..1d40f2bd1970 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2165,8 +2165,10 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2165 blk++; 2165 blk++;
2166 } 2166 }
2167out: 2167out:
2168 if (len == towrite) 2168 if (len == towrite) {
2169 mutex_unlock(&inode->i_mutex);
2169 return err; 2170 return err;
2171 }
2170 if (inode->i_size < off + len - towrite) 2172 if (inode->i_size < off + len - towrite)
2171 i_size_write(inode, off + len - towrite); 2173 i_size_write(inode, off + len - towrite);
2172 inode->i_version++; 2174 inode->i_version++;
diff --git a/fs/select.c b/fs/select.c
index 8dda969614a9..da0e88201c3a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
249 retval++; 249 retval++;
250 } 250 }
251 } 251 }
252 cond_resched();
253 } 252 }
254 if (res_in) 253 if (res_in)
255 *rinp = res_in; 254 *rinp = res_in;
@@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
257 *routp = res_out; 256 *routp = res_out;
258 if (res_ex) 257 if (res_ex)
259 *rexp = res_ex; 258 *rexp = res_ex;
259 cond_resched();
260 } 260 }
261 wait = NULL; 261 wait = NULL;
262 if (retval || !*timeout || signal_pending(current)) 262 if (retval || !*timeout || signal_pending(current))
diff --git a/fs/splice.c b/fs/splice.c
index 78150038b584..aa5f6f60b305 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -58,8 +58,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
58 */ 58 */
59 wait_on_page_writeback(page); 59 wait_on_page_writeback(page);
60 60
61 if (PagePrivate(page)) 61 if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
62 try_to_release_page(page, GFP_KERNEL); 62 goto out_unlock;
63 63
64 /* 64 /*
65 * If we succeeded in removing the mapping, set LRU flag 65 * If we succeeded in removing the mapping, set LRU flag
@@ -75,6 +75,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
75 * Raced with truncate or failed to remove page from current 75 * Raced with truncate or failed to remove page from current
76 * address space, unlock and return failure. 76 * address space, unlock and return failure.
77 */ 77 */
78out_unlock:
78 unlock_page(page); 79 unlock_page(page);
79 return 1; 80 return 1;
80} 81}
@@ -983,7 +984,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
983 984
984 while (len) { 985 while (len) {
985 size_t read_len; 986 size_t read_len;
986 loff_t pos = sd->pos; 987 loff_t pos = sd->pos, prev_pos = pos;
987 988
988 ret = do_splice_to(in, &pos, pipe, len, flags); 989 ret = do_splice_to(in, &pos, pipe, len, flags);
989 if (unlikely(ret <= 0)) 990 if (unlikely(ret <= 0))
@@ -998,15 +999,19 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
998 * could get stuck data in the internal pipe: 999 * could get stuck data in the internal pipe:
999 */ 1000 */
1000 ret = actor(pipe, sd); 1001 ret = actor(pipe, sd);
1001 if (unlikely(ret <= 0)) 1002 if (unlikely(ret <= 0)) {
1003 sd->pos = prev_pos;
1002 goto out_release; 1004 goto out_release;
1005 }
1003 1006
1004 bytes += ret; 1007 bytes += ret;
1005 len -= ret; 1008 len -= ret;
1006 sd->pos = pos; 1009 sd->pos = pos;
1007 1010
1008 if (ret < read_len) 1011 if (ret < read_len) {
1012 sd->pos = prev_pos + ret;
1009 goto out_release; 1013 goto out_release;
1014 }
1010 } 1015 }
1011 1016
1012done: 1017done:
@@ -1072,7 +1077,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1072 1077
1073 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 1078 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1074 if (ret > 0) 1079 if (ret > 0)
1075 *ppos += ret; 1080 *ppos = sd.pos;
1076 1081
1077 return ret; 1082 return ret;
1078} 1083}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7a5f69be6ac2..44cc702f96cc 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -682,38 +682,26 @@ static int udf_vrs(struct super_block *sb, int silent)
682/* 682/*
683 * Check whether there is an anchor block in the given block 683 * Check whether there is an anchor block in the given block
684 */ 684 */
685static int udf_check_anchor_block(struct super_block *sb, sector_t block, 685static int udf_check_anchor_block(struct super_block *sb, sector_t block)
686 bool varconv)
687{ 686{
688 struct buffer_head *bh = NULL; 687 struct buffer_head *bh;
689 tag *t;
690 uint16_t ident; 688 uint16_t ident;
691 uint32_t location;
692 689
693 if (varconv) { 690 if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
694 if (udf_fixed_to_variable(block) >= 691 udf_fixed_to_variable(block) >=
695 sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) 692 sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
696 return 0; 693 return 0;
697 bh = sb_bread(sb, udf_fixed_to_variable(block));
698 }
699 else
700 bh = sb_bread(sb, block);
701 694
695 bh = udf_read_tagged(sb, block, block, &ident);
702 if (!bh) 696 if (!bh)
703 return 0; 697 return 0;
704
705 t = (tag *)bh->b_data;
706 ident = le16_to_cpu(t->tagIdent);
707 location = le32_to_cpu(t->tagLocation);
708 brelse(bh); 698 brelse(bh);
709 if (ident != TAG_IDENT_AVDP) 699
710 return 0; 700 return ident == TAG_IDENT_AVDP;
711 return location == block;
712} 701}
713 702
714/* Search for an anchor volume descriptor pointer */ 703/* Search for an anchor volume descriptor pointer */
715static sector_t udf_scan_anchors(struct super_block *sb, bool varconv, 704static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock)
716 sector_t lastblock)
717{ 705{
718 sector_t last[6]; 706 sector_t last[6];
719 int i; 707 int i;
@@ -739,7 +727,7 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv,
739 sb->s_blocksize_bits) 727 sb->s_blocksize_bits)
740 continue; 728 continue;
741 729
742 if (udf_check_anchor_block(sb, last[i], varconv)) { 730 if (udf_check_anchor_block(sb, last[i])) {
743 sbi->s_anchor[0] = last[i]; 731 sbi->s_anchor[0] = last[i];
744 sbi->s_anchor[1] = last[i] - 256; 732 sbi->s_anchor[1] = last[i] - 256;
745 return last[i]; 733 return last[i];
@@ -748,17 +736,17 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv,
748 if (last[i] < 256) 736 if (last[i] < 256)
749 continue; 737 continue;
750 738
751 if (udf_check_anchor_block(sb, last[i] - 256, varconv)) { 739 if (udf_check_anchor_block(sb, last[i] - 256)) {
752 sbi->s_anchor[1] = last[i] - 256; 740 sbi->s_anchor[1] = last[i] - 256;
753 return last[i]; 741 return last[i];
754 } 742 }
755 } 743 }
756 744
757 if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) { 745 if (udf_check_anchor_block(sb, sbi->s_session + 256)) {
758 sbi->s_anchor[0] = sbi->s_session + 256; 746 sbi->s_anchor[0] = sbi->s_session + 256;
759 return last[0]; 747 return last[0];
760 } 748 }
761 if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) { 749 if (udf_check_anchor_block(sb, sbi->s_session + 512)) {
762 sbi->s_anchor[0] = sbi->s_session + 512; 750 sbi->s_anchor[0] = sbi->s_session + 512;
763 return last[0]; 751 return last[0];
764 } 752 }
@@ -780,23 +768,24 @@ static void udf_find_anchor(struct super_block *sb)
780 int i; 768 int i;
781 struct udf_sb_info *sbi = UDF_SB(sb); 769 struct udf_sb_info *sbi = UDF_SB(sb);
782 770
783 lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block); 771 lastblock = udf_scan_anchors(sb, sbi->s_last_block);
784 if (lastblock) 772 if (lastblock)
785 goto check_anchor; 773 goto check_anchor;
786 774
787 /* No anchor found? Try VARCONV conversion of block numbers */ 775 /* No anchor found? Try VARCONV conversion of block numbers */
776 UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
788 /* Firstly, we try to not convert number of the last block */ 777 /* Firstly, we try to not convert number of the last block */
789 lastblock = udf_scan_anchors(sb, 1, 778 lastblock = udf_scan_anchors(sb,
790 udf_variable_to_fixed(sbi->s_last_block)); 779 udf_variable_to_fixed(sbi->s_last_block));
791 if (lastblock) { 780 if (lastblock)
792 UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
793 goto check_anchor; 781 goto check_anchor;
794 }
795 782
796 /* Secondly, we try with converted number of the last block */ 783 /* Secondly, we try with converted number of the last block */
797 lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block); 784 lastblock = udf_scan_anchors(sb, sbi->s_last_block);
798 if (lastblock) 785 if (!lastblock) {
799 UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); 786 /* VARCONV didn't help. Clear it. */
787 UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
788 }
800 789
801check_anchor: 790check_anchor:
802 /* 791 /*
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 8fa9c2d70911..8ec865de5f13 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -16,7 +16,7 @@
16#define UDF_PREALLOCATE 16#define UDF_PREALLOCATE
17#define UDF_DEFAULT_PREALLOC_BLOCKS 8 17#define UDF_DEFAULT_PREALLOC_BLOCKS 8
18 18
19#define UDFFS_DEBUG 19#undef UDFFS_DEBUG
20 20
21#ifdef UDFFS_DEBUG 21#ifdef UDFFS_DEBUG
22#define udf_debug(f, a...) \ 22#define udf_debug(f, a...) \
diff --git a/fs/utimes.c b/fs/utimes.c
index af059d5cb485..b6b664e7145e 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,14 +40,9 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
40 40
41#endif 41#endif
42 42
43static bool nsec_special(long nsec)
44{
45 return nsec == UTIME_OMIT || nsec == UTIME_NOW;
46}
47
48static bool nsec_valid(long nsec) 43static bool nsec_valid(long nsec)
49{ 44{
50 if (nsec_special(nsec)) 45 if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
51 return true; 46 return true;
52 47
53 return nsec >= 0 && nsec <= 999999999; 48 return nsec >= 0 && nsec <= 999999999;
@@ -102,7 +97,11 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
102 if (error) 97 if (error)
103 goto dput_and_out; 98 goto dput_and_out;
104 99
105 /* Don't worry, the checks are done in inode_change_ok() */ 100 if (times && times[0].tv_nsec == UTIME_NOW &&
101 times[1].tv_nsec == UTIME_NOW)
102 times = NULL;
103
104 /* In most cases, the checks are done in inode_change_ok() */
106 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; 105 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
107 if (times) { 106 if (times) {
108 error = -EPERM; 107 error = -EPERM;
@@ -124,28 +123,34 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
124 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; 123 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
125 newattrs.ia_valid |= ATTR_MTIME_SET; 124 newattrs.ia_valid |= ATTR_MTIME_SET;
126 } 125 }
127 }
128 126
129 /* 127 /*
130 * If times is NULL or both times are either UTIME_OMIT or 128 * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT
131 * UTIME_NOW, then need to check permissions, because 129 * cases, we need to make an extra check that is not done by
132 * inode_change_ok() won't do it. 130 * inode_change_ok().
133 */ 131 */
134 if (!times || (nsec_special(times[0].tv_nsec) && 132 if (((times[0].tv_nsec == UTIME_NOW &&
135 nsec_special(times[1].tv_nsec))) { 133 times[1].tv_nsec == UTIME_OMIT)
134 ||
135 (times[0].tv_nsec == UTIME_OMIT &&
136 times[1].tv_nsec == UTIME_NOW))
137 && !is_owner_or_cap(inode))
138 goto mnt_drop_write_and_out;
139 } else {
140
141 /*
142 * If times is NULL (or both times are UTIME_NOW),
143 * then we need to check permissions, because
144 * inode_change_ok() won't do it.
145 */
136 error = -EACCES; 146 error = -EACCES;
137 if (IS_IMMUTABLE(inode)) 147 if (IS_IMMUTABLE(inode))
138 goto mnt_drop_write_and_out; 148 goto mnt_drop_write_and_out;
139 149
140 if (!is_owner_or_cap(inode)) { 150 if (!is_owner_or_cap(inode)) {
141 if (f) { 151 error = permission(inode, MAY_WRITE, NULL);
142 if (!(f->f_mode & FMODE_WRITE)) 152 if (error)
143 goto mnt_drop_write_and_out; 153 goto mnt_drop_write_and_out;
144 } else {
145 error = vfs_permission(&nd, MAY_WRITE);
146 if (error)
147 goto mnt_drop_write_and_out;
148 }
149 } 154 }
150 } 155 }
151 mutex_lock(&inode->i_mutex); 156 mutex_lock(&inode->i_mutex);
@@ -169,14 +174,6 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __
169 if (utimes) { 174 if (utimes) {
170 if (copy_from_user(&tstimes, utimes, sizeof(tstimes))) 175 if (copy_from_user(&tstimes, utimes, sizeof(tstimes)))
171 return -EFAULT; 176 return -EFAULT;
172 if ((tstimes[0].tv_nsec == UTIME_OMIT ||
173 tstimes[0].tv_nsec == UTIME_NOW) &&
174 tstimes[0].tv_sec != 0)
175 return -EINVAL;
176 if ((tstimes[1].tv_nsec == UTIME_OMIT ||
177 tstimes[1].tv_nsec == UTIME_NOW) &&
178 tstimes[1].tv_sec != 0)
179 return -EINVAL;
180 177
181 /* Nothing to do, we must not even check the path. */ 178 /* Nothing to do, we must not even check the path. */
182 if (tstimes[0].tv_nsec == UTIME_OMIT && 179 if (tstimes[0].tv_nsec == UTIME_OMIT &&
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5105015a75ad..98e0e86093b4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -387,6 +387,8 @@ _xfs_buf_lookup_pages(
387 if (unlikely(page == NULL)) { 387 if (unlikely(page == NULL)) {
388 if (flags & XBF_READ_AHEAD) { 388 if (flags & XBF_READ_AHEAD) {
389 bp->b_page_count = i; 389 bp->b_page_count = i;
390 for (i = 0; i < bp->b_page_count; i++)
391 unlock_page(bp->b_pages[i]);
390 return -ENOMEM; 392 return -ENOMEM;
391 } 393 }
392 394
@@ -416,17 +418,24 @@ _xfs_buf_lookup_pages(
416 ASSERT(!PagePrivate(page)); 418 ASSERT(!PagePrivate(page));
417 if (!PageUptodate(page)) { 419 if (!PageUptodate(page)) {
418 page_count--; 420 page_count--;
419 if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { 421 if (blocksize >= PAGE_CACHE_SIZE) {
422 if (flags & XBF_READ)
423 bp->b_flags |= _XBF_PAGE_LOCKED;
424 } else if (!PagePrivate(page)) {
420 if (test_page_region(page, offset, nbytes)) 425 if (test_page_region(page, offset, nbytes))
421 page_count++; 426 page_count++;
422 } 427 }
423 } 428 }
424 429
425 unlock_page(page);
426 bp->b_pages[i] = page; 430 bp->b_pages[i] = page;
427 offset = 0; 431 offset = 0;
428 } 432 }
429 433
434 if (!(bp->b_flags & _XBF_PAGE_LOCKED)) {
435 for (i = 0; i < bp->b_page_count; i++)
436 unlock_page(bp->b_pages[i]);
437 }
438
430 if (page_count == bp->b_page_count) 439 if (page_count == bp->b_page_count)
431 bp->b_flags |= XBF_DONE; 440 bp->b_flags |= XBF_DONE;
432 441
@@ -746,6 +755,7 @@ xfs_buf_associate_memory(
746 bp->b_count_desired = len; 755 bp->b_count_desired = len;
747 bp->b_buffer_length = buflen; 756 bp->b_buffer_length = buflen;
748 bp->b_flags |= XBF_MAPPED; 757 bp->b_flags |= XBF_MAPPED;
758 bp->b_flags &= ~_XBF_PAGE_LOCKED;
749 759
750 return 0; 760 return 0;
751} 761}
@@ -1093,8 +1103,10 @@ _xfs_buf_ioend(
1093 xfs_buf_t *bp, 1103 xfs_buf_t *bp,
1094 int schedule) 1104 int schedule)
1095{ 1105{
1096 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) 1106 if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1107 bp->b_flags &= ~_XBF_PAGE_LOCKED;
1097 xfs_buf_ioend(bp, schedule); 1108 xfs_buf_ioend(bp, schedule);
1109 }
1098} 1110}
1099 1111
1100STATIC void 1112STATIC void
@@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io(
1125 1137
1126 if (--bvec >= bio->bi_io_vec) 1138 if (--bvec >= bio->bi_io_vec)
1127 prefetchw(&bvec->bv_page->flags); 1139 prefetchw(&bvec->bv_page->flags);
1140
1141 if (bp->b_flags & _XBF_PAGE_LOCKED)
1142 unlock_page(page);
1128 } while (bvec >= bio->bi_io_vec); 1143 } while (bvec >= bio->bi_io_vec);
1129 1144
1130 _xfs_buf_ioend(bp, 1); 1145 _xfs_buf_ioend(bp, 1);
@@ -1163,7 +1178,8 @@ _xfs_buf_ioapply(
1163 * filesystem block size is not smaller than the page size. 1178 * filesystem block size is not smaller than the page size.
1164 */ 1179 */
1165 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && 1180 if ((bp->b_buffer_length < PAGE_CACHE_SIZE) &&
1166 (bp->b_flags & XBF_READ) && 1181 ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) ==
1182 (XBF_READ|_XBF_PAGE_LOCKED)) &&
1167 (blocksize >= PAGE_CACHE_SIZE)) { 1183 (blocksize >= PAGE_CACHE_SIZE)) {
1168 bio = bio_alloc(GFP_NOIO, 1); 1184 bio = bio_alloc(GFP_NOIO, 1);
1169 1185
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 841d7883528d..f948ec7ba9a4 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -66,6 +66,25 @@ typedef enum {
66 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ 66 _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ 67 _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ 68 _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
69
70 /*
71 * Special flag for supporting metadata blocks smaller than a FSB.
72 *
73 * In this case we can have multiple xfs_buf_t on a single page and
74 * need to lock out concurrent xfs_buf_t readers as they only
75 * serialise access to the buffer.
76 *
77 * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
78 * between reads of the page. Hence we can have one thread read the
79 * page and modify it, but then race with another thread that thinks
80 * the page is not up-to-date and hence reads it again.
81 *
82 * The result is that the first modifcation to the page is lost.
83 * This sort of AGF/AGI reading race can happen when unlinking inodes
84 * that require truncation and results in the AGI unlinked list
85 * modifications being lost.
86 */
87 _XBF_PAGE_LOCKED = (1 << 22),
69} xfs_buf_flags_t; 88} xfs_buf_flags_t;
70 89
71typedef enum { 90typedef enum {
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 65e78c13d4ae..5f60363b9343 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -184,19 +184,24 @@ xfs_file_release(
184 return -xfs_release(XFS_I(inode)); 184 return -xfs_release(XFS_I(inode));
185} 185}
186 186
187/*
188 * We ignore the datasync flag here because a datasync is effectively
189 * identical to an fsync. That is, datasync implies that we need to write
190 * only the metadata needed to be able to access the data that is written
191 * if we crash after the call completes. Hence if we are writing beyond
192 * EOF we have to log the inode size change as well, which makes it a
193 * full fsync. If we don't write beyond EOF, the inode core will be
194 * clean in memory and so we don't need to log the inode, just like
195 * fsync.
196 */
187STATIC int 197STATIC int
188xfs_file_fsync( 198xfs_file_fsync(
189 struct file *filp, 199 struct file *filp,
190 struct dentry *dentry, 200 struct dentry *dentry,
191 int datasync) 201 int datasync)
192{ 202{
193 int flags = FSYNC_WAIT;
194
195 if (datasync)
196 flags |= FSYNC_DATA;
197 xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); 203 xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
198 return -xfs_fsync(XFS_I(dentry->d_inode), flags, 204 return -xfs_fsync(XFS_I(dentry->d_inode));
199 (xfs_off_t)0, (xfs_off_t)-1);
200} 205}
201 206
202/* 207/*
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 9d73cb5c0fc7..25eb2a9e8d9b 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -230,14 +230,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
230#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ 230#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
231 231
232/* 232/*
233 * Flags to vop_fsync/reclaim.
234 */
235#define FSYNC_NOWAIT 0 /* asynchronous flush */
236#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */
237#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */
238#define FSYNC_DATA 0x4 /* synchronous fsync of data only */
239
240/*
241 * Tracking vnode activity. 233 * Tracking vnode activity.
242 */ 234 */
243#if defined(XFS_INODE_TRACE) 235#if defined(XFS_INODE_TRACE)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index cf0bb9c1d621..e569bf5d6cf0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2974,6 +2974,7 @@ xfs_iflush_cluster(
2974 xfs_mount_t *mp = ip->i_mount; 2974 xfs_mount_t *mp = ip->i_mount;
2975 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); 2975 xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
2976 unsigned long first_index, mask; 2976 unsigned long first_index, mask;
2977 unsigned long inodes_per_cluster;
2977 int ilist_size; 2978 int ilist_size;
2978 xfs_inode_t **ilist; 2979 xfs_inode_t **ilist;
2979 xfs_inode_t *iq; 2980 xfs_inode_t *iq;
@@ -2985,8 +2986,9 @@ xfs_iflush_cluster(
2985 ASSERT(pag->pagi_inodeok); 2986 ASSERT(pag->pagi_inodeok);
2986 ASSERT(pag->pag_ici_init); 2987 ASSERT(pag->pag_ici_init);
2987 2988
2988 ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); 2989 inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
2989 ilist = kmem_alloc(ilist_size, KM_MAYFAIL); 2990 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
2991 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
2990 if (!ilist) 2992 if (!ilist)
2991 return 0; 2993 return 0;
2992 2994
@@ -2995,8 +2997,7 @@ xfs_iflush_cluster(
2995 read_lock(&pag->pag_ici_lock); 2997 read_lock(&pag->pag_ici_lock);
2996 /* really need a gang lookup range call here */ 2998 /* really need a gang lookup range call here */
2997 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 2999 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
2998 first_index, 3000 first_index, inodes_per_cluster);
2999 XFS_INODE_CLUSTER_SIZE(mp));
3000 if (nr_found == 0) 3001 if (nr_found == 0)
3001 goto out_free; 3002 goto out_free;
3002 3003
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 70702a60b4bb..e475e3717eb3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -856,18 +856,14 @@ xfs_readlink(
856/* 856/*
857 * xfs_fsync 857 * xfs_fsync
858 * 858 *
859 * This is called to sync the inode and its data out to disk. 859 * This is called to sync the inode and its data out to disk. We need to hold
860 * We need to hold the I/O lock while flushing the data, and 860 * the I/O lock while flushing the data, and the inode lock while flushing the
861 * the inode lock while flushing the inode. The inode lock CANNOT 861 * inode. The inode lock CANNOT be held while flushing the data, so acquire
862 * be held while flushing the data, so acquire after we're done 862 * after we're done with that.
863 * with that.
864 */ 863 */
865int 864int
866xfs_fsync( 865xfs_fsync(
867 xfs_inode_t *ip, 866 xfs_inode_t *ip)
868 int flag,
869 xfs_off_t start,
870 xfs_off_t stop)
871{ 867{
872 xfs_trans_t *tp; 868 xfs_trans_t *tp;
873 int error; 869 int error;
@@ -875,103 +871,79 @@ xfs_fsync(
875 871
876 xfs_itrace_entry(ip); 872 xfs_itrace_entry(ip);
877 873
878 ASSERT(start >= 0 && stop >= -1);
879
880 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 874 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
881 return XFS_ERROR(EIO); 875 return XFS_ERROR(EIO);
882 876
883 if (flag & FSYNC_DATA) 877 /* capture size updates in I/O completion before writing the inode. */
884 filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); 878 error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
879 if (error)
880 return XFS_ERROR(error);
885 881
886 /* 882 /*
887 * We always need to make sure that the required inode state 883 * We always need to make sure that the required inode state is safe on
888 * is safe on disk. The vnode might be clean but because 884 * disk. The vnode might be clean but we still might need to force the
889 * of committed transactions that haven't hit the disk yet. 885 * log because of committed transactions that haven't hit the disk yet.
890 * Likewise, there could be unflushed non-transactional 886 * Likewise, there could be unflushed non-transactional changes to the
891 * changes to the inode core that have to go to disk. 887 * inode core that have to go to disk and this requires us to issue
888 * a synchronous transaction to capture these changes correctly.
892 * 889 *
893 * The following code depends on one assumption: that 890 * This code relies on the assumption that if the update_* fields
894 * any transaction that changes an inode logs the core 891 * of the inode are clear and the inode is unpinned then it is clean
895 * because it has to change some field in the inode core 892 * and no action is required.
896 * (typically nextents or nblocks). That assumption
897 * implies that any transactions against an inode will
898 * catch any non-transactional updates. If inode-altering
899 * transactions exist that violate this assumption, the
900 * code breaks. Right now, it figures that if the involved
901 * update_* field is clear and the inode is unpinned, the
902 * inode is clean. Either it's been flushed or it's been
903 * committed and the commit has hit the disk unpinning the inode.
904 * (Note that xfs_inode_item_format() called at commit clears
905 * the update_* fields.)
906 */ 893 */
907 xfs_ilock(ip, XFS_ILOCK_SHARED); 894 xfs_ilock(ip, XFS_ILOCK_SHARED);
908 895
909 /* If we are flushing data then we care about update_size 896 if (!(ip->i_update_size || ip->i_update_core)) {
910 * being set, otherwise we care about update_core
911 */
912 if ((flag & FSYNC_DATA) ?
913 (ip->i_update_size == 0) :
914 (ip->i_update_core == 0)) {
915 /* 897 /*
916 * Timestamps/size haven't changed since last inode 898 * Timestamps/size haven't changed since last inode flush or
917 * flush or inode transaction commit. That means 899 * inode transaction commit. That means either nothing got
918 * either nothing got written or a transaction 900 * written or a transaction committed which caught the updates.
919 * committed which caught the updates. If the 901 * If the latter happened and the transaction hasn't hit the
920 * latter happened and the transaction hasn't 902 * disk yet, the inode will be still be pinned. If it is,
921 * hit the disk yet, the inode will be still 903 * force the log.
922 * be pinned. If it is, force the log.
923 */ 904 */
924 905
925 xfs_iunlock(ip, XFS_ILOCK_SHARED); 906 xfs_iunlock(ip, XFS_ILOCK_SHARED);
926 907
927 if (xfs_ipincount(ip)) { 908 if (xfs_ipincount(ip)) {
928 _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, 909 error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
929 XFS_LOG_FORCE | 910 XFS_LOG_FORCE | XFS_LOG_SYNC,
930 ((flag & FSYNC_WAIT)
931 ? XFS_LOG_SYNC : 0),
932 &log_flushed); 911 &log_flushed);
933 } else { 912 } else {
934 /* 913 /*
935 * If the inode is not pinned and nothing 914 * If the inode is not pinned and nothing has changed
936 * has changed we don't need to flush the 915 * we don't need to flush the cache.
937 * cache.
938 */ 916 */
939 changed = 0; 917 changed = 0;
940 } 918 }
941 error = 0;
942 } else { 919 } else {
943 /* 920 /*
944 * Kick off a transaction to log the inode 921 * Kick off a transaction to log the inode core to get the
945 * core to get the updates. Make it 922 * updates. The sync transaction will also force the log.
946 * sync if FSYNC_WAIT is passed in (which
947 * is done by everybody but specfs). The
948 * sync transaction will also force the log.
949 */ 923 */
950 xfs_iunlock(ip, XFS_ILOCK_SHARED); 924 xfs_iunlock(ip, XFS_ILOCK_SHARED);
951 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); 925 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
952 if ((error = xfs_trans_reserve(tp, 0, 926 error = xfs_trans_reserve(tp, 0,
953 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 927 XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
954 0, 0, 0))) { 928 if (error) {
955 xfs_trans_cancel(tp, 0); 929 xfs_trans_cancel(tp, 0);
956 return error; 930 return error;
957 } 931 }
958 xfs_ilock(ip, XFS_ILOCK_EXCL); 932 xfs_ilock(ip, XFS_ILOCK_EXCL);
959 933
960 /* 934 /*
961 * Note - it's possible that we might have pushed 935 * Note - it's possible that we might have pushed ourselves out
962 * ourselves out of the way during trans_reserve 936 * of the way during trans_reserve which would flush the inode.
963 * which would flush the inode. But there's no 937 * But there's no guarantee that the inode buffer has actually
964 * guarantee that the inode buffer has actually 938 * gone out yet (it's delwri). Plus the buffer could be pinned
965 * gone out yet (it's delwri). Plus the buffer 939 * anyway if it's part of an inode in another recent
966 * could be pinned anyway if it's part of an 940 * transaction. So we play it safe and fire off the
967 * inode in another recent transaction. So we 941 * transaction anyway.
968 * play it safe and fire off the transaction anyway.
969 */ 942 */
970 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 943 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
971 xfs_trans_ihold(tp, ip); 944 xfs_trans_ihold(tp, ip);
972 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 945 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
973 if (flag & FSYNC_WAIT) 946 xfs_trans_set_sync(tp);
974 xfs_trans_set_sync(tp);
975 error = _xfs_trans_commit(tp, 0, &log_flushed); 947 error = _xfs_trans_commit(tp, 0, &log_flushed);
976 948
977 xfs_iunlock(ip, XFS_ILOCK_EXCL); 949 xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 8abe8f186e20..57335ba4ce53 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,8 +18,7 @@ int xfs_open(struct xfs_inode *ip);
18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
19 struct cred *credp); 19 struct cred *credp);
20int xfs_readlink(struct xfs_inode *ip, char *link); 20int xfs_readlink(struct xfs_inode *ip, char *link);
21int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, 21int xfs_fsync(struct xfs_inode *ip);
22 xfs_off_t stop);
23int xfs_release(struct xfs_inode *ip); 22int xfs_release(struct xfs_inode *ip);
24int xfs_inactive(struct xfs_inode *ip); 23int xfs_inactive(struct xfs_inode *ip);
25int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 24int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,