path: root/fs
author    David Woodhouse <dwmw2@infradead.org>  2006-08-30 18:30:38 -0400
committer David Woodhouse <dwmw2@infradead.org>  2006-08-30 18:30:38 -0400
commit    0a7d5f8ce960e74fa22986bda4af488539796e49 (patch)
tree      e29ad17808a5c3410518e22dae8dfe94801b59f3 /fs
parent    0165508c80a2b5d5268d9c5dfa9b30c534a33693 (diff)
parent    dc709bd190c130b299ac19d596594256265c042a (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/conv.c | 6
-rw-r--r--  fs/9p/vfs_inode.c | 6
-rw-r--r--  fs/Kconfig | 1
-rw-r--r--  fs/adfs/super.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 11
-rw-r--r--  fs/binfmt_elf.c | 2
-rw-r--r--  fs/binfmt_elf_fdpic.c | 980
-rw-r--r--  fs/block_dev.c | 114
-rw-r--r--  fs/buffer.c | 7
-rw-r--r--  fs/char_dev.c | 22
-rw-r--r--  fs/cifs/CHANGES | 10
-rw-r--r--  fs/cifs/README | 2
-rw-r--r--  fs/cifs/cifsencrypt.c | 3
-rw-r--r--  fs/cifs/cifsfs.c | 6
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/cifsglob.h | 18
-rw-r--r--  fs/cifs/cifsproto.h | 4
-rw-r--r--  fs/cifs/cifssmb.c | 28
-rw-r--r--  fs/cifs/connect.c | 32
-rw-r--r--  fs/cifs/dir.c | 4
-rw-r--r--  fs/cifs/file.c | 97
-rw-r--r--  fs/cifs/netmisc.c | 1
-rw-r--r--  fs/cifs/readdir.c | 2
-rw-r--r--  fs/cifs/sess.c | 2
-rw-r--r--  fs/cifs/smberr.h | 1
-rw-r--r--  fs/cifs/transport.c | 618
-rw-r--r--  fs/cifs/xattr.c | 6
-rw-r--r--  fs/coda/file.c | 4
-rw-r--r--  fs/efs/symlink.c | 3
-rw-r--r--  fs/eventpoll.c | 4
-rw-r--r--  fs/exec.c | 10
-rw-r--r--  fs/ext2/super.c | 2
-rw-r--r--  fs/ext3/acl.h | 3
-rw-r--r--  fs/ext3/balloc.c | 6
-rw-r--r--  fs/ext3/inode.c | 19
-rw-r--r--  fs/ext3/namei.c | 15
-rw-r--r--  fs/file.c | 14
-rw-r--r--  fs/freevxfs/vxfs_lookup.c | 2
-rw-r--r--  fs/fuse/control.c | 4
-rw-r--r--  fs/fuse/dir.c | 47
-rw-r--r--  fs/fuse/file.c | 10
-rw-r--r--  fs/fuse/fuse_i.h | 2
-rw-r--r--  fs/fuse/inode.c | 2
-rw-r--r--  fs/hugetlbfs/inode.c | 4
-rw-r--r--  fs/inotify_user.c | 2
-rw-r--r--  fs/ioprio.c | 30
-rw-r--r--  fs/jbd/commit.c | 6
-rw-r--r--  fs/jbd/journal.c | 92
-rw-r--r--  fs/jbd/transaction.c | 9
-rw-r--r--  fs/jfs/inode.c | 16
-rw-r--r--  fs/jfs/jfs_inode.h | 1
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 2
-rw-r--r--  fs/jfs/namei.c | 33
-rw-r--r--  fs/jfs/super.c | 118
-rw-r--r--  fs/lockd/clntproc.c | 26
-rw-r--r--  fs/lockd/svclock.c | 12
-rw-r--r--  fs/lockd/svcsubs.c | 15
-rw-r--r--  fs/locks.c | 29
-rw-r--r--  fs/minix/inode.c | 13
-rw-r--r--  fs/namei.c | 27
-rw-r--r--  fs/nfs/dir.c | 4
-rw-r--r--  fs/nfs/direct.c | 435
-rw-r--r--  fs/nfs/file.c | 8
-rw-r--r--  fs/nfs/idmap.c | 4
-rw-r--r--  fs/nfs/namespace.c | 4
-rw-r--r--  fs/nfs/nfs4proc.c | 103
-rw-r--r--  fs/nfs/nfs4xdr.c | 21
-rw-r--r--  fs/nfs/read.c | 25
-rw-r--r--  fs/nfs/write.c | 22
-rw-r--r--  fs/nfsd/nfs4proc.c | 8
-rw-r--r--  fs/nfsd/nfsfh.c | 20
-rw-r--r--  fs/nfsd/stats.c | 10
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 1
-rw-r--r--  fs/ocfs2/dlm/dlmunlock.c | 43
-rw-r--r--  fs/ocfs2/localalloc.c | 8
-rw-r--r--  fs/ocfs2/ocfs2.h | 2
-rw-r--r--  fs/ocfs2/suballoc.c | 261
-rw-r--r--  fs/ocfs2/suballoc.h | 2
-rw-r--r--  fs/ocfs2/super.c | 8
-rw-r--r--  fs/partitions/Kconfig | 2
-rw-r--r--  fs/partitions/check.c | 1
-rw-r--r--  fs/partitions/sun.c | 2
-rw-r--r--  fs/proc/array.c | 6
-rw-r--r--  fs/proc/base.c | 33
-rw-r--r--  fs/proc/inode.c | 2
-rw-r--r--  fs/proc/kcore.c | 4
-rw-r--r--  fs/proc/proc_misc.c | 2
-rw-r--r--  fs/ramfs/file-nommu.c | 4
-rw-r--r--  fs/read_write.c | 2
-rw-r--r--  fs/reiserfs/file.c | 8
-rw-r--r--  fs/reiserfs/inode.c | 26
-rw-r--r--  fs/reiserfs/ioctl.c | 2
-rw-r--r--  fs/reiserfs/procfs.c | 25
-rw-r--r--  fs/reiserfs/xattr.c | 2
-rw-r--r--  fs/splice.c | 238
-rw-r--r--  fs/sysfs/inode.c | 12
-rw-r--r--  fs/udf/ialloc.c | 11
-rw-r--r--  fs/udf/super.c | 9
-rw-r--r--  fs/udf/truncate.c | 64
-rw-r--r--  fs/ufs/balloc.c | 2
-rw-r--r--  fs/ufs/inode.c | 35
-rw-r--r--  fs/ufs/namei.c | 3
-rw-r--r--  fs/ufs/truncate.c | 77
-rw-r--r--  fs/ufs/util.c | 17
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 7
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 19
-rw-r--r--  fs/xfs/xfs_alloc.c | 103
-rw-r--r--  fs/xfs/xfs_inode.c | 17
-rw-r--r--  fs/xfs/xfs_log.c | 12
-rw-r--r--  fs/xfs/xfs_vfsops.c | 2
112 files changed, 3018 insertions, 1294 deletions
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 1e898144eb7c..56d88c1a09c5 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -673,8 +673,10 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
 	struct cbuf *bufp = &buffer;
 
 	size = 4 + 2 + strlen(name) + 4 + 1;	/* fid[4] name[s] perm[4] mode[1] */
-	if (extended && extension!=NULL)
-		size += 2 + strlen(extension);	/* extension[s] */
+	if (extended) {
+		size += 2 +	/* extension[s] */
+			(extension == NULL ? 0 : strlen(extension));
+	}
 
 	fc = v9fs_create_common(bufp, size, TCREATE);
 	if (IS_ERR(fc))
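
The hunk above matters because a 9P string is encoded on the wire as a two-byte length followed by its bytes, so in extended (9P2000.u) mode the extension field always costs at least its two-byte prefix; the old test skipped the field entirely whenever extension was NULL. A stand-alone sketch of the corrected sizing (hypothetical tcreate_size() helper, not from the kernel tree):

	#include <stdio.h>
	#include <string.h>

	/* hypothetical userspace version of the sizing logic above */
	static size_t tcreate_size(const char *name, int extended,
				   const char *extension)
	{
		size_t size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */

		if (extended) {
			size += 2 +	/* length prefix is always present */
				(extension == NULL ? 0 : strlen(extension));
		}
		return size;
	}

	int main(void)
	{
		/* with a NULL extension the old code omitted the 2-byte prefix */
		printf("%zu\n", tcreate_size("file", 1, NULL));
		return 0;
	}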
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 2f580a197b8d..eae50c9d6dc4 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -434,11 +434,11 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 	result = v9fs_t_remove(v9ses, fid, &fcall);
 	if (result < 0) {
 		PRINT_FCALL_ERROR("remove fails", fcall);
-	} else {
-		v9fs_put_idpool(fid, &v9ses->fidpool);
-		v9fs_fid_destroy(v9fid);
 	}
 
+	v9fs_put_idpool(fid, &v9ses->fidpool);
+	v9fs_fid_destroy(v9fid);
+
 	kfree(fcall);
 	return result;
 }
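
The vfs_inode.c hunk moves the fid teardown out of the else branch: per the 9P protocol a Tremove clunks the fid even when the remove itself fails, so the client must release its bookkeeping on both paths. A minimal sketch of the pattern (hypothetical names, not kernel code):

	/* hypothetical client-side remove; the server invalidates the fid
	 * whether or not the operation succeeds */
	static int remove_and_release(struct session *s, int fid)
	{
		int result = send_tremove(s, fid); /* may fail; fid is gone anyway */

		if (result < 0)
			report_error("remove fails");

		put_idpool(fid, &s->fidpool);	/* always reclaim the id */
		destroy_fid(s, fid);
		return result;
	}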
diff --git a/fs/Kconfig b/fs/Kconfig
index 53f5c6d61121..3f00a9faabcb 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1801,6 +1801,7 @@ config CIFS_POSIX
 
 config CIFS_DEBUG2
 	bool "Enable additional CIFS debugging routines"
+	depends on CIFS
 	help
 	  Enabling this option adds a few more debugging routines
 	  to the cifs code which slightly increases the size of
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index ba1c88af49fe..82011019494c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -308,7 +308,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
 	if (adfs_checkmap(sb, dm))
 		return dm;
 
-	adfs_error(sb, NULL, "map corrupted");
+	adfs_error(sb, "map corrupted");
 
 error_free:
 	while (--zone >= 0)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index fcaeead9696b..50cfca5c7efd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -512,7 +512,11 @@ befs_utf2nls(struct super_block *sb, const char *in,
 	wchar_t uni;
 	int unilen, utflen;
 	char *result;
-	int maxlen = in_len; /* The utf8->nls conversion can't make more chars */
+	/* The utf8->nls conversion won't make the final nls string bigger
+	 * than the utf one, but if the string is pure ascii they'll have the
+	 * same width and an extra char is needed to save the additional \0
+	 */
+	int maxlen = in_len + 1;
 
 	befs_debug(sb, "---> utf2nls()");
 
@@ -588,7 +592,10 @@ befs_nls2utf(struct super_block *sb, const char *in,
 	wchar_t uni;
 	int unilen, utflen;
 	char *result;
-	int maxlen = 3 * in_len;
+	/* There're nls characters that will translate to 3-chars-wide UTF-8
+	 * characters, a additional byte is needed to save the final \0
+	 * in special cases */
+	int maxlen = (3 * in_len) + 1;
 
 	befs_debug(sb, "---> nls2utf()\n");
 
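
Both befs hunks fix the same off-by-one: the conversion buffers were sized for the converted payload but not, in the worst case, for the trailing NUL. A sketch of the two bounds, assuming the NLS/UTF-8 width facts stated in the new comments:

	/* worst-case output buffer sizes, including the terminating NUL */
	static int utf2nls_maxlen(int in_len)
	{
		/* output never exceeds the UTF-8 input; pure ASCII keeps full width */
		return in_len + 1;
	}

	static int nls2utf_maxlen(int in_len)
	{
		/* an NLS character may expand to a 3-byte UTF-8 sequence */
		return (3 * in_len) + 1;
	}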
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f42e64210ee5..672a3b90bc55 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1185,8 +1185,6 @@ static int maydump(struct vm_area_struct *vma)
 	return 1;
 }
 
-#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
-
 /* An ELF note in memory */
 struct memelfnote
 {
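
The deleted lines drop binfmt_elf.c's private roundup macro, presumably in favour of a shared generic definition; the new FDPIC core dumper later in this diff relies on the same arithmetic in notesize(). For reference, the macro computes the next multiple of y at or above x:

	#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
	/* e.g. roundup(13, 4) == 16, roundup(16, 4) == 16, roundup(0, 4) == 0 */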
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index eba4e23b9ca0..2f3365829229 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1,6 +1,6 @@
 /* binfmt_elf_fdpic.c: FDPIC ELF binary format
  *
- * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  * Derived from binfmt_elf.c
  *
@@ -24,7 +24,9 @@
 #include <linux/file.h>
 #include <linux/fcntl.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <linux/highmem.h>
+#include <linux/highuid.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/init.h>
@@ -48,45 +50,59 @@ typedef char *elf_caddr_t;
 #define kdebug(fmt, ...) do {} while(0)
 #endif
 
+#if 0
+#define kdcore(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ )
+#else
+#define kdcore(fmt, ...) do {} while(0)
+#endif
+
 MODULE_LICENSE("GPL");
 
-static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs);
-//static int load_elf_fdpic_library(struct file *);
-static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file);
-static int elf_fdpic_map_file(struct elf_fdpic_params *params,
-			      struct file *file,
-			      struct mm_struct *mm,
-			      const char *what);
+static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *);
+static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *);
+static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *,
+			      struct mm_struct *, const char *);
 
-static int create_elf_fdpic_tables(struct linux_binprm *bprm,
-				   struct mm_struct *mm,
-				   struct elf_fdpic_params *exec_params,
-				   struct elf_fdpic_params *interp_params);
+static int create_elf_fdpic_tables(struct linux_binprm *, struct mm_struct *,
+				   struct elf_fdpic_params *,
+				   struct elf_fdpic_params *);
 
 #ifndef CONFIG_MMU
-static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp);
-static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params,
-						   struct file *file,
-						   struct mm_struct *mm);
+static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *,
+					    unsigned long *);
+static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *,
+						   struct file *,
+						   struct mm_struct *);
 #endif
 
-static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
-					     struct file *file,
-					     struct mm_struct *mm);
+static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
+					     struct file *, struct mm_struct *);
+
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
+static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *);
+#endif
 
 static struct linux_binfmt elf_fdpic_format = {
 	.module = THIS_MODULE,
 	.load_binary = load_elf_fdpic_binary,
-//	.load_shlib = load_elf_fdpic_library,
-//	.core_dump = elf_fdpic_core_dump,
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
+	.core_dump = elf_fdpic_core_dump,
+#endif
 	.min_coredump = ELF_EXEC_PAGESIZE,
 };
 
-static int __init init_elf_fdpic_binfmt(void) { return register_binfmt(&elf_fdpic_format); }
-static void __exit exit_elf_fdpic_binfmt(void) { unregister_binfmt(&elf_fdpic_format); }
+static int __init init_elf_fdpic_binfmt(void)
+{
+	return register_binfmt(&elf_fdpic_format);
+}
+
+static void __exit exit_elf_fdpic_binfmt(void)
+{
+	unregister_binfmt(&elf_fdpic_format);
+}
 
-module_init(init_elf_fdpic_binfmt)
-module_exit(exit_elf_fdpic_binfmt)
+core_initcall(init_elf_fdpic_binfmt);
+module_exit(exit_elf_fdpic_binfmt);
 
 static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
 {
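
Note the registration change at the bottom of the hunk: for built-in code, module_init() maps to device_initcall() (level 6), while core_initcall() runs at level 1, so the FDPIC binfmt now registers much earlier in boot. A sketch of the idiom (illustrative only, using the existing register_binfmt() API):

	/* initcall levels fire in ascending order during boot */
	static int __init register_fdpic_early(void)
	{
		return register_binfmt(&elf_fdpic_format);
	}
	core_initcall(register_fdpic_early); /* level 1, well before device_initcall */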
@@ -105,7 +121,8 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file)
 /*
  * read the program headers table into memory
  */
-static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file)
+static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
+				 struct file *file)
 {
 	struct elf32_phdr *phdr;
 	unsigned long size;
@@ -121,7 +138,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f
 	if (!params->phdrs)
 		return -ENOMEM;
 
-	retval = kernel_read(file, params->hdr.e_phoff, (char *) params->phdrs, size);
+	retval = kernel_read(file, params->hdr.e_phoff,
+			     (char *) params->phdrs, size);
 	if (retval < 0)
 		return retval;
 
@@ -141,17 +159,24 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f
 	}
 
 	return 0;
-} /* end elf_fdpic_fetch_phdrs() */
+}
 
 /*****************************************************************************/
 /*
  * load an fdpic binary into various bits of memory
  */
-static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_elf_fdpic_binary(struct linux_binprm *bprm,
+				 struct pt_regs *regs)
 {
 	struct elf_fdpic_params exec_params, interp_params;
 	struct elf_phdr *phdr;
-	unsigned long stack_size;
+	unsigned long stack_size, entryaddr;
+#ifndef CONFIG_MMU
+	unsigned long fullsize;
+#endif
+#ifdef ELF_FDPIC_PLAT_INIT
+	unsigned long dynaddr;
+#endif
 	struct file *interpreter = NULL; /* to shut gcc up */
 	char *interpreter_name = NULL;
 	int executable_stack;
@@ -212,7 +237,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 			goto error;
 		}
 
-		retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE);
+		retval = kernel_read(interpreter, 0, bprm->buf,
+				     BINPRM_BUF_SIZE);
 		if (retval < 0)
 			goto error;
 
@@ -295,7 +321,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 				    &current->mm->start_stack,
 				    &current->mm->start_brk);
 
-	retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack);
+	retval = setup_arg_pages(bprm, current->mm->start_stack,
+				 executable_stack);
 	if (retval < 0) {
 		send_sig(SIGKILL, current, 0);
 		goto error_kill;
@@ -303,7 +330,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 #endif
 
 	/* load the executable and interpreter into memory */
-	retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, "executable");
+	retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm,
+				    "executable");
 	if (retval < 0)
 		goto error_kill;
 
@@ -324,7 +352,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 	if (!current->mm->start_brk)
 		current->mm->start_brk = current->mm->end_data;
 
-	current->mm->brk = current->mm->start_brk = PAGE_ALIGN(current->mm->start_brk);
+	current->mm->brk = current->mm->start_brk =
+		PAGE_ALIGN(current->mm->start_brk);
 
 #else
 	/* create a stack and brk area big enough for everyone
@@ -336,47 +365,45 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 		stack_size = PAGE_SIZE * 2;
 
 	down_write(&current->mm->mmap_sem);
-	current->mm->start_brk = do_mmap(NULL,
-					 0,
-					 stack_size,
+	current->mm->start_brk = do_mmap(NULL, 0, stack_size,
 					 PROT_READ | PROT_WRITE | PROT_EXEC,
 					 MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN,
 					 0);
 
-	if (IS_ERR((void *) current->mm->start_brk)) {
+	if (IS_ERR_VALUE(current->mm->start_brk)) {
 		up_write(&current->mm->mmap_sem);
 		retval = current->mm->start_brk;
 		current->mm->start_brk = 0;
 		goto error_kill;
 	}
 
-	if (do_mremap(current->mm->start_brk,
-		      stack_size,
-		      ksize((char *) current->mm->start_brk),
-		      0, 0
-		      ) == current->mm->start_brk
-	    )
-		stack_size = ksize((char *) current->mm->start_brk);
+	/* expand the stack mapping to use up the entire allocation granule */
+	fullsize = ksize((char *) current->mm->start_brk);
+	if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size,
+				    fullsize, 0, 0)))
+		stack_size = fullsize;
 	up_write(&current->mm->mmap_sem);
 
 	current->mm->brk = current->mm->start_brk;
 	current->mm->context.end_brk = current->mm->start_brk;
-	current->mm->context.end_brk += (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0;
+	current->mm->context.end_brk +=
+		(stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0;
 	current->mm->start_stack = current->mm->start_brk + stack_size;
 #endif
 
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
-	if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0)
+	if (create_elf_fdpic_tables(bprm, current->mm,
+				    &exec_params, &interp_params) < 0)
 		goto error_kill;
 
-	kdebug("- start_code %lx", (long) current->mm->start_code);
-	kdebug("- end_code %lx", (long) current->mm->end_code);
-	kdebug("- start_data %lx", (long) current->mm->start_data);
-	kdebug("- end_data %lx", (long) current->mm->end_data);
-	kdebug("- start_brk %lx", (long) current->mm->start_brk);
-	kdebug("- brk %lx", (long) current->mm->brk);
-	kdebug("- start_stack %lx", (long) current->mm->start_stack);
+	kdebug("- start_code %lx", current->mm->start_code);
+	kdebug("- end_code %lx", current->mm->end_code);
+	kdebug("- start_data %lx", current->mm->start_data);
+	kdebug("- end_data %lx", current->mm->end_data);
+	kdebug("- start_brk %lx", current->mm->start_brk);
+	kdebug("- brk %lx", current->mm->brk);
+	kdebug("- start_stack %lx", current->mm->start_stack);
 
 #ifdef ELF_FDPIC_PLAT_INIT
 	/*
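
The switch from IS_ERR((void *) ...) to IS_ERR_VALUE(...) in the hunk above reflects that do_mmap() returns an unsigned long address with errno values folded into the top of its range; IS_ERR_VALUE() tests that range directly, with no pointer cast, and works equally on the do_mremap() result. A simplified sketch of the check (illustrative; the kernel's version lives in err.h):

	#define MAX_ERRNO	4095

	/* an address in the last 4095 values of the range encodes -errno */
	static inline int is_err_value(unsigned long addr)
	{
		return addr >= (unsigned long)-MAX_ERRNO;
	}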
@@ -385,21 +412,18 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs
 	 * example. This macro performs whatever initialization to
 	 * the regs structure is required.
 	 */
-	ELF_FDPIC_PLAT_INIT(regs,
-			    exec_params.map_addr,
-			    interp_params.map_addr,
-			    interp_params.dynamic_addr ?: exec_params.dynamic_addr
-			    );
+	dynaddr = interp_params.dynamic_addr ?: exec_params.dynamic_addr;
+	ELF_FDPIC_PLAT_INIT(regs, exec_params.map_addr, interp_params.map_addr,
+			    dynaddr);
 #endif
 
 	/* everything is now ready... get the userspace context ready to roll */
-	start_thread(regs,
-		     interp_params.entry_addr ?: exec_params.entry_addr,
-		     current->mm->start_stack);
+	entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
+	start_thread(regs, entryaddr, current->mm->start_stack);
 
 	if (unlikely(current->ptrace & PT_PTRACED)) {
 		if (current->ptrace & PT_TRACE_EXEC)
-			ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
+			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
 		else
 			send_sig(SIGTRAP, current, 0);
 	}
@@ -419,11 +443,11 @@ error:
 	return retval;
 
 	/* unrecoverable error - kill the process */
- error_kill:
+error_kill:
 	send_sig(SIGSEGV, current, 0);
 	goto error;
 
-} /* end load_elf_fdpic_binary() */
+}
 
 /*****************************************************************************/
 /*
@@ -459,6 +483,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	 */
 	hwcap = ELF_HWCAP;
 	k_platform = ELF_PLATFORM;
+	u_platform = NULL;
 
 	if (k_platform) {
 		platform_len = strlen(k_platform) + 1;
@@ -470,11 +495,11 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 #if defined(__i386__) && defined(CONFIG_SMP)
 	/* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions
-	 * by the processes running on the same package. One thing we can do
-	 * is to shuffle the initial stack for them.
+	 * by the processes running on the same package. One thing we can do is
+	 * to shuffle the initial stack for them.
 	 *
-	 * the conditionals here are unneeded, but kept in to make the
-	 * code behaviour the same as pre change unless we have hyperthreaded
+	 * the conditionals here are unneeded, but kept in to make the code
+	 * behaviour the same as pre change unless we have hyperthreaded
 	 * processors. This keeps Mr Marcelo Person happier but should be
 	 * removed for 2.5
 	 */
@@ -497,11 +522,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 	if (interp_params->loadmap) {
 		len = sizeof(struct elf32_fdpic_loadmap);
-		len += sizeof(struct elf32_fdpic_loadseg) * interp_params->loadmap->nsegs;
+		len += sizeof(struct elf32_fdpic_loadseg) *
+			interp_params->loadmap->nsegs;
 		sp = (sp - len) & ~7UL;
 		interp_params->map_addr = sp;
 
-		if (copy_to_user((void __user *) sp, interp_params->loadmap, len) != 0)
+		if (copy_to_user((void __user *) sp, interp_params->loadmap,
+				 len) != 0)
 			return -EFAULT;
 
 		current->mm->context.interp_fdpic_loadmap = (unsigned long) sp;
@@ -525,34 +552,37 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	sp -= sp & 15UL;
 
 	/* put the ELF interpreter info on the stack */
 #define NEW_AUX_ENT(nr, id, val) \
 	do { \
-		struct { unsigned long _id, _val; } __user *ent = (void __user *) csp; \
-		__put_user((id), &ent[nr]._id); \
-		__put_user((val), &ent[nr]._val); \
+		struct { unsigned long _id, _val; } __user *ent; \
+		\
+		ent = (void __user *) csp; \
+		__put_user((id), &ent[nr]._id); \
+		__put_user((val), &ent[nr]._val); \
 	} while (0)
 
 	csp -= 2 * sizeof(unsigned long);
 	NEW_AUX_ENT(0, AT_NULL, 0);
 	if (k_platform) {
 		csp -= 2 * sizeof(unsigned long);
-		NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform);
+		NEW_AUX_ENT(0, AT_PLATFORM,
+			    (elf_addr_t) (unsigned long) u_platform);
 	}
 
 	csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
 	NEW_AUX_ENT( 0, AT_HWCAP, hwcap);
 	NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE);
 	NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC);
 	NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr);
 	NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr));
 	NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum);
 	NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr);
 	NEW_AUX_ENT( 7, AT_FLAGS, 0);
 	NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr);
 	NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid);
 	NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid);
 	NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid);
 	NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid);
 
 #ifdef ARCH_DLINFO
 	/* ARCH_DLINFO must come last so platform specific code can enforce
@@ -578,7 +608,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 #ifdef CONFIG_MMU
 	current->mm->arg_start = bprm->p;
 #else
-	current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
+	current->mm->arg_start = current->mm->start_stack -
+		(MAX_ARG_PAGES * PAGE_SIZE - bprm->p);
 #endif
 
 	p = (char __user *) current->mm->arg_start;
@@ -606,7 +637,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 	mm->start_stack = (unsigned long) sp;
 	return 0;
-} /* end create_elf_fdpic_tables() */
+}
 
 /*****************************************************************************/
 /*
@@ -614,7 +645,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
  * the stack
  */
 #ifndef CONFIG_MMU
-static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp)
+static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm,
+					    unsigned long *_sp)
 {
 	unsigned long index, stop, sp;
 	char *src;
@@ -635,9 +667,9 @@ static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned
 
 	*_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15;
 
- out:
+out:
 	return ret;
-} /* end elf_fdpic_transfer_args_to_stack() */
+}
 #endif
 
 /*****************************************************************************/
@@ -712,17 +744,18 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		seg = loadmap->segs;
 		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
 			if (params->hdr.e_entry >= seg->p_vaddr &&
-			    params->hdr.e_entry < seg->p_vaddr + seg->p_memsz
-			    ) {
+			    params->hdr.e_entry < seg->p_vaddr + seg->p_memsz) {
 				params->entry_addr =
-					(params->hdr.e_entry - seg->p_vaddr) + seg->addr;
+					(params->hdr.e_entry - seg->p_vaddr) +
+					seg->addr;
 				break;
 			}
 		}
 	}
 
 	/* determine where the program header table has wound up if mapped */
-	stop = params->hdr.e_phoff + params->hdr.e_phnum * sizeof (struct elf_phdr);
+	stop = params->hdr.e_phoff;
+	stop += params->hdr.e_phnum * sizeof (struct elf_phdr);
 	phdr = params->phdrs;
 
 	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
@@ -736,9 +769,11 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		seg = loadmap->segs;
 		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
 			if (phdr->p_vaddr >= seg->p_vaddr &&
-			    phdr->p_vaddr + phdr->p_filesz <= seg->p_vaddr + seg->p_memsz
-			    ) {
-				params->ph_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr +
+			    phdr->p_vaddr + phdr->p_filesz <=
+			    seg->p_vaddr + seg->p_memsz) {
+				params->ph_addr =
+					(phdr->p_vaddr - seg->p_vaddr) +
+					seg->addr +
 					params->hdr.e_phoff - phdr->p_offset;
 				break;
 			}
@@ -755,18 +790,22 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		seg = loadmap->segs;
 		for (loop = loadmap->nsegs; loop > 0; loop--, seg++) {
 			if (phdr->p_vaddr >= seg->p_vaddr &&
-			    phdr->p_vaddr + phdr->p_memsz <= seg->p_vaddr + seg->p_memsz
-			    ) {
-				params->dynamic_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr;
-
-				/* check the dynamic section contains at least one item, and that
-				 * the last item is a NULL entry */
+			    phdr->p_vaddr + phdr->p_memsz <=
+			    seg->p_vaddr + seg->p_memsz) {
+				params->dynamic_addr =
+					(phdr->p_vaddr - seg->p_vaddr) +
+					seg->addr;
+
+				/* check the dynamic section contains at least
+				 * one item, and that the last item is a NULL
+				 * entry */
 				if (phdr->p_memsz == 0 ||
 				    phdr->p_memsz % sizeof(Elf32_Dyn) != 0)
 					goto dynamic_error;
 
 				tmp = phdr->p_memsz / sizeof(Elf32_Dyn);
-				if (((Elf32_Dyn *) params->dynamic_addr)[tmp - 1].d_tag != 0)
+				if (((Elf32_Dyn *)
+				     params->dynamic_addr)[tmp - 1].d_tag != 0)
 					goto dynamic_error;
 				break;
 			}
@@ -775,8 +814,8 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 	}
 
 	/* now elide adjacent segments in the load map on MMU linux
-	 * - on uClinux the holes between may actually be filled with system stuff or stuff from
-	 *   other processes
+	 * - on uClinux the holes between may actually be filled with system
+	 *   stuff or stuff from other processes
 	 */
 #ifdef CONFIG_MMU
 	nloads = loadmap->nsegs;
@@ -787,7 +826,9 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 		if (seg->p_vaddr - mseg->p_vaddr == seg->addr - mseg->addr) {
 			load_addr = PAGE_ALIGN(mseg->addr + mseg->p_memsz);
 			if (load_addr == (seg->addr & PAGE_MASK)) {
-				mseg->p_memsz += load_addr - (mseg->addr + mseg->p_memsz);
+				mseg->p_memsz +=
+					load_addr -
+					(mseg->addr + mseg->p_memsz);
 				mseg->p_memsz += seg->addr & ~PAGE_MASK;
 				mseg->p_memsz += seg->p_memsz;
 				loadmap->nsegs--;
@@ -815,20 +856,21 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
 
 	return 0;
 
- dynamic_error:
+dynamic_error:
 	printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
 	       what, file->f_dentry->d_inode->i_ino);
 	return -ELIBBAD;
-} /* end elf_fdpic_map_file() */
+}
 
 /*****************************************************************************/
 /*
  * map a file with constant displacement under uClinux
  */
 #ifndef CONFIG_MMU
-static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params,
-						   struct file *file,
-						   struct mm_struct *mm)
+static int elf_fdpic_map_file_constdisp_on_uclinux(
+	struct elf_fdpic_params *params,
+	struct file *file,
+	struct mm_struct *mm)
 {
 	struct elf32_fdpic_loadseg *seg;
 	struct elf32_phdr *phdr;
@@ -839,7 +881,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 	load_addr = params->load_addr;
 	seg = params->loadmap->segs;
 
-	/* determine the bounds of the contiguous overall allocation we must make */
+	/* determine the bounds of the contiguous overall allocation we must
+	 * make */
 	phdr = params->phdrs;
 	for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) {
 		if (params->phdrs[loop].p_type != PT_LOAD)
@@ -860,7 +903,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 	maddr = do_mmap(NULL, load_addr, top - base,
 			PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
 	up_write(&mm->mmap_sem);
-	if (IS_ERR((void *) maddr))
+	if (IS_ERR_VALUE(maddr))
 		return (int) maddr;
 
 	if (load_addr != 0)
@@ -878,7 +921,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 		seg->p_vaddr = phdr->p_vaddr;
 		seg->p_memsz = phdr->p_memsz;
 
-		ret = file->f_op->read(file, (void *) seg->addr, phdr->p_filesz, &fpos);
+		ret = file->f_op->read(file, (void *) seg->addr,
+				       phdr->p_filesz, &fpos);
 		if (ret < 0)
 			return ret;
 
@@ -895,8 +939,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 		if (phdr->p_flags & PF_X) {
 			mm->start_code = seg->addr;
 			mm->end_code = seg->addr + phdr->p_memsz;
-		}
-		else if (!mm->start_data) {
+		} else if (!mm->start_data) {
 			mm->start_data = seg->addr;
 #ifndef CONFIG_MMU
 			mm->end_data = seg->addr + phdr->p_memsz;
@@ -913,7 +956,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para
 	}
 
 	return 0;
-} /* end elf_fdpic_map_file_constdisp_on_uclinux() */
+}
 #endif
 
 /*****************************************************************************/
@@ -974,14 +1017,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 
 	case ELF_FDPIC_FLAG_CONSTDISP:
 		/* constant displacement
-		 * - can be mapped anywhere, but must be mapped as a unit
+		 * - can be mapped anywhere, but must be mapped as a
+		 *   unit
 		 */
 		if (!dvset) {
 			maddr = load_addr;
 			delta_vaddr = phdr->p_vaddr;
 			dvset = 1;
-		}
-		else {
+		} else {
 			maddr = load_addr + phdr->p_vaddr - delta_vaddr;
 			flags |= MAP_FIXED;
 		}
@@ -1005,13 +1048,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		up_write(&mm->mmap_sem);
 
 		kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
-		       loop, phdr->p_memsz + disp, prot, flags, phdr->p_offset - disp,
-		       maddr);
+		       loop, phdr->p_memsz + disp, prot, flags,
+		       phdr->p_offset - disp, maddr);
 
-		if (IS_ERR((void *) maddr))
+		if (IS_ERR_VALUE(maddr))
 			return (int) maddr;
 
-		if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == ELF_FDPIC_FLAG_CONTIGUOUS)
+		if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) ==
+		    ELF_FDPIC_FLAG_CONTIGUOUS)
 			load_addr += PAGE_ALIGN(phdr->p_memsz + disp);
 
 		seg->addr = maddr + disp;
@@ -1022,7 +1066,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (phdr->p_offset == 0)
 			params->elfhdr_addr = seg->addr;
 
-		/* clear the bit between beginning of mapping and beginning of PT_LOAD */
+		/* clear the bit between beginning of mapping and beginning of
+		 * PT_LOAD */
 		if (prot & PROT_WRITE && disp > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
 			clear_user((void __user *) maddr, disp);
@@ -1038,19 +1083,20 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		excess1 = PAGE_SIZE - ((maddr + phdr->p_filesz) & ~PAGE_MASK);
 
 #ifdef CONFIG_MMU
-
 		if (excess > excess1) {
 			unsigned long xaddr = maddr + phdr->p_filesz + excess1;
 			unsigned long xmaddr;
 
 			flags |= MAP_FIXED | MAP_ANONYMOUS;
 			down_write(&mm->mmap_sem);
-			xmaddr = do_mmap(NULL, xaddr, excess - excess1, prot, flags, 0);
+			xmaddr = do_mmap(NULL, xaddr, excess - excess1,
+					 prot, flags, 0);
 			up_write(&mm->mmap_sem);
 
 			kdebug("mmap[%d] <anon>"
 			       " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
-			       loop, xaddr, excess - excess1, prot, flags, xmaddr);
+			       loop, xaddr, excess - excess1, prot, flags,
+			       xmaddr);
 
 			if (xmaddr != xaddr)
 				return -ENOMEM;
@@ -1059,7 +1105,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (prot & PROT_WRITE && excess1 > 0) {
 			kdebug("clear[%d] ad=%lx sz=%lx",
 			       loop, maddr + phdr->p_filesz, excess1);
-			clear_user((void __user *) maddr + phdr->p_filesz, excess1);
+			clear_user((void __user *) maddr + phdr->p_filesz,
+				   excess1);
 		}
 
 #else
@@ -1074,8 +1121,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 		if (phdr->p_flags & PF_X) {
 			mm->start_code = maddr;
 			mm->end_code = maddr + phdr->p_memsz;
-		}
-		else if (!mm->start_data) {
+		} else if (!mm->start_data) {
 			mm->start_data = maddr;
 			mm->end_data = maddr + phdr->p_memsz;
 		}
@@ -1085,4 +1131,662 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
1085 } 1131 }
1086 1132
1087 return 0; 1133 return 0;
1088} /* end elf_fdpic_map_file_by_direct_mmap() */ 1134}
1135
1136/*****************************************************************************/
1137/*
1138 * ELF-FDPIC core dumper
1139 *
1140 * Modelled on fs/exec.c:aout_core_dump()
1141 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1142 *
1143 * Modelled on fs/binfmt_elf.c core dumper
1144 */
1145#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1146
1147/*
1148 * These are the only things you should do on a core-file: use only these
1149 * functions to write out all the necessary info.
1150 */
1151static int dump_write(struct file *file, const void *addr, int nr)
1152{
1153 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1154}
1155
1156static int dump_seek(struct file *file, loff_t off)
1157{
1158 if (file->f_op->llseek) {
1159 if (file->f_op->llseek(file, off, SEEK_SET) != off)
1160 return 0;
1161 } else {
1162 file->f_pos = off;
1163 }
1164 return 1;
1165}
1166
1167/*
1168 * Decide whether a segment is worth dumping; default is yes to be
1169 * sure (missing info is worse than too much; etc).
1170 * Personally I'd include everything, and use the coredump limit...
1171 *
1172 * I think we should skip something. But I am not sure how. H.J.
1173 */
1174static int maydump(struct vm_area_struct *vma)
1175{
1176 /* Do not dump I/O mapped devices or special mappings */
1177 if (vma->vm_flags & (VM_IO | VM_RESERVED)) {
1178 kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
1179 return 0;
1180 }
1181
1182 /* If we may not read the contents, don't allow us to dump
1183 * them either. "dump_write()" can't handle it anyway.
1184 */
1185 if (!(vma->vm_flags & VM_READ)) {
1186 kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags);
1187 return 0;
1188 }
1189
1190 /* Dump shared memory only if mapped from an anonymous file. */
1191 if (vma->vm_flags & VM_SHARED) {
1192 if (vma->vm_file->f_dentry->d_inode->i_nlink == 0) {
1193 kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags);
1194 return 1;
1195 }
1196
1197 kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags);
1198 return 0;
1199 }
1200
1201#ifdef CONFIG_MMU
1202 /* If it hasn't been written to, don't write it out */
1203 if (!vma->anon_vma) {
1204 kdcore("%08lx: %08lx: no (!anon)", vma->vm_start, vma->vm_flags);
1205 return 0;
1206 }
1207#endif
1208
1209 kdcore("%08lx: %08lx: yes", vma->vm_start, vma->vm_flags);
1210 return 1;
1211}
1212
1213/* An ELF note in memory */
1214struct memelfnote
1215{
1216 const char *name;
1217 int type;
1218 unsigned int datasz;
1219 void *data;
1220};
1221
1222static int notesize(struct memelfnote *en)
1223{
1224 int sz;
1225
1226 sz = sizeof(struct elf_note);
1227 sz += roundup(strlen(en->name) + 1, 4);
1228 sz += roundup(en->datasz, 4);
1229
1230 return sz;
1231}
1232
1233/* #define DEBUG */
1234
1235#define DUMP_WRITE(addr, nr) \
1236 do { if (!dump_write(file, (addr), (nr))) return 0; } while(0)
1237#define DUMP_SEEK(off) \
1238 do { if (!dump_seek(file, (off))) return 0; } while(0)
1239
1240static int writenote(struct memelfnote *men, struct file *file)
1241{
1242 struct elf_note en;
1243
1244 en.n_namesz = strlen(men->name) + 1;
1245 en.n_descsz = men->datasz;
1246 en.n_type = men->type;
1247
1248 DUMP_WRITE(&en, sizeof(en));
1249 DUMP_WRITE(men->name, en.n_namesz);
1250 /* XXX - cast from long long to long to avoid need for libgcc.a */
1251 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1252 DUMP_WRITE(men->data, men->datasz);
1253 DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */
1254
1255 return 1;
1256}
1257#undef DUMP_WRITE
1258#undef DUMP_SEEK
1259
1260#define DUMP_WRITE(addr, nr) \
1261 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1262 goto end_coredump;
1263#define DUMP_SEEK(off) \
1264 if (!dump_seek(file, (off))) \
1265 goto end_coredump;
1266
1267static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
1268{
1269 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1270 elf->e_ident[EI_CLASS] = ELF_CLASS;
1271 elf->e_ident[EI_DATA] = ELF_DATA;
1272 elf->e_ident[EI_VERSION] = EV_CURRENT;
1273 elf->e_ident[EI_OSABI] = ELF_OSABI;
1274 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1275
1276 elf->e_type = ET_CORE;
1277 elf->e_machine = ELF_ARCH;
1278 elf->e_version = EV_CURRENT;
1279 elf->e_entry = 0;
1280 elf->e_phoff = sizeof(struct elfhdr);
1281 elf->e_shoff = 0;
1282 elf->e_flags = ELF_FDPIC_CORE_EFLAGS;
1283 elf->e_ehsize = sizeof(struct elfhdr);
1284 elf->e_phentsize = sizeof(struct elf_phdr);
1285 elf->e_phnum = segs;
1286 elf->e_shentsize = 0;
1287 elf->e_shnum = 0;
1288 elf->e_shstrndx = 0;
1289 return;
1290}
1291
1292static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1293{
1294 phdr->p_type = PT_NOTE;
1295 phdr->p_offset = offset;
1296 phdr->p_vaddr = 0;
1297 phdr->p_paddr = 0;
1298 phdr->p_filesz = sz;
1299 phdr->p_memsz = 0;
1300 phdr->p_flags = 0;
1301 phdr->p_align = 0;
1302 return;
1303}
1304
1305static inline void fill_note(struct memelfnote *note, const char *name, int type,
1306 unsigned int sz, void *data)
1307{
1308 note->name = name;
1309 note->type = type;
1310 note->datasz = sz;
1311 note->data = data;
1312 return;
1313}
1314
1315/*
1316 * fill up all the fields in prstatus from the given task struct, except
1317 * registers which need to be filled up seperately.
1318 */
1319static void fill_prstatus(struct elf_prstatus *prstatus,
1320 struct task_struct *p, long signr)
1321{
1322 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1323 prstatus->pr_sigpend = p->pending.signal.sig[0];
1324 prstatus->pr_sighold = p->blocked.sig[0];
1325 prstatus->pr_pid = p->pid;
1326 prstatus->pr_ppid = p->parent->pid;
1327 prstatus->pr_pgrp = process_group(p);
1328 prstatus->pr_sid = p->signal->session;
1329 if (thread_group_leader(p)) {
1330 /*
1331 * This is the record for the group leader. Add in the
1332 * cumulative times of previous dead threads. This total
1333 * won't include the time of each live thread whose state
1334 * is included in the core dump. The final total reported
1335 * to our parent process when it calls wait4 will include
1336 * those sums as well as the little bit more time it takes
1337 * this and each other thread to finish dying after the
1338 * core dump synchronization phase.
1339 */
1340 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1341 &prstatus->pr_utime);
1342 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1343 &prstatus->pr_stime);
1344 } else {
1345 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1346 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1347 }
1348 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1349 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1350
1351 prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap;
1352 prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap;
1353}
1354
1355static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1356 struct mm_struct *mm)
1357{
1358 unsigned int i, len;
1359
1360 /* first copy the parameters from user space */
1361 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1362
1363 len = mm->arg_end - mm->arg_start;
1364 if (len >= ELF_PRARGSZ)
1365 len = ELF_PRARGSZ - 1;
1366 if (copy_from_user(&psinfo->pr_psargs,
1367 (const char __user *) mm->arg_start, len))
1368 return -EFAULT;
1369 for (i = 0; i < len; i++)
1370 if (psinfo->pr_psargs[i] == 0)
1371 psinfo->pr_psargs[i] = ' ';
1372 psinfo->pr_psargs[len] = 0;
1373
1374 psinfo->pr_pid = p->pid;
1375 psinfo->pr_ppid = p->parent->pid;
1376 psinfo->pr_pgrp = process_group(p);
1377 psinfo->pr_sid = p->signal->session;
1378
1379 i = p->state ? ffz(~p->state) + 1 : 0;
1380 psinfo->pr_state = i;
1381 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1382 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1383 psinfo->pr_nice = task_nice(p);
1384 psinfo->pr_flag = p->flags;
1385 SET_UID(psinfo->pr_uid, p->uid);
1386 SET_GID(psinfo->pr_gid, p->gid);
1387 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1388
1389 return 0;
1390}
1391
1392/* Here is the structure in which status of each thread is captured. */
1393struct elf_thread_status
1394{
1395 struct list_head list;
1396 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1397 elf_fpregset_t fpu; /* NT_PRFPREG */
1398 struct task_struct *thread;
1399#ifdef ELF_CORE_COPY_XFPREGS
1400 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1401#endif
1402 struct memelfnote notes[3];
1403 int num_notes;
1404};
1405
1406/*
1407 * In order to add the specific thread information for the elf file format,
1408 * we need to keep a linked list of every thread's pr_status and then create
1409 * a single section for them in the final core file.
1410 */
1411static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1412{
1413 struct task_struct *p = t->thread;
1414 int sz = 0;
1415
1416 t->num_notes = 0;
1417
1418 fill_prstatus(&t->prstatus, p, signr);
1419 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1420
1421 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1422 &t->prstatus);
1423 t->num_notes++;
1424 sz += notesize(&t->notes[0]);
1425
1426 t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu);
1427 if (t->prstatus.pr_fpvalid) {
1428 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1429 &t->fpu);
1430 t->num_notes++;
1431 sz += notesize(&t->notes[1]);
1432 }
1433
1434#ifdef ELF_CORE_COPY_XFPREGS
1435 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1436 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1437 &t->xfpu);
1438 t->num_notes++;
1439 sz += notesize(&t->notes[2]);
1440 }
1441#endif
1442 return sz;
1443}
1444
1445/*
1446 * dump the segments for an MMU process
1447 */
1448#ifdef CONFIG_MMU
1449static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm,
1450 size_t *size, unsigned long *limit)
1451{
1452 struct vm_area_struct *vma;
1453
1454 for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
1455 unsigned long addr;
1456
1457 if (!maydump(vma))
1458 continue;
1459
1460 for (addr = vma->vm_start;
1461 addr < vma->vm_end;
1462 addr += PAGE_SIZE
1463 ) {
1464 struct vm_area_struct *vma;
1465 struct page *page;
1466
1467 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1468 &page, &vma) <= 0) {
1469 DUMP_SEEK(file->f_pos + PAGE_SIZE);
1470 }
1471 else if (page == ZERO_PAGE(addr)) {
1472 DUMP_SEEK(file->f_pos + PAGE_SIZE);
1473 page_cache_release(page);
1474 }
1475 else {
1476 void *kaddr;
1477
1478 flush_cache_page(vma, addr, page_to_pfn(page));
1479 kaddr = kmap(page);
1480 if ((*size += PAGE_SIZE) > *limit ||
1481 !dump_write(file, kaddr, PAGE_SIZE)
1482 ) {
1483 kunmap(page);
1484 page_cache_release(page);
1485 return -EIO;
1486 }
1487 kunmap(page);
1488 page_cache_release(page);
1489 }
1490 }
1491 }
1492
1493 return 0;
1494
1495end_coredump:
1496 return -EFBIG;
1497}
1498#endif
1499
1500/*
1501 * dump the segments for a NOMMU process
1502 */
1503#ifndef CONFIG_MMU
1504static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm,
1505 size_t *size, unsigned long *limit)
1506{
1507 struct vm_list_struct *vml;
1508
1509 for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
1510 struct vm_area_struct *vma = vml->vma;
1511
1512 if (!maydump(vma))
1513 continue;
1514
1515 if ((*size += PAGE_SIZE) > *limit)
1516 return -EFBIG;
1517
1518 if (!dump_write(file, (void *) vma->vm_start,
1519 vma->vm_end - vma->vm_start))
1520 return -EIO;
1521 }
1522
1523 return 0;
1524}
1525#endif
1526
1527/*
1528 * Actual dumper
1529 *
1530 * This is a two-pass process; first we find the offsets of the bits,
1531 * and then they are actually written out. If we run out of core limit
1532 * we just truncate.
1533 */
1534static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1535 struct file *file)
1536{
1537#define NUM_NOTES 6
1538 int has_dumped = 0;
1539 mm_segment_t fs;
1540 int segs;
1541 size_t size = 0;
1542 int i;
1543 struct vm_area_struct *vma;
1544 struct elfhdr *elf = NULL;
1545 loff_t offset = 0, dataoff;
1546 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1547 int numnote;
1548 struct memelfnote *notes = NULL;
1549 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1550 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1551 struct task_struct *g, *p;
1552 LIST_HEAD(thread_list);
1553 struct list_head *t;
1554 elf_fpregset_t *fpu = NULL;
1555#ifdef ELF_CORE_COPY_XFPREGS
1556 elf_fpxregset_t *xfpu = NULL;
1557#endif
1558 int thread_status_size = 0;
1559#ifndef CONFIG_MMU
1560 struct vm_list_struct *vml;
1561#endif
1562 elf_addr_t *auxv;
1563
1564 /*
1565 * We no longer stop all VM operations.
1566 *
1567 * This is because those proceses that could possibly change map_count
1568 * or the mmap / vma pages are now blocked in do_exit on current
1569 * finishing this core dump.
1570 *
1571 * Only ptrace can touch these memory addresses, but it doesn't change
1572 * the map_count or the pages allocated. So no possibility of crashing
1573 * exists while dumping the mm->vm_next areas to the core file.
1574 */
1575
1576 /* alloc memory for large data structures: too large to be on stack */
1577 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1578 if (!elf)
1579 goto cleanup;
1580 prstatus = kzalloc(sizeof(*prstatus), GFP_KERNEL);
1581 if (!prstatus)
1582 goto cleanup;
1583 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1584 if (!psinfo)
1585 goto cleanup;
1586 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1587 if (!notes)
1588 goto cleanup;
1589 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1590 if (!fpu)
1591 goto cleanup;
1592#ifdef ELF_CORE_COPY_XFPREGS
1593 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1594 if (!xfpu)
1595 goto cleanup;
1596#endif
1597
1598 if (signr) {
1599 struct elf_thread_status *tmp;
1600 read_lock(&tasklist_lock);
1601 do_each_thread(g,p)
1602 if (current->mm == p->mm && current != p) {
1603 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1604 if (!tmp) {
1605 read_unlock(&tasklist_lock);
1606 goto cleanup;
1607 }
1608 INIT_LIST_HEAD(&tmp->list);
1609 tmp->thread = p;
1610 list_add(&tmp->list, &thread_list);
1611 }
1612 while_each_thread(g,p);
1613 read_unlock(&tasklist_lock);
1614 list_for_each(t, &thread_list) {
1615 struct elf_thread_status *tmp;
1616 int sz;
1617
1618 tmp = list_entry(t, struct elf_thread_status, list);
1619 sz = elf_dump_thread_status(signr, tmp);
1620 thread_status_size += sz;
1621 }
1622 }
1623
1624 /* now collect the dump for the current task */
1625 fill_prstatus(prstatus, current, signr);
1626 elf_core_copy_regs(&prstatus->pr_reg, regs);
1627
1628#ifdef CONFIG_MMU
1629 segs = current->mm->map_count;
1630#else
1631 segs = 0;
1632 for (vml = current->mm->context.vmlist; vml; vml = vml->next)
1633 segs++;
1634#endif
1635#ifdef ELF_CORE_EXTRA_PHDRS
1636 segs += ELF_CORE_EXTRA_PHDRS;
1637#endif
1638
1639 /* Set up header */
1640 fill_elf_fdpic_header(elf, segs + 1); /* including notes section */
1641
1642 has_dumped = 1;
1643 current->flags |= PF_DUMPCORE;
1644
1645 /*
1646 * Set up the notes in similar form to SVR4 core dumps made
1647 * with info from their /proc.
1648 */
1649
1650 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1651 fill_psinfo(psinfo, current->group_leader, current->mm);
1652 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1653
1654 numnote = 2;
1655
1656 auxv = (elf_addr_t *) current->mm->saved_auxv;
1657
1658 i = 0;
1659 do
1660 i += 2;
1661 while (auxv[i - 2] != AT_NULL);
1662 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1663 i * sizeof(elf_addr_t), auxv);
1664
1665 /* Try to dump the FPU. */
1666 if ((prstatus->pr_fpvalid =
1667 elf_core_copy_task_fpregs(current, regs, fpu)))
1668 fill_note(notes + numnote++,
1669 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1670#ifdef ELF_CORE_COPY_XFPREGS
1671 if (elf_core_copy_task_xfpregs(current, xfpu))
1672 fill_note(notes + numnote++,
1673 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1674#endif
1675
1676 fs = get_fs();
1677 set_fs(KERNEL_DS);
1678
1679 DUMP_WRITE(elf, sizeof(*elf));
1680 offset += sizeof(*elf); /* Elf header */
1681 offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */
1682
1683 /* Write notes phdr entry */
1684 {
1685 struct elf_phdr phdr;
1686 int sz = 0;
1687
1688 for (i = 0; i < numnote; i++)
1689 sz += notesize(notes + i);
1690
1691 sz += thread_status_size;
1692
1693 fill_elf_note_phdr(&phdr, sz, offset);
1694 offset += sz;
1695 DUMP_WRITE(&phdr, sizeof(phdr));
1696 }
1697
1698 /* Page-align dumped data */
1699 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1700
1701 /* write program headers for segments dump */
1702 for (
1703#ifdef CONFIG_MMU
1704 vma = current->mm->mmap; vma; vma = vma->vm_next
1705#else
1706 vml = current->mm->context.vmlist; vml; vml = vml->next
1707#endif
1708 ) {
1709 struct elf_phdr phdr;
1710 size_t sz;
1711
1712#ifndef CONFIG_MMU
1713 vma = vml->vma;
1714#endif
1715
1716 sz = vma->vm_end - vma->vm_start;
1717
1718 phdr.p_type = PT_LOAD;
1719 phdr.p_offset = offset;
1720 phdr.p_vaddr = vma->vm_start;
1721 phdr.p_paddr = 0;
1722 phdr.p_filesz = maydump(vma) ? sz : 0;
1723 phdr.p_memsz = sz;
1724 offset += phdr.p_filesz;
1725 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1726 if (vma->vm_flags & VM_WRITE)
1727 phdr.p_flags |= PF_W;
1728 if (vma->vm_flags & VM_EXEC)
1729 phdr.p_flags |= PF_X;
1730 phdr.p_align = ELF_EXEC_PAGESIZE;
1731
1732 DUMP_WRITE(&phdr, sizeof(phdr));
1733 }
1734
1735#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1736 ELF_CORE_WRITE_EXTRA_PHDRS;
1737#endif
1738
1739 /* write out the notes section */
1740 for (i = 0; i < numnote; i++)
1741 if (!writenote(notes + i, file))
1742 goto end_coredump;
1743
1744 /* write out the thread status notes section */
1745 list_for_each(t, &thread_list) {
1746 struct elf_thread_status *tmp =
1747 list_entry(t, struct elf_thread_status, list);
1748
1749 for (i = 0; i < tmp->num_notes; i++)
1750 if (!writenote(&tmp->notes[i], file))
1751 goto end_coredump;
1752 }
1753
1754 DUMP_SEEK(dataoff);
1755
1756 if (elf_fdpic_dump_segments(file, current->mm, &size, &limit) < 0)
1757 goto end_coredump;
1758
1759#ifdef ELF_CORE_WRITE_EXTRA_DATA
1760 ELF_CORE_WRITE_EXTRA_DATA;
1761#endif
1762
1763 if (file->f_pos != offset) {
1764 /* Sanity check */
1765 printk(KERN_WARNING
1766 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
1767 file->f_pos, offset);
1768 }
1769
1770end_coredump:
1771 set_fs(fs);
1772
1773cleanup:
1774 while (!list_empty(&thread_list)) {
1775 struct list_head *tmp = thread_list.next;
1776 list_del(tmp);
1777 kfree(list_entry(tmp, struct elf_thread_status, list));
1778 }
1779
1780 kfree(elf);
1781 kfree(prstatus);
1782 kfree(psinfo);
1783 kfree(notes);
1784 kfree(fpu);
1785#ifdef ELF_CORE_COPY_XFPREGS
1786 kfree(xfpu);
1787#endif
1788 return has_dumped;
1789#undef NUM_NOTES
1790}
1791
1792#endif /* USE_ELF_CORE_DUMP */
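
The dumper above is two-pass: the first pass only computes where each piece will land (ELF header, program headers, notes, then page-aligned segment data), and the second pass writes to those precomputed offsets. A hedged sketch of the first pass; struct seg and layout_segments() are illustrative, not kernel code:

    #include <sys/types.h>

    /* Illustrative segment table; the kernel walks VMAs instead. */
    struct seg { unsigned long start, end; off_t off; };

    static off_t layout_segments(struct seg *seg, int nseg,
                                 off_t headers_end, long pagesz)
    {
            /* page-align the data area, the 'dataoff' step above */
            off_t off = (headers_end + pagesz - 1) & ~(off_t)(pagesz - 1);
            int i;

            for (i = 0; i < nseg; i++) {
                    seg[i].off = off;       /* pass 1: record offsets only */
                    off += seg[i].end - seg[i].start;
            }
            return off;     /* pass 2 writes headers, then data, up to here */
    }
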
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 37534573960b..045f98854f14 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size)
884} 884}
885EXPORT_SYMBOL(bd_set_size); 885EXPORT_SYMBOL(bd_set_size);
886 886
887static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
888{
889 int ret = 0;
890 struct inode *bd_inode = bdev->bd_inode;
891 struct gendisk *disk = bdev->bd_disk;
892
893 mutex_lock_nested(&bdev->bd_mutex, subclass);
894 lock_kernel();
895 if (!--bdev->bd_openers) {
896 sync_blockdev(bdev);
897 kill_bdev(bdev);
898 }
899 if (bdev->bd_contains == bdev) {
900 if (disk->fops->release)
901 ret = disk->fops->release(bd_inode, NULL);
902 } else {
903 mutex_lock_nested(&bdev->bd_contains->bd_mutex,
904 subclass + 1);
905 bdev->bd_contains->bd_part_count--;
906 mutex_unlock(&bdev->bd_contains->bd_mutex);
907 }
908 if (!bdev->bd_openers) {
909 struct module *owner = disk->fops->owner;
910
911 put_disk(disk);
912 module_put(owner);
913
914 if (bdev->bd_contains != bdev) {
915 kobject_put(&bdev->bd_part->kobj);
916 bdev->bd_part = NULL;
917 }
918 bdev->bd_disk = NULL;
919 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
920 if (bdev != bdev->bd_contains)
921 __blkdev_put(bdev->bd_contains, subclass + 1);
922 bdev->bd_contains = NULL;
923 }
924 unlock_kernel();
925 mutex_unlock(&bdev->bd_mutex);
926 bdput(bdev);
927 return ret;
928}
929
930int blkdev_put(struct block_device *bdev)
931{
932 return __blkdev_put(bdev, BD_MUTEX_NORMAL);
933}
934EXPORT_SYMBOL(blkdev_put);
935
936int blkdev_put_partition(struct block_device *bdev)
937{
938 return __blkdev_put(bdev, BD_MUTEX_PARTITION);
939}
940EXPORT_SYMBOL(blkdev_put_partition);
941
887static int 942static int
888blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); 943blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
889 944
@@ -980,7 +1035,7 @@ out_first:
980 bdev->bd_disk = NULL; 1035 bdev->bd_disk = NULL;
981 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1036 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
982 if (bdev != bdev->bd_contains) 1037 if (bdev != bdev->bd_contains)
983 blkdev_put(bdev->bd_contains); 1038 __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE);
984 bdev->bd_contains = NULL; 1039 bdev->bd_contains = NULL;
985 put_disk(disk); 1040 put_disk(disk);
986 module_put(owner); 1041 module_put(owner);
@@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1079 return res; 1134 return res;
1080} 1135}
1081 1136
1082static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
1083{
1084 int ret = 0;
1085 struct inode *bd_inode = bdev->bd_inode;
1086 struct gendisk *disk = bdev->bd_disk;
1087
1088 mutex_lock_nested(&bdev->bd_mutex, subclass);
1089 lock_kernel();
1090 if (!--bdev->bd_openers) {
1091 sync_blockdev(bdev);
1092 kill_bdev(bdev);
1093 }
1094 if (bdev->bd_contains == bdev) {
1095 if (disk->fops->release)
1096 ret = disk->fops->release(bd_inode, NULL);
1097 } else {
1098 mutex_lock_nested(&bdev->bd_contains->bd_mutex,
1099 subclass + 1);
1100 bdev->bd_contains->bd_part_count--;
1101 mutex_unlock(&bdev->bd_contains->bd_mutex);
1102 }
1103 if (!bdev->bd_openers) {
1104 struct module *owner = disk->fops->owner;
1105
1106 put_disk(disk);
1107 module_put(owner);
1108
1109 if (bdev->bd_contains != bdev) {
1110 kobject_put(&bdev->bd_part->kobj);
1111 bdev->bd_part = NULL;
1112 }
1113 bdev->bd_disk = NULL;
1114 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1115 if (bdev != bdev->bd_contains)
1116 __blkdev_put(bdev->bd_contains, subclass + 1);
1117 bdev->bd_contains = NULL;
1118 }
1119 unlock_kernel();
1120 mutex_unlock(&bdev->bd_mutex);
1121 bdput(bdev);
1122 return ret;
1123}
1124
1125int blkdev_put(struct block_device *bdev)
1126{
1127 return __blkdev_put(bdev, BD_MUTEX_NORMAL);
1128}
1129
1130EXPORT_SYMBOL(blkdev_put);
1131
1132int blkdev_put_partition(struct block_device *bdev)
1133{
1134 return __blkdev_put(bdev, BD_MUTEX_PARTITION);
1135}
1136
1137EXPORT_SYMBOL(blkdev_put_partition);
1138
1139static int blkdev_close(struct inode * inode, struct file * filp) 1137static int blkdev_close(struct inode * inode, struct file * filp)
1140{ 1138{
1141 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1139 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
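
__blkdev_put() above is lock-annotation driven: each nesting level of bd_mutex gets its own lockdep subclass via mutex_lock_nested(), so releasing a partition may recurse into the whole disk's mutex without tripping the same-class deadlock check. A minimal sketch of the pattern, assuming an illustrative struct node rather than struct block_device:

    #include <linux/mutex.h>

    /* Illustrative containment chain; not struct block_device. */
    struct node {
            struct mutex lock;
            struct node *parent;    /* whole-device node, or NULL */
            int openers;
    };

    static void node_put(struct node *n, unsigned int subclass)
    {
            mutex_lock_nested(&n->lock, subclass);  /* distinct lockdep key */
            if (!--n->openers && n->parent)
                    node_put(n->parent, subclass + 1);  /* next level down */
            mutex_unlock(&n->lock);
    }
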
diff --git a/fs/buffer.c b/fs/buffer.c
index 3660dcb97591..71649ef9b658 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -470,13 +470,18 @@ out:
470 pass does the actual I/O. */ 470 pass does the actual I/O. */
471void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) 471void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
472{ 472{
473 struct address_space *mapping = bdev->bd_inode->i_mapping;
474
475 if (mapping->nrpages == 0)
476 return;
477
473 invalidate_bh_lrus(); 478 invalidate_bh_lrus();
474 /* 479 /*
475 * FIXME: what about destroy_dirty_buffers? 480 * FIXME: what about destroy_dirty_buffers?
476 * We really want to use invalidate_inode_pages2() for 481 * We really want to use invalidate_inode_pages2() for
477 * that, but not until that's cleaned up. 482 * that, but not until that's cleaned up.
478 */ 483 */
479 invalidate_inode_pages(bdev->bd_inode->i_mapping); 484 invalidate_inode_pages(mapping);
480} 485}
481 486
482/* 487/*
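
The invalidate_bdev() change above is a fast-path guard: when the mapping holds no pages, return before paying for invalidate_bh_lrus(), which touches per-CPU state. The same shape in a hedged, self-contained form; all names are illustrative:

    #include <stddef.h>

    struct cache { size_t nr_entries; };

    static void flush_global_lrus(void) { /* stand-in for the costly step */ }
    static void drop_entries(struct cache *c) { c->nr_entries = 0; }

    static void invalidate_cache(struct cache *c)
    {
            if (c->nr_entries == 0)
                    return;         /* empty mapping: skip the expensive flush */
            flush_global_lrus();
            drop_entries(c);
    }
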
diff --git a/fs/char_dev.c b/fs/char_dev.c
index a4cbc6706ef0..3483d3cf8087 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -182,6 +182,28 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
182 return 0; 182 return 0;
183} 183}
184 184
185/**
186 * register_chrdev() - Register a major number for character devices.
187 * @major: major device number or 0 for dynamic allocation
188 * @name: name of this range of devices
189 * @fops: file operations associated with these devices
190 *
191 * If @major == 0 this function will dynamically allocate a major and return
192 * its number.
193 *
194 * If @major > 0 this function will attempt to reserve a device with the given
195 * major number and will return zero on success.
196 *
197 * Returns a negative errno on failure.
198 *
199 * The name of this device has nothing to do with the name of the device in
200 * /dev. It only helps to keep track of the different owners of devices. If
201 * your module has only one type of device it's OK to use e.g. the name
202 * of the module here.
203 *
204 * This function registers a range of 256 minor numbers. The first minor number
205 * is 0.
206 */
185int register_chrdev(unsigned int major, const char *name, 207int register_chrdev(unsigned int major, const char *name,
186 const struct file_operations *fops) 208 const struct file_operations *fops)
187{ 209{
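
A hedged usage example for the interface documented above, passing @major == 0 to request a dynamically allocated major; the "demo" name and demo_fops are placeholders, not in-tree code:

    #include <linux/fs.h>
    #include <linux/module.h>

    static const struct file_operations demo_fops = {
            .owner = THIS_MODULE,
    };

    static int demo_major;

    static int __init demo_init(void)
    {
            /* 0 requests a dynamically allocated major */
            demo_major = register_chrdev(0, "demo", &demo_fops);
            if (demo_major < 0)
                    return demo_major;      /* negative errno on failure */
            return 0;                       /* minors 0..255 are now ours */
    }

    static void __exit demo_exit(void)
    {
            unregister_chrdev(demo_major, "demo");
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
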
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index a61d17ed1827..0feb3bd49cb8 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,13 @@
1Version 1.45
2------------
3Do not time out lockw calls when using posix extensions. Do not
4time out requests if the server is still responding reasonably fast
5to requests from other threads. Improve POSIX locking emulation
6(lock cancel now works, and unlock of merged range works even
7to Windows servers now). Fix oops on mount to lanman servers
8(win9x, os/2 etc.) when the password is null. Do not send listxattr
9(SMB to query all EAs) if nouser_xattr specified.
10
1Version 1.44 11Version 1.44
2------------ 12------------
3Rewritten sessionsetup support, including support for legacy SMB 13Rewritten sessionsetup support, including support for legacy SMB
diff --git a/fs/cifs/README b/fs/cifs/README
index 7986d0d97ace..5f0e1bd64fee 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -408,7 +408,7 @@ A partial list of the supported mount options follows:
408 user_xattr Allow getting and setting user xattrs as OS/2 EAs (extended 408 user_xattr Allow getting and setting user xattrs as OS/2 EAs (extended
409 attributes) to the server (default) e.g. via setfattr 409 attributes) to the server (default) e.g. via setfattr
410 and getfattr utilities. 410 and getfattr utilities.
411 nouser_xattr Do not allow getfattr/setfattr to get/set xattrs 411 nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs
412 mapchars Translate six of the seven reserved characters (not backslash) 412 mapchars Translate six of the seven reserved characters (not backslash)
413 *?<>|: 413 *?<>|:
414 to the remap range (above 0xF000), which also 414 to the remap range (above 0xF000), which also
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index a89efaf78a26..4bc250b2d9fc 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -277,7 +277,8 @@ void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key)
277 return; 277 return;
278 278
279 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); 279 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
280 strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); 280 if(ses->password)
281 strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE);
281 282
282 if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) 283 if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0)
283 if(extended_security & CIFSSEC_MAY_PLNTXT) { 284 if(extended_security & CIFSSEC_MAY_PLNTXT) {
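
The lanman oops fix above is the zero-then-copy guard: the padded buffer is cleared first, so a NULL password legitimately yields an all-zero key instead of a crash inside strncpy(). A hedged sketch; ENCPWD_SIZE stands in for CIFS_ENCPWD_SIZE:

    #include <string.h>

    #define ENCPWD_SIZE 16  /* stands in for CIFS_ENCPWD_SIZE */

    static void pad_password(char pad[ENCPWD_SIZE], const char *password)
    {
            memset(pad, 0, ENCPWD_SIZE);    /* zero padding is the default */
            if (password)                   /* guard: NULL means no password */
                    strncpy(pad, password, ENCPWD_SIZE);
    }
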
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c28ede599946..3cd750029be2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -402,7 +402,6 @@ static struct quotactl_ops cifs_quotactl_ops = {
402}; 402};
403#endif 403#endif
404 404
405#ifdef CONFIG_CIFS_EXPERIMENTAL
406static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) 405static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
407{ 406{
408 struct cifs_sb_info *cifs_sb; 407 struct cifs_sb_info *cifs_sb;
@@ -422,7 +421,7 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
422 tcon->tidStatus = CifsExiting; 421 tcon->tidStatus = CifsExiting;
423 up(&tcon->tconSem); 422 up(&tcon->tconSem);
424 423
425 /* cancel_brl_requests(tcon); */ 424 /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
426 /* cancel_notify_requests(tcon); */ 425 /* cancel_notify_requests(tcon); */
427 if(tcon->ses && tcon->ses->server) 426 if(tcon->ses && tcon->ses->server)
428 { 427 {
@@ -438,7 +437,6 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags)
438 437
439 return; 438 return;
440} 439}
441#endif
442 440
443static int cifs_remount(struct super_block *sb, int *flags, char *data) 441static int cifs_remount(struct super_block *sb, int *flags, char *data)
444{ 442{
@@ -457,9 +455,7 @@ struct super_operations cifs_super_ops = {
457 unless later we add lazy close of inodes or unless the kernel forgets to call 455 unless later we add lazy close of inodes or unless the kernel forgets to call
458 us with the same number of releases (closes) as opens */ 456 us with the same number of releases (closes) as opens */
459 .show_options = cifs_show_options, 457 .show_options = cifs_show_options,
460#ifdef CONFIG_CIFS_EXPERIMENTAL
461 .umount_begin = cifs_umount_begin, 458 .umount_begin = cifs_umount_begin,
462#endif
463 .remount_fs = cifs_remount, 459 .remount_fs = cifs_remount,
464}; 460};
465 461
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 8f75c6f24701..39ee8ef3bdeb 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 100extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
101extern int cifs_ioctl (struct inode * inode, struct file * filep, 101extern int cifs_ioctl (struct inode * inode, struct file * filep,
102 unsigned int command, unsigned long arg); 102 unsigned int command, unsigned long arg);
103#define CIFS_VERSION "1.44" 103#define CIFS_VERSION "1.45"
104#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6d7cf5f3bc0b..b24006c47df1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2006 4 * Copyright (C) International Business Machines Corp., 2002,2006
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * Jeremy Allison (jra@samba.org)
6 * 7 *
7 * This library is free software; you can redistribute it and/or modify 8 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published 9 * it under the terms of the GNU Lesser General Public License as published
@@ -158,7 +159,8 @@ struct TCP_Server_Info {
158 /* 16th byte of RFC1001 workstation name is always null */ 159 /* 16th byte of RFC1001 workstation name is always null */
159 char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; 160 char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL];
160 __u32 sequence_number; /* needed for CIFS PDU signature */ 161 __u32 sequence_number; /* needed for CIFS PDU signature */
161 char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; 162 char mac_signing_key[CIFS_SESS_KEY_SIZE + 16];
163 unsigned long lstrp; /* when we got last response from this server */
162}; 164};
163 165
164/* 166/*
@@ -266,14 +268,14 @@ struct cifsTconInfo {
266}; 268};
267 269
268/* 270/*
269 * This info hangs off the cifsFileInfo structure. This is used to track 271 * This info hangs off the cifsFileInfo structure, pointed to by llist.
270 * byte stream locks on the file 272 * This is used to track byte stream locks on the file
271 */ 273 */
272struct cifsLockInfo { 274struct cifsLockInfo {
273 struct cifsLockInfo *next; 275 struct list_head llist; /* pointer to next cifsLockInfo */
274 int start; 276 __u64 offset;
275 int length; 277 __u64 length;
276 int type; 278 __u8 type;
277}; 279};
278 280
279/* 281/*
@@ -304,6 +306,8 @@ struct cifsFileInfo {
304 /* lock scope id (0 if none) */ 306 /* lock scope id (0 if none) */
305 struct file * pfile; /* needed for writepage */ 307 struct file * pfile; /* needed for writepage */
306 struct inode * pInode; /* needed for oplock break */ 308 struct inode * pInode; /* needed for oplock break */
309 struct semaphore lock_sem;
310 struct list_head llist; /* list of byte range locks we have. */
307 unsigned closePend:1; /* file is marked to close */ 311 unsigned closePend:1; /* file is marked to close */
308 unsigned invalidHandle:1; /* file closed via session abend */ 312 unsigned invalidHandle:1; /* file closed via session abend */
309 atomic_t wrtPending; /* handle in use - defer close */ 313 atomic_t wrtPending; /* handle in use - defer close */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index a5ddc62d6fe6..b35c55c3c8bb 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -50,6 +50,10 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
50extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, 50extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
51 struct kvec *, int /* nvec to send */, 51 struct kvec *, int /* nvec to send */,
52 int * /* type of buf returned */ , const int long_op); 52 int * /* type of buf returned */ , const int long_op);
53extern int SendReceiveBlockingLock(const unsigned int /* xid */ , struct cifsTconInfo *,
54 struct smb_hdr * /* input */ ,
55 struct smb_hdr * /* out */ ,
56 int * /* bytes returned */);
53extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid); 57extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid);
54extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length); 58extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length);
55extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); 59extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 19678c575dfc..075d8fb3d376 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -477,7 +477,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
477 /* BB get server time for time conversions and add 477 /* BB get server time for time conversions and add
478 code to use it and timezone since this is not UTC */ 478 code to use it and timezone since this is not UTC */
479 479
480 if (rsp->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { 480 if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
481 memcpy(server->cryptKey, rsp->EncryptionKey, 481 memcpy(server->cryptKey, rsp->EncryptionKey,
482 CIFS_CRYPTO_KEY_SIZE); 482 CIFS_CRYPTO_KEY_SIZE);
483 } else if (server->secMode & SECMODE_PW_ENCRYPT) { 483 } else if (server->secMode & SECMODE_PW_ENCRYPT) {
@@ -1460,8 +1460,13 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1460 pSMB->hdr.smb_buf_length += count; 1460 pSMB->hdr.smb_buf_length += count;
1461 pSMB->ByteCount = cpu_to_le16(count); 1461 pSMB->ByteCount = cpu_to_le16(count);
1462 1462
1463 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1463 if (waitFlag) {
1464 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
1465 (struct smb_hdr *) pSMBr, &bytes_returned);
1466 } else {
1467 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1464 (struct smb_hdr *) pSMBr, &bytes_returned, timeout); 1468 (struct smb_hdr *) pSMBr, &bytes_returned, timeout);
1469 }
1465 cifs_stats_inc(&tcon->num_locks); 1470 cifs_stats_inc(&tcon->num_locks);
1466 if (rc) { 1471 if (rc) {
1467 cFYI(1, ("Send error in Lock = %d", rc)); 1472 cFYI(1, ("Send error in Lock = %d", rc));
@@ -1484,6 +1489,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1484 char *data_offset; 1489 char *data_offset;
1485 struct cifs_posix_lock *parm_data; 1490 struct cifs_posix_lock *parm_data;
1486 int rc = 0; 1491 int rc = 0;
1492 int timeout = 0;
1487 int bytes_returned = 0; 1493 int bytes_returned = 0;
1488 __u16 params, param_offset, offset, byte_count, count; 1494 __u16 params, param_offset, offset, byte_count, count;
1489 1495
@@ -1503,7 +1509,6 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1503 pSMB->MaxSetupCount = 0; 1509 pSMB->MaxSetupCount = 0;
1504 pSMB->Reserved = 0; 1510 pSMB->Reserved = 0;
1505 pSMB->Flags = 0; 1511 pSMB->Flags = 0;
1506 pSMB->Timeout = 0;
1507 pSMB->Reserved2 = 0; 1512 pSMB->Reserved2 = 0;
1508 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; 1513 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
1509 offset = param_offset + params; 1514 offset = param_offset + params;
@@ -1529,8 +1534,13 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1529 (((char *) &pSMB->hdr.Protocol) + offset); 1534 (((char *) &pSMB->hdr.Protocol) + offset);
1530 1535
1531 parm_data->lock_type = cpu_to_le16(lock_type); 1536 parm_data->lock_type = cpu_to_le16(lock_type);
1532 if(waitFlag) 1537 if(waitFlag) {
1538 timeout = 3; /* blocking operation, no timeout */
1533 parm_data->lock_flags = cpu_to_le16(1); 1539 parm_data->lock_flags = cpu_to_le16(1);
1540 pSMB->Timeout = cpu_to_le32(-1);
1541 } else
1542 pSMB->Timeout = 0;
1543
1534 parm_data->pid = cpu_to_le32(current->tgid); 1544 parm_data->pid = cpu_to_le32(current->tgid);
1535 parm_data->start = cpu_to_le64(pLockData->fl_start); 1545 parm_data->start = cpu_to_le64(pLockData->fl_start);
1536 parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ 1546 parm_data->length = cpu_to_le64(len); /* normalize negative numbers */
@@ -1541,8 +1551,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1541 pSMB->Reserved4 = 0; 1551 pSMB->Reserved4 = 0;
1542 pSMB->hdr.smb_buf_length += byte_count; 1552 pSMB->hdr.smb_buf_length += byte_count;
1543 pSMB->ByteCount = cpu_to_le16(byte_count); 1553 pSMB->ByteCount = cpu_to_le16(byte_count);
1544 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1554 if (waitFlag) {
1545 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1555 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
1556 (struct smb_hdr *) pSMBr, &bytes_returned);
1557 } else {
1558 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1559 (struct smb_hdr *) pSMBr, &bytes_returned, timeout);
1560 }
1561
1546 if (rc) { 1562 if (rc) {
1547 cFYI(1, ("Send error in Posix Lock = %d", rc)); 1563 cFYI(1, ("Send error in Posix Lock = %d", rc));
1548 } else if (get_flag) { 1564 } else if (get_flag) {
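
The EncryptionKeyLength fix above illustrates the general rule for on-the-wire fields: convert exactly one side of the comparison, either the constant to wire order or the field to host order, and never compare raw little-endian storage against a host-order value. A hedged sketch; KEY_SIZE stands in for CIFS_CRYPTO_KEY_SIZE:

    #include <linux/types.h>
    #include <asm/byteorder.h>

    #define KEY_SIZE 8      /* stands in for CIFS_CRYPTO_KEY_SIZE */

    static int key_len_ok(__le16 wire_len)
    {
            /* convert the constant to wire order ... */
            if (wire_len == cpu_to_le16(KEY_SIZE))
                    return 1;
            /* ... or, equivalently: le16_to_cpu(wire_len) == KEY_SIZE */
            return 0;
    }
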
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 876eb9ef85fe..5d394c726860 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -182,6 +182,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
182 182
183 while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) 183 while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood))
184 { 184 {
185 try_to_freeze();
185 if(server->protocolType == IPV6) { 186 if(server->protocolType == IPV6) {
186 rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket); 187 rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket);
187 } else { 188 } else {
@@ -612,6 +613,10 @@ multi_t2_fnd:
612#ifdef CONFIG_CIFS_STATS2 613#ifdef CONFIG_CIFS_STATS2
613 mid_entry->when_received = jiffies; 614 mid_entry->when_received = jiffies;
614#endif 615#endif
 616		/* so we do not time out requests to a server
 617		   that is still responding (since the server
 618		   could be busy but not dead) */
619 server->lstrp = jiffies;
615 break; 620 break;
616 } 621 }
617 } 622 }
@@ -1266,33 +1271,35 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName)
1266 1271
1267 read_lock(&GlobalSMBSeslock); 1272 read_lock(&GlobalSMBSeslock);
1268 list_for_each(tmp, &GlobalTreeConnectionList) { 1273 list_for_each(tmp, &GlobalTreeConnectionList) {
1269 cFYI(1, ("Next tcon - ")); 1274 cFYI(1, ("Next tcon"));
1270 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 1275 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
1271 if (tcon->ses) { 1276 if (tcon->ses) {
1272 if (tcon->ses->server) { 1277 if (tcon->ses->server) {
1273 cFYI(1, 1278 cFYI(1,
1274 (" old ip addr: %x == new ip %x ?", 1279 ("old ip addr: %x == new ip %x ?",
1275 tcon->ses->server->addr.sockAddr.sin_addr. 1280 tcon->ses->server->addr.sockAddr.sin_addr.
1276 s_addr, new_target_ip_addr)); 1281 s_addr, new_target_ip_addr));
1277 if (tcon->ses->server->addr.sockAddr.sin_addr. 1282 if (tcon->ses->server->addr.sockAddr.sin_addr.
1278 s_addr == new_target_ip_addr) { 1283 s_addr == new_target_ip_addr) {
1279 /* BB lock tcon and server and tcp session and increment use count here? */ 1284 /* BB lock tcon, server and tcp session and increment use count here? */
1280 /* found a match on the TCP session */ 1285 /* found a match on the TCP session */
1281 /* BB check if reconnection needed */ 1286 /* BB check if reconnection needed */
1282 cFYI(1,("Matched ip, old UNC: %s == new: %s ?", 1287 cFYI(1,("IP match, old UNC: %s new: %s",
1283 tcon->treeName, uncName)); 1288 tcon->treeName, uncName));
1284 if (strncmp 1289 if (strncmp
1285 (tcon->treeName, uncName, 1290 (tcon->treeName, uncName,
1286 MAX_TREE_SIZE) == 0) { 1291 MAX_TREE_SIZE) == 0) {
1287 cFYI(1, 1292 cFYI(1,
1288 ("Matched UNC, old user: %s == new: %s ?", 1293 ("and old usr: %s new: %s",
1289 tcon->treeName, uncName)); 1294 tcon->treeName, uncName));
1290 if (strncmp 1295 if (strncmp
1291 (tcon->ses->userName, 1296 (tcon->ses->userName,
1292 userName, 1297 userName,
1293 MAX_USERNAME_SIZE) == 0) { 1298 MAX_USERNAME_SIZE) == 0) {
1294 read_unlock(&GlobalSMBSeslock); 1299 read_unlock(&GlobalSMBSeslock);
1295 return tcon;/* also matched user (smb session)*/ 1300 /* matched smb session
 1301 (user name) */
1302 return tcon;
1296 } 1303 }
1297 } 1304 }
1298 } 1305 }
@@ -1969,7 +1976,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1969 } 1976 }
1970 1977
1971 cFYI(1,("Negotiate caps 0x%x",(int)cap)); 1978 cFYI(1,("Negotiate caps 0x%x",(int)cap));
1972 1979#ifdef CONFIG_CIFS_DEBUG2
1980 if(cap & CIFS_UNIX_FCNTL_CAP)
1981 cFYI(1,("FCNTL cap"));
1982 if(cap & CIFS_UNIX_EXTATTR_CAP)
1983 cFYI(1,("EXTATTR cap"));
1984 if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
1985 cFYI(1,("POSIX path cap"));
1986 if(cap & CIFS_UNIX_XATTR_CAP)
1987 cFYI(1,("XATTR cap"));
1988 if(cap & CIFS_UNIX_POSIX_ACL_CAP)
1989 cFYI(1,("POSIX ACL cap"));
1990#endif /* CIFS_DEBUG2 */
1973 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { 1991 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
1974 cFYI(1,("setting capabilities failed")); 1992 cFYI(1,("setting capabilities failed"));
1975 } 1993 }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ba4cbe9b0684..914239d53634 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -267,6 +267,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
267 pCifsFile->invalidHandle = FALSE; 267 pCifsFile->invalidHandle = FALSE;
268 pCifsFile->closePend = FALSE; 268 pCifsFile->closePend = FALSE;
269 init_MUTEX(&pCifsFile->fh_sem); 269 init_MUTEX(&pCifsFile->fh_sem);
270 init_MUTEX(&pCifsFile->lock_sem);
271 INIT_LIST_HEAD(&pCifsFile->llist);
272 atomic_set(&pCifsFile->wrtPending,0);
273
270 /* set the following in open now 274 /* set the following in open now
271 pCifsFile->pfile = file; */ 275 pCifsFile->pfile = file; */
272 write_lock(&GlobalSMBSeslock); 276 write_lock(&GlobalSMBSeslock);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 944d2b9e092d..e9c5ba9084fc 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -5,6 +5,7 @@
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2003 6 * Copyright (C) International Business Machines Corp., 2002,2003
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
8 * 9 *
9 * This library is free software; you can redistribute it and/or modify 10 * This library is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as published 11 * it under the terms of the GNU Lesser General Public License as published
@@ -47,6 +48,8 @@ static inline struct cifsFileInfo *cifs_init_private(
47 private_data->netfid = netfid; 48 private_data->netfid = netfid;
48 private_data->pid = current->tgid; 49 private_data->pid = current->tgid;
49 init_MUTEX(&private_data->fh_sem); 50 init_MUTEX(&private_data->fh_sem);
51 init_MUTEX(&private_data->lock_sem);
52 INIT_LIST_HEAD(&private_data->llist);
50 private_data->pfile = file; /* needed for writepage */ 53 private_data->pfile = file; /* needed for writepage */
51 private_data->pInode = inode; 54 private_data->pInode = inode;
52 private_data->invalidHandle = FALSE; 55 private_data->invalidHandle = FALSE;
@@ -473,6 +476,8 @@ int cifs_close(struct inode *inode, struct file *file)
473 cifs_sb = CIFS_SB(inode->i_sb); 476 cifs_sb = CIFS_SB(inode->i_sb);
474 pTcon = cifs_sb->tcon; 477 pTcon = cifs_sb->tcon;
475 if (pSMBFile) { 478 if (pSMBFile) {
479 struct cifsLockInfo *li, *tmp;
480
476 pSMBFile->closePend = TRUE; 481 pSMBFile->closePend = TRUE;
477 if (pTcon) { 482 if (pTcon) {
478 /* no sense reconnecting to close a file that is 483 /* no sense reconnecting to close a file that is
@@ -496,6 +501,16 @@ int cifs_close(struct inode *inode, struct file *file)
496 pSMBFile->netfid); 501 pSMBFile->netfid);
497 } 502 }
498 } 503 }
504
505 /* Delete any outstanding lock records.
506 We'll lose them when the file is closed anyway. */
507 down(&pSMBFile->lock_sem);
508 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
509 list_del(&li->llist);
510 kfree(li);
511 }
512 up(&pSMBFile->lock_sem);
513
499 write_lock(&GlobalSMBSeslock); 514 write_lock(&GlobalSMBSeslock);
500 list_del(&pSMBFile->flist); 515 list_del(&pSMBFile->flist);
501 list_del(&pSMBFile->tlist); 516 list_del(&pSMBFile->tlist);
@@ -570,6 +585,21 @@ int cifs_closedir(struct inode *inode, struct file *file)
570 return rc; 585 return rc;
571} 586}
572 587
588static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
589 __u64 offset, __u8 lockType)
590{
591 struct cifsLockInfo *li = kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
592 if (li == NULL)
593 return -ENOMEM;
594 li->offset = offset;
595 li->length = len;
596 li->type = lockType;
597 down(&fid->lock_sem);
598 list_add(&li->llist, &fid->llist);
599 up(&fid->lock_sem);
600 return 0;
601}
602
573int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) 603int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
574{ 604{
575 int rc, xid; 605 int rc, xid;
@@ -581,6 +611,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
581 struct cifsTconInfo *pTcon; 611 struct cifsTconInfo *pTcon;
582 __u16 netfid; 612 __u16 netfid;
583 __u8 lockType = LOCKING_ANDX_LARGE_FILES; 613 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
614 int posix_locking;
584 615
585 length = 1 + pfLock->fl_end - pfLock->fl_start; 616 length = 1 + pfLock->fl_end - pfLock->fl_start;
586 rc = -EACCES; 617 rc = -EACCES;
@@ -639,15 +670,14 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
639 } 670 }
640 netfid = ((struct cifsFileInfo *)file->private_data)->netfid; 671 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
641 672
673 posix_locking = (cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
674 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability));
642 675
643 /* BB add code here to normalize offset and length to 676 /* BB add code here to normalize offset and length to
644 account for negative length which we can not accept over the 677 account for negative length which we can not accept over the
645 wire */ 678 wire */
646 if (IS_GETLK(cmd)) { 679 if (IS_GETLK(cmd)) {
647 if(experimEnabled && 680 if(posix_locking) {
648 (cifs_sb->tcon->ses->capabilities & CAP_UNIX) &&
649 (CIFS_UNIX_FCNTL_CAP &
650 le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) {
651 int posix_lock_type; 681 int posix_lock_type;
652 if(lockType & LOCKING_ANDX_SHARED_LOCK) 682 if(lockType & LOCKING_ANDX_SHARED_LOCK)
653 posix_lock_type = CIFS_RDLCK; 683 posix_lock_type = CIFS_RDLCK;
@@ -683,10 +713,15 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
683 FreeXid(xid); 713 FreeXid(xid);
684 return rc; 714 return rc;
685 } 715 }
686 if (experimEnabled && 716
687 (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && 717 if (!numLock && !numUnlock) {
688 (CIFS_UNIX_FCNTL_CAP & 718 /* if no lock or unlock then nothing
689 le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { 719 to do since we do not know what it is */
720 FreeXid(xid);
721 return -EOPNOTSUPP;
722 }
723
724 if (posix_locking) {
690 int posix_lock_type; 725 int posix_lock_type;
691 if(lockType & LOCKING_ANDX_SHARED_LOCK) 726 if(lockType & LOCKING_ANDX_SHARED_LOCK)
692 posix_lock_type = CIFS_RDLCK; 727 posix_lock_type = CIFS_RDLCK;
@@ -695,18 +730,46 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
695 730
696 if(numUnlock == 1) 731 if(numUnlock == 1)
697 posix_lock_type = CIFS_UNLCK; 732 posix_lock_type = CIFS_UNLCK;
698 else if(numLock == 0) { 733
699 /* if no lock or unlock then nothing
700 to do since we do not know what it is */
701 FreeXid(xid);
702 return -EOPNOTSUPP;
703 }
704 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, 734 rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */,
705 length, pfLock, 735 length, pfLock,
706 posix_lock_type, wait_flag); 736 posix_lock_type, wait_flag);
707 } else 737 } else {
708 rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, 738 struct cifsFileInfo *fid = (struct cifsFileInfo *)file->private_data;
709 numUnlock, numLock, lockType, wait_flag); 739
740 if (numLock) {
741 rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start,
742 0, numLock, lockType, wait_flag);
743
744 if (rc == 0) {
745 /* For Windows locks we must store them. */
746 rc = store_file_lock(fid, length,
747 pfLock->fl_start, lockType);
748 }
749 } else if (numUnlock) {
750 /* For each stored lock that this unlock overlaps
751 completely, unlock it. */
752 int stored_rc = 0;
753 struct cifsLockInfo *li, *tmp;
754
755 down(&fid->lock_sem);
756 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
757 if (pfLock->fl_start <= li->offset &&
758 length >= li->length) {
759 stored_rc = CIFSSMBLock(xid, pTcon, netfid,
760 li->length, li->offset,
761 1, 0, li->type, FALSE);
762 if (stored_rc)
763 rc = stored_rc;
764
765 list_del(&li->llist);
766 kfree(li);
767 }
768 }
769 up(&fid->lock_sem);
770 }
771 }
772
710 if (pfLock->fl_flags & FL_POSIX) 773 if (pfLock->fl_flags & FL_POSIX)
711 posix_lock_file_wait(file, pfLock); 774 posix_lock_file_wait(file, pfLock);
712 FreeXid(xid); 775 FreeXid(xid);
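
The lock bookkeeping added above follows a small protocol: every Windows-style lock the server grants is recorded on the file's list under lock_sem, and an unlock walks the list and frees each record the unlock range covers. A condensed, hedged restatement of that protocol with illustrative types, not the in-tree code:

    #include <linux/types.h>
    #include <linux/list.h>
    #include <linux/slab.h>
    #include <asm/semaphore.h>

    struct lock_rec {
            struct list_head llist;
            __u64 offset, length;
    };

    static int record_lock(struct list_head *locks, struct semaphore *sem,
                           __u64 off, __u64 len)
    {
            struct lock_rec *li = kmalloc(sizeof(*li), GFP_KERNEL);
            if (!li)
                    return -ENOMEM;
            li->offset = off;
            li->length = len;
            down(sem);                      /* serialize list updates */
            list_add(&li->llist, locks);
            up(sem);
            return 0;
    }

    static void drop_covered(struct list_head *locks, struct semaphore *sem,
                             __u64 start, __u64 len)
    {
            struct lock_rec *li, *tmp;

            down(sem);
            list_for_each_entry_safe(li, tmp, locks, llist) {
                    if (start <= li->offset && len >= li->length) {
                            /* the per-record server unlock goes here */
                            list_del(&li->llist);
                            kfree(li);
                    }
            }
            up(sem);
    }
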
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index b66eff5dc624..ce87550e918f 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -72,6 +72,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
72 {ERRinvlevel,-EOPNOTSUPP}, 72 {ERRinvlevel,-EOPNOTSUPP},
73 {ERRdirnotempty, -ENOTEMPTY}, 73 {ERRdirnotempty, -ENOTEMPTY},
74 {ERRnotlocked, -ENOLCK}, 74 {ERRnotlocked, -ENOLCK},
75 {ERRcancelviolation, -ENOLCK},
75 {ERRalreadyexists, -EEXIST}, 76 {ERRalreadyexists, -EEXIST},
76 {ERRmoredata, -EOVERFLOW}, 77 {ERRmoredata, -EOVERFLOW},
77 {ERReasnotsupported,-EOPNOTSUPP}, 78 {ERReasnotsupported,-EOPNOTSUPP},
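
The new ERRcancelviolation entry extends the table-driven SMB-to-POSIX errno mapping. The lookup shape, as a hedged generic sketch; err_map and map_smb_error are illustrative, not the cifs functions:

    #include <linux/errno.h>

    struct err_map { int smb_err; int posix_err; };

    static const struct err_map errdos_map[] = {
            { 158 /* ERRnotlocked */,       -ENOLCK },
            { 173 /* ERRcancelviolation */, -ENOLCK },
            { 183 /* ERRalreadyexists */,   -EEXIST },
            { 0, 0 }                        /* terminator */
    };

    static int map_smb_error(int smb_err)
    {
            const struct err_map *m;

            for (m = errdos_map; m->smb_err; m++)
                    if (m->smb_err == smb_err)
                            return m->posix_err;
            return -EIO;                    /* unmapped: generic I/O error */
    }
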
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 03bbcb377913..105761e3ba0e 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -556,7 +556,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
556 FIND_FILE_STANDARD_INFO * pFindData = 556 FIND_FILE_STANDARD_INFO * pFindData =
557 (FIND_FILE_STANDARD_INFO *)current_entry; 557 (FIND_FILE_STANDARD_INFO *)current_entry;
558 filename = &pFindData->FileName[0]; 558 filename = &pFindData->FileName[0];
559 len = le32_to_cpu(pFindData->FileNameLength); 559 len = pFindData->FileNameLength;
560 } else { 560 } else {
561 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); 561 cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level));
562 } 562 }
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7202d534ef0b..d1705ab8136e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -372,7 +372,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
372 372
373 /* no capabilities flags in old lanman negotiation */ 373 /* no capabilities flags in old lanman negotiation */
374 374
375 pSMB->old_req.PasswordLength = CIFS_SESS_KEY_SIZE; 375 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
376 /* BB calculate hash with password */ 376 /* BB calculate hash with password */
377 /* and copy into bcc */ 377 /* and copy into bcc */
378 378
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index cd41c67ff8d3..212c3c296409 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -95,6 +95,7 @@
95#define ERRinvlevel 124 95#define ERRinvlevel 124
96#define ERRdirnotempty 145 96#define ERRdirnotempty 145
97#define ERRnotlocked 158 97#define ERRnotlocked 158
98#define ERRcancelviolation 173
98#define ERRalreadyexists 183 99#define ERRalreadyexists 183
99#define ERRbadpipe 230 100#define ERRbadpipe 230
100#define ERRpipebusy 231 101#define ERRpipebusy 231
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 17ba329e2b3d..48d47b46b1fb 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -3,7 +3,8 @@
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2005 4 * Copyright (C) International Business Machines Corp., 2002,2005
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 * Jeremy Allison (jra@samba.org) 2006.
7 *
7 * This library is free software; you can redistribute it and/or modify 8 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published 9 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or 10 * by the Free Software Foundation; either version 2.1 of the License, or
@@ -36,7 +37,7 @@ extern mempool_t *cifs_mid_poolp;
36extern kmem_cache_t *cifs_oplock_cachep; 37extern kmem_cache_t *cifs_oplock_cachep;
37 38
38static struct mid_q_entry * 39static struct mid_q_entry *
39AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) 40AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
40{ 41{
41 struct mid_q_entry *temp; 42 struct mid_q_entry *temp;
42 43
@@ -203,6 +204,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
203 rc = 0; 204 rc = 0;
204 } 205 }
205 206
207 /* Don't want to modify the buffer as a
208 side effect of this call. */
209 smb_buffer->smb_buf_length = smb_buf_length;
210
206 return rc; 211 return rc;
207} 212}
208 213
@@ -217,6 +222,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
217 unsigned int len = iov[0].iov_len; 222 unsigned int len = iov[0].iov_len;
218 unsigned int total_len; 223 unsigned int total_len;
219 int first_vec = 0; 224 int first_vec = 0;
225 unsigned int smb_buf_length = smb_buffer->smb_buf_length;
220 226
221 if(ssocket == NULL) 227 if(ssocket == NULL)
222 return -ENOTSOCK; /* BB eventually add reconnect code here */ 228 return -ENOTSOCK; /* BB eventually add reconnect code here */
@@ -293,36 +299,15 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
293 } else 299 } else
294 rc = 0; 300 rc = 0;
295 301
302 /* Don't want to modify the buffer as a
303 side effect of this call. */
304 smb_buffer->smb_buf_length = smb_buf_length;
305
296 return rc; 306 return rc;
297} 307}
298 308
299int 309static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op)
300SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
301 struct kvec *iov, int n_vec, int * pRespBufType /* ret */,
302 const int long_op)
303{ 310{
304 int rc = 0;
305 unsigned int receive_len;
306 unsigned long timeout;
307 struct mid_q_entry *midQ;
308 struct smb_hdr *in_buf = iov[0].iov_base;
309
310 *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */
311
312 if ((ses == NULL) || (ses->server == NULL)) {
313 cifs_small_buf_release(in_buf);
314 cERROR(1,("Null session"));
315 return -EIO;
316 }
317
318 if(ses->server->tcpStatus == CifsExiting) {
319 cifs_small_buf_release(in_buf);
320 return -ENOENT;
321 }
322
323 /* Ensure that we do not send more than 50 overlapping requests
324 to the same server. We may make this configurable later or
325 use ses->maxReq */
326 if(long_op == -1) { 311 if(long_op == -1) {
327 /* oplock breaks must not be held up */ 312 /* oplock breaks must not be held up */
328 atomic_inc(&ses->server->inFlight); 313 atomic_inc(&ses->server->inFlight);
@@ -345,53 +330,140 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
345 } else { 330 } else {
346 if(ses->server->tcpStatus == CifsExiting) { 331 if(ses->server->tcpStatus == CifsExiting) {
347 spin_unlock(&GlobalMid_Lock); 332 spin_unlock(&GlobalMid_Lock);
348 cifs_small_buf_release(in_buf);
349 return -ENOENT; 333 return -ENOENT;
350 } 334 }
351 335
352 /* can not count locking commands against total since 336 /* can not count locking commands against total since
353 they are allowed to block on server */ 337 they are allowed to block on server */
354 338
355 if(long_op < 3) {
356 /* update # of requests on the wire to server */ 339 /* update # of requests on the wire to server */
340 if (long_op < 3)
357 atomic_inc(&ses->server->inFlight); 341 atomic_inc(&ses->server->inFlight);
358 }
359 spin_unlock(&GlobalMid_Lock); 342 spin_unlock(&GlobalMid_Lock);
360 break; 343 break;
361 } 344 }
362 } 345 }
363 } 346 }
364 /* make sure that we sign in the same order that we send on this socket 347 return 0;
365 and avoid races inside tcp sendmsg code that could cause corruption 348}
366 of smb data */
367
368 down(&ses->server->tcpSem);
369 349
350static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
351 struct mid_q_entry **ppmidQ)
352{
370 if (ses->server->tcpStatus == CifsExiting) { 353 if (ses->server->tcpStatus == CifsExiting) {
371 rc = -ENOENT; 354 return -ENOENT;
372 goto out_unlock2;
373 } else if (ses->server->tcpStatus == CifsNeedReconnect) { 355 } else if (ses->server->tcpStatus == CifsNeedReconnect) {
374 cFYI(1,("tcp session dead - return to caller to retry")); 356 cFYI(1,("tcp session dead - return to caller to retry"));
375 rc = -EAGAIN; 357 return -EAGAIN;
376 goto out_unlock2;
377 } else if (ses->status != CifsGood) { 358 } else if (ses->status != CifsGood) {
378 /* check if SMB session is bad because we are setting it up */ 359 /* check if SMB session is bad because we are setting it up */
379 if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && 360 if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
380 (in_buf->Command != SMB_COM_NEGOTIATE)) { 361 (in_buf->Command != SMB_COM_NEGOTIATE)) {
381 rc = -EAGAIN; 362 return -EAGAIN;
382 goto out_unlock2;
383 } /* else ok - we are setting up session */ 363 } /* else ok - we are setting up session */
384 } 364 }
385 midQ = AllocMidQEntry(in_buf, ses); 365 *ppmidQ = AllocMidQEntry(in_buf, ses);
386 if (midQ == NULL) { 366 if (*ppmidQ == NULL) {
367 return -ENOMEM;
368 }
369 return 0;
370}
371
372static int wait_for_response(struct cifsSesInfo *ses,
373 struct mid_q_entry *midQ,
374 unsigned long timeout,
375 unsigned long time_to_wait)
376{
377 unsigned long curr_timeout;
378
379 for (;;) {
380 curr_timeout = timeout + jiffies;
381 wait_event(ses->server->response_q,
382 (!(midQ->midState == MID_REQUEST_SUBMITTED)) ||
383 time_after(jiffies, curr_timeout) ||
384 ((ses->server->tcpStatus != CifsGood) &&
385 (ses->server->tcpStatus != CifsNew)));
386
387 if (time_after(jiffies, curr_timeout) &&
388 (midQ->midState == MID_REQUEST_SUBMITTED) &&
389 ((ses->server->tcpStatus == CifsGood) ||
390 (ses->server->tcpStatus == CifsNew))) {
391
392 unsigned long lrt;
393
394 /* We timed out. Is the server still
395 sending replies ? */
396 spin_lock(&GlobalMid_Lock);
397 lrt = ses->server->lstrp;
398 spin_unlock(&GlobalMid_Lock);
399
400 /* Calculate time_to_wait past last receive time.
401 Although we prefer not to time out if the
402 server is still responding - we will time
403 out if the server takes more than 15 (or 45
404 or 180) seconds to respond to this request
405 and has not responded to any request from
406 other threads on the client within 10 seconds */
407 lrt += time_to_wait;
408 if (time_after(jiffies, lrt)) {
409 /* No replies for time_to_wait. */
410 cERROR(1,("server not responding"));
411 return -1;
412 }
413 } else {
414 return 0;
415 }
416 }
417}
418
419int
420SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
421 struct kvec *iov, int n_vec, int * pRespBufType /* ret */,
422 const int long_op)
423{
424 int rc = 0;
425 unsigned int receive_len;
426 unsigned long timeout;
427 struct mid_q_entry *midQ;
428 struct smb_hdr *in_buf = iov[0].iov_base;
429
430 *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */
431
432 if ((ses == NULL) || (ses->server == NULL)) {
433 cifs_small_buf_release(in_buf);
434 cERROR(1,("Null session"));
435 return -EIO;
436 }
437
438 if(ses->server->tcpStatus == CifsExiting) {
439 cifs_small_buf_release(in_buf);
440 return -ENOENT;
441 }
442
443 /* Ensure that we do not send more than 50 overlapping requests
444 to the same server. We may make this configurable later or
445 use ses->maxReq */
446
447 rc = wait_for_free_request(ses, long_op);
448 if (rc) {
449 cifs_small_buf_release(in_buf);
450 return rc;
451 }
452
453 /* make sure that we sign in the same order that we send on this socket
454 and avoid races inside tcp sendmsg code that could cause corruption
455 of smb data */
456
457 down(&ses->server->tcpSem);
458
459 rc = allocate_mid(ses, in_buf, &midQ);
460 if (rc) {
387 up(&ses->server->tcpSem); 461 up(&ses->server->tcpSem);
388 cifs_small_buf_release(in_buf); 462 cifs_small_buf_release(in_buf);
389 /* If not lock req, update # of requests on wire to server */ 463 /* Update # of requests on wire to server */
390 if(long_op < 3) { 464 atomic_dec(&ses->server->inFlight);
391 atomic_dec(&ses->server->inFlight); 465 wake_up(&ses->server->request_q);
392 wake_up(&ses->server->request_q); 466 return rc;
393 }
394 return -ENOMEM;
395 } 467 }
396 468
397 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); 469 rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
@@ -406,32 +478,23 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
406 atomic_dec(&ses->server->inSend); 478 atomic_dec(&ses->server->inSend);
407 midQ->when_sent = jiffies; 479 midQ->when_sent = jiffies;
408#endif 480#endif
409 if(rc < 0) { 481
410 DeleteMidQEntry(midQ); 482 up(&ses->server->tcpSem);
411 up(&ses->server->tcpSem); 483 cifs_small_buf_release(in_buf);
412 cifs_small_buf_release(in_buf); 484
413 /* If not lock req, update # of requests on wire to server */ 485 if(rc < 0)
414 if(long_op < 3) { 486 goto out;
415 atomic_dec(&ses->server->inFlight);
416 wake_up(&ses->server->request_q);
417 }
418 return rc;
419 } else {
420 up(&ses->server->tcpSem);
421 cifs_small_buf_release(in_buf);
422 }
423 487
424 if (long_op == -1) 488 if (long_op == -1)
425 goto cifs_no_response_exit2; 489 goto out;
426 else if (long_op == 2) /* writes past end of file can take loong time */ 490 else if (long_op == 2) /* writes past end of file can take loong time */
427 timeout = 180 * HZ; 491 timeout = 180 * HZ;
428 else if (long_op == 1) 492 else if (long_op == 1)
429 timeout = 45 * HZ; /* should be greater than 493 timeout = 45 * HZ; /* should be greater than
430 servers oplock break timeout (about 43 seconds) */ 494 servers oplock break timeout (about 43 seconds) */
431 else if (long_op > 2) { 495 else
432 timeout = MAX_SCHEDULE_TIMEOUT;
433 } else
434 timeout = 15 * HZ; 496 timeout = 15 * HZ;
497
435 /* wait for 15 seconds or until woken up due to response arriving or 498 /* wait for 15 seconds or until woken up due to response arriving or
436 due to last connection to this server being unmounted */ 499 due to last connection to this server being unmounted */
437 if (signal_pending(current)) { 500 if (signal_pending(current)) {
@@ -441,19 +504,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
441 } 504 }
442 505
443 /* No user interrupts in wait - wreaks havoc with performance */ 506 /* No user interrupts in wait - wreaks havoc with performance */
444 if(timeout != MAX_SCHEDULE_TIMEOUT) { 507 wait_for_response(ses, midQ, timeout, 10 * HZ);
445 timeout += jiffies;
446 wait_event(ses->server->response_q,
447 (!(midQ->midState & MID_REQUEST_SUBMITTED)) ||
448 time_after(jiffies, timeout) ||
449 ((ses->server->tcpStatus != CifsGood) &&
450 (ses->server->tcpStatus != CifsNew)));
451 } else {
452 wait_event(ses->server->response_q,
453 (!(midQ->midState & MID_REQUEST_SUBMITTED)) ||
454 ((ses->server->tcpStatus != CifsGood) &&
455 (ses->server->tcpStatus != CifsNew)));
456 }
457 508
458 spin_lock(&GlobalMid_Lock); 509 spin_lock(&GlobalMid_Lock);
459 if (midQ->resp_buf) { 510 if (midQ->resp_buf) {
@@ -481,11 +532,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
481 } 532 }
482 spin_unlock(&GlobalMid_Lock); 533 spin_unlock(&GlobalMid_Lock);
483 DeleteMidQEntry(midQ); 534 DeleteMidQEntry(midQ);
484 /* If not lock req, update # of requests on wire to server */ 535 /* Update # of requests on wire to server */
485 if(long_op < 3) { 536 atomic_dec(&ses->server->inFlight);
486 atomic_dec(&ses->server->inFlight); 537 wake_up(&ses->server->request_q);
487 wake_up(&ses->server->request_q);
488 }
489 return rc; 538 return rc;
490 } 539 }
491 540
@@ -536,24 +585,12 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
536 cFYI(1,("Bad MID state?")); 585 cFYI(1,("Bad MID state?"));
537 } 586 }
538 } 587 }
539cifs_no_response_exit2:
540 DeleteMidQEntry(midQ);
541
542 if(long_op < 3) {
543 atomic_dec(&ses->server->inFlight);
544 wake_up(&ses->server->request_q);
545 }
546 588
547 return rc; 589out:
548 590
549out_unlock2: 591 DeleteMidQEntry(midQ);
550 up(&ses->server->tcpSem); 592 atomic_dec(&ses->server->inFlight);
551 cifs_small_buf_release(in_buf); 593 wake_up(&ses->server->request_q);
552 /* If not lock req, update # of requests on wire to server */
553 if(long_op < 3) {
554 atomic_dec(&ses->server->inFlight);
555 wake_up(&ses->server->request_q);
556 }
557 594
558 return rc; 595 return rc;
559} 596}
@@ -583,85 +620,34 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
583 /* Ensure that we do not send more than 50 overlapping requests 620 /* Ensure that we do not send more than 50 overlapping requests
584 to the same server. We may make this configurable later or 621 to the same server. We may make this configurable later or
585 use ses->maxReq */ 622 use ses->maxReq */
586 if(long_op == -1) {
587 /* oplock breaks must not be held up */
588 atomic_inc(&ses->server->inFlight);
589 } else {
590 spin_lock(&GlobalMid_Lock);
591 while(1) {
592 if(atomic_read(&ses->server->inFlight) >=
593 cifs_max_pending){
594 spin_unlock(&GlobalMid_Lock);
595#ifdef CONFIG_CIFS_STATS2
596 atomic_inc(&ses->server->num_waiters);
597#endif
598 wait_event(ses->server->request_q,
599 atomic_read(&ses->server->inFlight)
600 < cifs_max_pending);
601#ifdef CONFIG_CIFS_STATS2
602 atomic_dec(&ses->server->num_waiters);
603#endif
604 spin_lock(&GlobalMid_Lock);
605 } else {
606 if(ses->server->tcpStatus == CifsExiting) {
607 spin_unlock(&GlobalMid_Lock);
608 return -ENOENT;
609 }
610 623
611 /* can not count locking commands against total since 624 rc = wait_for_free_request(ses, long_op);
612 they are allowed to block on server */ 625 if (rc)
613 626 return rc;
614 if(long_op < 3) { 627
615 /* update # of requests on the wire to server */
616 atomic_inc(&ses->server->inFlight);
617 }
618 spin_unlock(&GlobalMid_Lock);
619 break;
620 }
621 }
622 }
623 /* make sure that we sign in the same order that we send on this socket 628 /* make sure that we sign in the same order that we send on this socket
624 and avoid races inside tcp sendmsg code that could cause corruption 629 and avoid races inside tcp sendmsg code that could cause corruption
625 of smb data */ 630 of smb data */
626 631
627 down(&ses->server->tcpSem); 632 down(&ses->server->tcpSem);
628 633
629 if (ses->server->tcpStatus == CifsExiting) { 634 rc = allocate_mid(ses, in_buf, &midQ);
630 rc = -ENOENT; 635 if (rc) {
631 goto out_unlock;
632 } else if (ses->server->tcpStatus == CifsNeedReconnect) {
633 cFYI(1,("tcp session dead - return to caller to retry"));
634 rc = -EAGAIN;
635 goto out_unlock;
636 } else if (ses->status != CifsGood) {
637 /* check if SMB session is bad because we are setting it up */
638 if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
639 (in_buf->Command != SMB_COM_NEGOTIATE)) {
640 rc = -EAGAIN;
641 goto out_unlock;
642 } /* else ok - we are setting up session */
643 }
644 midQ = AllocMidQEntry(in_buf, ses);
645 if (midQ == NULL) {
646 up(&ses->server->tcpSem); 636 up(&ses->server->tcpSem);
647 /* If not lock req, update # of requests on wire to server */ 637 /* Update # of requests on wire to server */
648 if(long_op < 3) { 638 atomic_dec(&ses->server->inFlight);
649 atomic_dec(&ses->server->inFlight); 639 wake_up(&ses->server->request_q);
650 wake_up(&ses->server->request_q); 640 return rc;
651 }
652 return -ENOMEM;
653 } 641 }
654 642
655 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 643 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
656 up(&ses->server->tcpSem);
657 cERROR(1, ("Illegal length, greater than maximum frame, %d", 644 cERROR(1, ("Illegal length, greater than maximum frame, %d",
658 in_buf->smb_buf_length)); 645 in_buf->smb_buf_length));
659 DeleteMidQEntry(midQ); 646 DeleteMidQEntry(midQ);
660 /* If not lock req, update # of requests on wire to server */ 647 up(&ses->server->tcpSem);
661 if(long_op < 3) { 648 /* Update # of requests on wire to server */
662 atomic_dec(&ses->server->inFlight); 649 atomic_dec(&ses->server->inFlight);
663 wake_up(&ses->server->request_q); 650 wake_up(&ses->server->request_q);
664 }
665 return -EIO; 651 return -EIO;
666 } 652 }
667 653
@@ -677,27 +663,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
677 atomic_dec(&ses->server->inSend); 663 atomic_dec(&ses->server->inSend);
678 midQ->when_sent = jiffies; 664 midQ->when_sent = jiffies;
679#endif 665#endif
680 if(rc < 0) { 666 up(&ses->server->tcpSem);
681 DeleteMidQEntry(midQ); 667
682 up(&ses->server->tcpSem); 668 if(rc < 0)
683 /* If not lock req, update # of requests on wire to server */ 669 goto out;
684 if(long_op < 3) { 670
685 atomic_dec(&ses->server->inFlight);
686 wake_up(&ses->server->request_q);
687 }
688 return rc;
689 } else
690 up(&ses->server->tcpSem);
691 if (long_op == -1) 671 if (long_op == -1)
692 goto cifs_no_response_exit; 672 goto out;
693 else if (long_op == 2) /* writes past end of file can take loong time */ 673 else if (long_op == 2) /* writes past end of file can take loong time */
694 timeout = 180 * HZ; 674 timeout = 180 * HZ;
695 else if (long_op == 1) 675 else if (long_op == 1)
696 timeout = 45 * HZ; /* should be greater than 676 timeout = 45 * HZ; /* should be greater than
697 servers oplock break timeout (about 43 seconds) */ 677 servers oplock break timeout (about 43 seconds) */
698 else if (long_op > 2) { 678 else
699 timeout = MAX_SCHEDULE_TIMEOUT;
700 } else
701 timeout = 15 * HZ; 679 timeout = 15 * HZ;
702 /* wait for 15 seconds or until woken up due to response arriving or 680 /* wait for 15 seconds or until woken up due to response arriving or
703 due to last connection to this server being unmounted */ 681 due to last connection to this server being unmounted */
@@ -708,19 +686,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
708 } 686 }
709 687
710 /* No user interrupts in wait - wreaks havoc with performance */ 688 /* No user interrupts in wait - wreaks havoc with performance */
711 if(timeout != MAX_SCHEDULE_TIMEOUT) { 689 wait_for_response(ses, midQ, timeout, 10 * HZ);
712 timeout += jiffies;
713 wait_event(ses->server->response_q,
714 (!(midQ->midState & MID_REQUEST_SUBMITTED)) ||
715 time_after(jiffies, timeout) ||
716 ((ses->server->tcpStatus != CifsGood) &&
717 (ses->server->tcpStatus != CifsNew)));
718 } else {
719 wait_event(ses->server->response_q,
720 (!(midQ->midState & MID_REQUEST_SUBMITTED)) ||
721 ((ses->server->tcpStatus != CifsGood) &&
722 (ses->server->tcpStatus != CifsNew)));
723 }
724 690
725 spin_lock(&GlobalMid_Lock); 691 spin_lock(&GlobalMid_Lock);
726 if (midQ->resp_buf) { 692 if (midQ->resp_buf) {
@@ -748,11 +714,9 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
748 } 714 }
749 spin_unlock(&GlobalMid_Lock); 715 spin_unlock(&GlobalMid_Lock);
750 DeleteMidQEntry(midQ); 716 DeleteMidQEntry(midQ);
751 /* If not lock req, update # of requests on wire to server */ 717 /* Update # of requests on wire to server */
752 if(long_op < 3) { 718 atomic_dec(&ses->server->inFlight);
753 atomic_dec(&ses->server->inFlight); 719 wake_up(&ses->server->request_q);
754 wake_up(&ses->server->request_q);
755 }
756 return rc; 720 return rc;
757 } 721 }
758 722
@@ -799,23 +763,253 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
799 cERROR(1,("Bad MID state?")); 763 cERROR(1,("Bad MID state?"));
800 } 764 }
801 } 765 }
802cifs_no_response_exit: 766
767out:
768
803 DeleteMidQEntry(midQ); 769 DeleteMidQEntry(midQ);
770 atomic_dec(&ses->server->inFlight);
771 wake_up(&ses->server->request_q);
804 772
805 if(long_op < 3) { 773 return rc;
806 atomic_dec(&ses->server->inFlight); 774}
807 wake_up(&ses->server->request_q); 775
808 } 776/* Send an NT_CANCEL SMB to cause the POSIX blocking lock to return. */
777
778static int
779send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf,
780 struct mid_q_entry *midQ)
781{
782 int rc = 0;
783 struct cifsSesInfo *ses = tcon->ses;
784 __u16 mid = in_buf->Mid;
809 785
786 header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0);
787 in_buf->Mid = mid;
788 down(&ses->server->tcpSem);
789 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
790 if (rc) {
791 up(&ses->server->tcpSem);
792 return rc;
793 }
794 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
795 (struct sockaddr *) &(ses->server->addr.sockAddr));
796 up(&ses->server->tcpSem);
810 return rc; 797 return rc;
798}
799
800/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows
801 blocking lock to return. */
802
803static int
804send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon,
805 struct smb_hdr *in_buf,
806 struct smb_hdr *out_buf)
807{
808 int bytes_returned;
809 struct cifsSesInfo *ses = tcon->ses;
810 LOCK_REQ *pSMB = (LOCK_REQ *)in_buf;
811
812 /* We just modify the current in_buf to change
813 the type of lock from LOCKING_ANDX_SHARED_LOCK
814 or LOCKING_ANDX_EXCLUSIVE_LOCK to
815 LOCKING_ANDX_CANCEL_LOCK. */
816
817 pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES;
818 pSMB->Timeout = 0;
819 pSMB->hdr.Mid = GetNextMid(ses->server);
820
821 return SendReceive(xid, ses, in_buf, out_buf,
822 &bytes_returned, 0);
823}
811 824
812out_unlock: 825int
826SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
827 struct smb_hdr *in_buf, struct smb_hdr *out_buf,
828 int *pbytes_returned)
829{
830 int rc = 0;
831 int rstart = 0;
832 unsigned int receive_len;
833 struct mid_q_entry *midQ;
834 struct cifsSesInfo *ses;
835
836 if (tcon == NULL || tcon->ses == NULL) {
837 cERROR(1,("Null smb session"));
838 return -EIO;
839 }
840 ses = tcon->ses;
841
842 if(ses->server == NULL) {
843 cERROR(1,("Null tcp session"));
844 return -EIO;
845 }
846
847 if(ses->server->tcpStatus == CifsExiting)
848 return -ENOENT;
849
850 /* Ensure that we do not send more than 50 overlapping requests
851 to the same server. We may make this configurable later or
852 use ses->maxReq */
853
854 rc = wait_for_free_request(ses, 3);
855 if (rc)
856 return rc;
857
858 /* make sure that we sign in the same order that we send on this socket
859 and avoid races inside tcp sendmsg code that could cause corruption
860 of smb data */
861
862 down(&ses->server->tcpSem);
863
864 rc = allocate_mid(ses, in_buf, &midQ);
865 if (rc) {
866 up(&ses->server->tcpSem);
867 return rc;
868 }
869
870 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
871 up(&ses->server->tcpSem);
872 cERROR(1, ("Illegal length, greater than maximum frame, %d",
873 in_buf->smb_buf_length));
874 DeleteMidQEntry(midQ);
875 return -EIO;
876 }
877
878 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
879
880 midQ->midState = MID_REQUEST_SUBMITTED;
881#ifdef CONFIG_CIFS_STATS2
882 atomic_inc(&ses->server->inSend);
883#endif
884 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
885 (struct sockaddr *) &(ses->server->addr.sockAddr));
886#ifdef CONFIG_CIFS_STATS2
887 atomic_dec(&ses->server->inSend);
888 midQ->when_sent = jiffies;
889#endif
813 up(&ses->server->tcpSem); 890 up(&ses->server->tcpSem);
814 /* If not lock req, update # of requests on wire to server */ 891
815 if(long_op < 3) { 892 if(rc < 0) {
816 atomic_dec(&ses->server->inFlight); 893 DeleteMidQEntry(midQ);
817 wake_up(&ses->server->request_q); 894 return rc;
895 }
896
897 /* Wait for a reply - allow signals to interrupt. */
898 rc = wait_event_interruptible(ses->server->response_q,
899 (!(midQ->midState == MID_REQUEST_SUBMITTED)) ||
900 ((ses->server->tcpStatus != CifsGood) &&
901 (ses->server->tcpStatus != CifsNew)));
902
903 /* Were we interrupted by a signal ? */
904 if ((rc == -ERESTARTSYS) &&
905 (midQ->midState == MID_REQUEST_SUBMITTED) &&
906 ((ses->server->tcpStatus == CifsGood) ||
907 (ses->server->tcpStatus == CifsNew))) {
908
909 if (in_buf->Command == SMB_COM_TRANSACTION2) {
910 /* POSIX lock. We send a NT_CANCEL SMB to cause the
911 blocking lock to return. */
912
913 rc = send_nt_cancel(tcon, in_buf, midQ);
914 if (rc) {
915 DeleteMidQEntry(midQ);
916 return rc;
917 }
918 } else {
919 /* Windows lock. We send a LOCKINGX_CANCEL_LOCK
920 to cause the blocking lock to return. */
921
922 rc = send_lock_cancel(xid, tcon, in_buf, out_buf);
923
924 /* If we get -ENOLCK back the lock may have
925 already been removed. Don't exit in this case. */
926 if (rc && rc != -ENOLCK) {
927 DeleteMidQEntry(midQ);
928 return rc;
929 }
930 }
931
932 /* Wait 5 seconds for the response. */
933 if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ)==0) {
934 /* We got the response - restart system call. */
935 rstart = 1;
936 }
937 }
938
939 spin_lock(&GlobalMid_Lock);
940 if (midQ->resp_buf) {
941 spin_unlock(&GlobalMid_Lock);
942 receive_len = midQ->resp_buf->smb_buf_length;
943 } else {
944 cERROR(1,("No response for cmd %d mid %d",
945 midQ->command, midQ->mid));
946 if(midQ->midState == MID_REQUEST_SUBMITTED) {
947 if(ses->server->tcpStatus == CifsExiting)
948 rc = -EHOSTDOWN;
949 else {
950 ses->server->tcpStatus = CifsNeedReconnect;
951 midQ->midState = MID_RETRY_NEEDED;
952 }
953 }
954
955 if (rc != -EHOSTDOWN) {
956 if(midQ->midState == MID_RETRY_NEEDED) {
957 rc = -EAGAIN;
958 cFYI(1,("marking request for retry"));
959 } else {
960 rc = -EIO;
961 }
962 }
963 spin_unlock(&GlobalMid_Lock);
964 DeleteMidQEntry(midQ);
965 return rc;
818 } 966 }
967
968 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
969 cERROR(1, ("Frame too large received. Length: %d Xid: %d",
970 receive_len, xid));
971 rc = -EIO;
972 } else { /* rcvd frame is ok */
973
974 if (midQ->resp_buf && out_buf
975 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
976 out_buf->smb_buf_length = receive_len;
977 memcpy((char *)out_buf + 4,
978 (char *)midQ->resp_buf + 4,
979 receive_len);
980
981 dump_smb(out_buf, 92);
982 /* convert the length into a more usable form */
983 if((receive_len > 24) &&
984 (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
985 SECMODE_SIGN_ENABLED))) {
986 rc = cifs_verify_signature(out_buf,
987 ses->server->mac_signing_key,
988 midQ->sequence_number+1);
989 if(rc) {
990 cERROR(1,("Unexpected SMB signature"));
991 /* BB FIXME add code to kill session */
992 }
993 }
994
995 *pbytes_returned = out_buf->smb_buf_length;
996
997 /* BB special case reconnect tid and uid here? */
998 rc = map_smb_to_linux_error(out_buf);
819 999
1000 /* convert ByteCount if necessary */
1001 if (receive_len >=
1002 sizeof (struct smb_hdr) -
1003 4 /* do not count RFC1001 header */ +
1004 (2 * out_buf->WordCount) + 2 /* bcc */ )
1005 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
1006 } else {
1007 rc = -EIO;
1008 cERROR(1,("Bad MID state?"));
1009 }
1010 }
1011 DeleteMidQEntry(midQ);
1012 if (rstart && rc == -EACCES)
1013 return -ERESTARTSYS;
820 return rc; 1014 return rc;
821} 1015}
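
The hunks above replace SendReceive()'s open-coded request throttling and MID setup with two helpers, wait_for_free_request() and allocate_mid(), whose bodies sit outside this excerpt. A sketch of the first, reconstructed from the inlined logic it replaces (the CONFIG_CIFS_STATS2 waiter counters are elided); the actual committed body may differ in detail:

static int
wait_for_free_request(struct cifsSesInfo *ses, const int long_op)
{
	if (long_op == -1) {
		/* oplock breaks must not be held up */
		atomic_inc(&ses->server->inFlight);
		return 0;
	}

	spin_lock(&GlobalMid_Lock);
	while (1) {
		if (atomic_read(&ses->server->inFlight) >= cifs_max_pending) {
			/* too many requests on the wire: sleep until a
			   response wakes request_q, then recheck */
			spin_unlock(&GlobalMid_Lock);
			wait_event(ses->server->request_q,
				   atomic_read(&ses->server->inFlight)
						< cifs_max_pending);
			spin_lock(&GlobalMid_Lock);
		} else {
			if (ses->server->tcpStatus == CifsExiting) {
				spin_unlock(&GlobalMid_Lock);
				return -ENOENT;
			}
			/* locking commands (long_op == 3) are not counted,
			   since they may block indefinitely on the server;
			   this is why SendReceiveBlockingLock() passes 3 and
			   never decrements inFlight on its exit paths */
			if (long_op < 3)
				atomic_inc(&ses->server->inFlight);
			spin_unlock(&GlobalMid_Lock);
			return 0;
		}
	}
}
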
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 7754d641775e..067648b7179b 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -330,11 +330,15 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size)
330 sb = direntry->d_inode->i_sb; 330 sb = direntry->d_inode->i_sb;
331 if(sb == NULL) 331 if(sb == NULL)
332 return -EIO; 332 return -EIO;
333 xid = GetXid();
334 333
335 cifs_sb = CIFS_SB(sb); 334 cifs_sb = CIFS_SB(sb);
336 pTcon = cifs_sb->tcon; 335 pTcon = cifs_sb->tcon;
337 336
337 if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
338 return -EOPNOTSUPP;
339
340 xid = GetXid();
341
338 full_path = build_path_from_dentry(direntry); 342 full_path = build_path_from_dentry(direntry);
339 if(full_path == NULL) { 343 if(full_path == NULL) {
340 FreeXid(xid); 344 FreeXid(xid);
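
The xattr.c hunk above is a small leak fix: cifs_listxattr() took an xid before testing CIFS_MOUNT_NO_XATTR, so mounts with xattrs disabled returned -EOPNOTSUPP with the xid still held. Acquiring only after every early-return check keeps GetXid()/FreeXid() paired. A toy userspace rendering of the pattern; get_xid()/free_xid() here are stand-ins, not the kernel helpers:

#include <errno.h>
#include <stdio.h>

static int next_xid = 1;
static int get_xid(void)      { printf("xid %d acquired\n", next_xid); return next_xid++; }
static void free_xid(int xid) { printf("xid %d released\n", xid); }

/* Run every early-return check before acquiring, so each get_xid()
 * is matched by a free_xid() on every path out. */
static int listxattr_like(int xattr_disabled)
{
	if (xattr_disabled)
		return -EOPNOTSUPP;	/* nothing acquired, nothing to release */

	int xid = get_xid();
	/* ... the real function builds a path and queries the server ... */
	free_xid(xid);
	return 0;
}

int main(void)
{
	listxattr_like(1);
	listxattr_like(0);
	return 0;
}
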
diff --git a/fs/coda/file.c b/fs/coda/file.c
index cc66c681bd11..dbfbcfa5b3c0 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -136,10 +136,8 @@ int coda_open(struct inode *coda_inode, struct file *coda_file)
136 coda_vfs_stat.open++; 136 coda_vfs_stat.open++;
137 137
138 cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL); 138 cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL);
139 if (!cfi) { 139 if (!cfi)
140 unlock_kernel();
141 return -ENOMEM; 140 return -ENOMEM;
142 }
143 141
144 lock_kernel(); 142 lock_kernel();
145 143
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c
index e249cf733a6b..1d30d2ff440f 100644
--- a/fs/efs/symlink.c
+++ b/fs/efs/symlink.c
@@ -22,7 +22,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
22 22
23 err = -ENAMETOOLONG; 23 err = -ENAMETOOLONG;
24 if (size > 2 * EFS_BLOCKSIZE) 24 if (size > 2 * EFS_BLOCKSIZE)
25 goto fail; 25 goto fail_notlocked;
26 26
27 lock_kernel(); 27 lock_kernel();
28 /* read first 512 bytes of link target */ 28 /* read first 512 bytes of link target */
@@ -47,6 +47,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page)
47 return 0; 47 return 0;
48fail: 48fail:
49 unlock_kernel(); 49 unlock_kernel();
50fail_notlocked:
50 SetPageError(page); 51 SetPageError(page);
51 kunmap(page); 52 kunmap(page);
52 unlock_page(page); 53 unlock_page(page);
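
The extra label above fixes an unbalanced unlock: the -ENAMETOOLONG path jumped to fail, which runs unlock_kernel() even though lock_kernel() had not been taken yet. The fix is the standard two-label unwind, sketched below against a pthread mutex standing in for the BKL:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int readpage_like(int too_long, int io_error)
{
	int err = -ENAMETOOLONG;
	if (too_long)
		goto fail_notlocked;	/* lock not taken yet: skip the unlock */

	pthread_mutex_lock(&big_lock);
	err = -EIO;
	if (io_error)
		goto fail;		/* lock held: unwind through the unlock */

	pthread_mutex_unlock(&big_lock);
	return 0;
fail:
	pthread_mutex_unlock(&big_lock);
fail_notlocked:
	printf("failing with %d\n", err);
	return err;
}

int main(void)
{
	readpage_like(1, 0);
	readpage_like(0, 1);
	return 0;
}
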
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 19ffb043abbc..3a3567433b92 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1168eexit_1: 1168eexit_1:
1169 1169
1170 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", 1170 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
1171 current, ep, epi->file, error)); 1171 current, ep, epi->ffd.file, error));
1172 1172
1173 return error; 1173 return error;
1174} 1174}
@@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1236 struct eventpoll *ep = epi->ep; 1236 struct eventpoll *ep = epi->ep;
1237 1237
1238 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", 1238 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
1239 current, epi->file, epi, ep)); 1239 current, epi->ffd.file, epi, ep));
1240 1240
1241 write_lock_irqsave(&ep->lock, flags); 1241 write_lock_irqsave(&ep->lock, flags);
1242 1242
diff --git a/fs/exec.c b/fs/exec.c
index 8344ba73a2a6..54135df2a966 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -486,8 +486,6 @@ struct file *open_exec(const char *name)
486 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && 486 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
487 S_ISREG(inode->i_mode)) { 487 S_ISREG(inode->i_mode)) {
488 int err = vfs_permission(&nd, MAY_EXEC); 488 int err = vfs_permission(&nd, MAY_EXEC);
489 if (!err && !(inode->i_mode & 0111))
490 err = -EACCES;
491 file = ERR_PTR(err); 489 file = ERR_PTR(err);
492 if (!err) { 490 if (!err) {
493 file = nameidata_to_filp(&nd, O_RDONLY); 491 file = nameidata_to_filp(&nd, O_RDONLY);
@@ -753,7 +751,7 @@ no_thread_group:
753 751
754 write_lock_irq(&tasklist_lock); 752 write_lock_irq(&tasklist_lock);
755 spin_lock(&oldsighand->siglock); 753 spin_lock(&oldsighand->siglock);
756 spin_lock(&newsighand->siglock); 754 spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
757 755
758 rcu_assign_pointer(current->sighand, newsighand); 756 rcu_assign_pointer(current->sighand, newsighand);
759 recalc_sigpending(); 757 recalc_sigpending();
@@ -922,12 +920,6 @@ int prepare_binprm(struct linux_binprm *bprm)
922 int retval; 920 int retval;
923 921
924 mode = inode->i_mode; 922 mode = inode->i_mode;
925 /*
926 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
927 * generic_permission lets a non-executable through
928 */
929 if (!(mode & 0111)) /* with at least _one_ execute bit set */
930 return -EACCES;
931 if (bprm->file->f_op == NULL) 923 if (bprm->file->f_op == NULL)
932 return -EACCES; 924 return -EACCES;
933 925
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f2702cda9779..681dea8f9532 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
775 if (EXT2_INODE_SIZE(sb) == 0) 775 if (EXT2_INODE_SIZE(sb) == 0)
776 goto cantfind_ext2; 776 goto cantfind_ext2;
777 sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); 777 sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
778 if (sbi->s_inodes_per_block == 0) 778 if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
779 goto cantfind_ext2; 779 goto cantfind_ext2;
780 sbi->s_itb_per_group = sbi->s_inodes_per_group / 780 sbi->s_itb_per_group = sbi->s_inodes_per_group /
781 sbi->s_inodes_per_block; 781 sbi->s_inodes_per_block;
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 92d50b53a933..0d1e6279cbfd 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -62,9 +62,6 @@ extern int ext3_permission (struct inode *, int, struct nameidata *);
62extern int ext3_acl_chmod (struct inode *); 62extern int ext3_acl_chmod (struct inode *);
63extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 63extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
64 64
65extern int init_ext3_acl(void);
66extern void exit_ext3_acl(void);
67
68#else /* CONFIG_EXT3_FS_POSIX_ACL */ 65#else /* CONFIG_EXT3_FS_POSIX_ACL */
69#include <linux/sched.h> 66#include <linux/sched.h>
70#define ext3_permission NULL 67#define ext3_permission NULL
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a504a40d6d29..063d994bda0b 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1269 goal = le32_to_cpu(es->s_first_data_block); 1269 goal = le32_to_cpu(es->s_first_data_block);
1270 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / 1270 group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
1271 EXT3_BLOCKS_PER_GROUP(sb); 1271 EXT3_BLOCKS_PER_GROUP(sb);
1272 goal_group = group_no;
1273retry_alloc:
1272 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); 1274 gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
1273 if (!gdp) 1275 if (!gdp)
1274 goto io_error; 1276 goto io_error;
1275 1277
1276 goal_group = group_no;
1277retry:
1278 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1278 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1279 /* 1279 /*
1280 * if there is not enough free blocks to make a new resevation 1280 * if there is not enough free blocks to make a new resevation
@@ -1349,7 +1349,7 @@ retry:
1349 if (my_rsv) { 1349 if (my_rsv) {
1350 my_rsv = NULL; 1350 my_rsv = NULL;
1351 group_no = goal_group; 1351 group_no = goal_group;
1352 goto retry; 1352 goto retry_alloc;
1353 } 1353 }
1354 /* No space left on the device */ 1354 /* No space left on the device */
1355 *errp = -ENOSPC; 1355 *errp = -ENOSPC;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f804d5e9d60c..c5ee9f0691e3 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1158,7 +1158,7 @@ retry:
1158 ret = PTR_ERR(handle); 1158 ret = PTR_ERR(handle);
1159 goto out; 1159 goto out;
1160 } 1160 }
1161 if (test_opt(inode->i_sb, NOBH)) 1161 if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
1162 ret = nobh_prepare_write(page, from, to, ext3_get_block); 1162 ret = nobh_prepare_write(page, from, to, ext3_get_block);
1163 else 1163 else
1164 ret = block_prepare_write(page, from, to, ext3_get_block); 1164 ret = block_prepare_write(page, from, to, ext3_get_block);
@@ -1244,7 +1244,7 @@ static int ext3_writeback_commit_write(struct file *file, struct page *page,
1244 if (new_i_size > EXT3_I(inode)->i_disksize) 1244 if (new_i_size > EXT3_I(inode)->i_disksize)
1245 EXT3_I(inode)->i_disksize = new_i_size; 1245 EXT3_I(inode)->i_disksize = new_i_size;
1246 1246
1247 if (test_opt(inode->i_sb, NOBH)) 1247 if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
1248 ret = nobh_commit_write(file, page, from, to); 1248 ret = nobh_commit_write(file, page, from, to);
1249 else 1249 else
1250 ret = generic_commit_write(file, page, from, to); 1250 ret = generic_commit_write(file, page, from, to);
@@ -1494,7 +1494,7 @@ static int ext3_writeback_writepage(struct page *page,
1494 goto out_fail; 1494 goto out_fail;
1495 } 1495 }
1496 1496
1497 if (test_opt(inode->i_sb, NOBH)) 1497 if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
1498 ret = nobh_writepage(page, ext3_get_block, wbc); 1498 ret = nobh_writepage(page, ext3_get_block, wbc);
1499 else 1499 else
1500 ret = block_write_full_page(page, ext3_get_block, wbc); 1500 ret = block_write_full_page(page, ext3_get_block, wbc);
@@ -2402,14 +2402,15 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
2402 struct buffer_head *bh; 2402 struct buffer_head *bh;
2403 struct ext3_group_desc * gdp; 2403 struct ext3_group_desc * gdp;
2404 2404
2405 2405 if (!ext3_valid_inum(sb, ino)) {
2406 if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO && 2406 /*
2407 ino != EXT3_RESIZE_INO && ino < EXT3_FIRST_INO(sb)) || 2407 * This error is already checked for in namei.c unless we are
2408 ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) { 2408 * looking at an NFS filehandle, in which case no error
2409 ext3_error(sb, "ext3_get_inode_block", 2409 * report is needed
2410 "bad inode number: %lu", ino); 2410 */
2411 return 0; 2411 return 0;
2412 } 2412 }
2413
2413 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); 2414 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
2414 if (block_group >= EXT3_SB(sb)->s_groups_count) { 2415 if (block_group >= EXT3_SB(sb)->s_groups_count) {
2415 ext3_error(sb,"ext3_get_inode_block","group >= groups count"); 2416 ext3_error(sb,"ext3_get_inode_block","group >= groups count");
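
The open-coded range test removed above is centralized in ext3_valid_inum(), which the same series also uses in ext3_lookup() and ext3_get_parent() below; as the new comment says, a bad inode number reached via an NFS filehandle is expected, so ext3_get_inode_block() no longer raises ext3_error() for it. The helper's body is not part of this excerpt; negating the removed predicate suggests it reads roughly like this (a sketch, not necessarily the committed form):

static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
{
	/* the three reserved inodes are always valid; everything else must
	   lie between the first regular inode and the inode count */
	return (ino == EXT3_ROOT_INO || ino == EXT3_JOURNAL_INO ||
		ino == EXT3_RESIZE_INO || ino >= EXT3_FIRST_INO(sb)) &&
	       ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
}
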
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index d9176dba3698..2aa7101b27cd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1000,7 +1000,12 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1000 if (bh) { 1000 if (bh) {
1001 unsigned long ino = le32_to_cpu(de->inode); 1001 unsigned long ino = le32_to_cpu(de->inode);
1002 brelse (bh); 1002 brelse (bh);
1003 inode = iget(dir->i_sb, ino); 1003 if (!ext3_valid_inum(dir->i_sb, ino)) {
1004 ext3_error(dir->i_sb, "ext3_lookup",
1005 "bad inode number: %lu", ino);
1006 inode = NULL;
1007 } else
1008 inode = iget(dir->i_sb, ino);
1004 1009
1005 if (!inode) 1010 if (!inode)
1006 return ERR_PTR(-EACCES); 1011 return ERR_PTR(-EACCES);
@@ -1028,7 +1033,13 @@ struct dentry *ext3_get_parent(struct dentry *child)
1028 return ERR_PTR(-ENOENT); 1033 return ERR_PTR(-ENOENT);
1029 ino = le32_to_cpu(de->inode); 1034 ino = le32_to_cpu(de->inode);
1030 brelse(bh); 1035 brelse(bh);
1031 inode = iget(child->d_inode->i_sb, ino); 1036
1037 if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
1038 ext3_error(child->d_inode->i_sb, "ext3_get_parent",
1039 "bad inode number: %lu", ino);
1040 inode = NULL;
1041 } else
1042 inode = iget(child->d_inode->i_sb, ino);
1032 1043
1033 if (!inode) 1044 if (!inode)
1034 return ERR_PTR(-EACCES); 1045 return ERR_PTR(-EACCES);
diff --git a/fs/file.c b/fs/file.c
index 55f4e7022563..b3c6b82e6a9d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -240,13 +240,9 @@ static struct fdtable *alloc_fdtable(int nr)
240 if (!fdt) 240 if (!fdt)
241 goto out; 241 goto out;
242 242
243 nfds = 8 * L1_CACHE_BYTES; 243 nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1));
244 /* Expand to the max in easy steps */ 244 if (nfds > NR_OPEN)
245 while (nfds <= nr) { 245 nfds = NR_OPEN;
246 nfds = nfds * 2;
247 if (nfds > NR_OPEN)
248 nfds = NR_OPEN;
249 }
250 246
251 new_openset = alloc_fdset(nfds); 247 new_openset = alloc_fdset(nfds);
252 new_execset = alloc_fdset(nfds); 248 new_execset = alloc_fdset(nfds);
@@ -277,11 +273,13 @@ static struct fdtable *alloc_fdtable(int nr)
277 } while (nfds <= nr); 273 } while (nfds <= nr);
278 new_fds = alloc_fd_array(nfds); 274 new_fds = alloc_fd_array(nfds);
279 if (!new_fds) 275 if (!new_fds)
280 goto out; 276 goto out2;
281 fdt->fd = new_fds; 277 fdt->fd = new_fds;
282 fdt->max_fds = nfds; 278 fdt->max_fds = nfds;
283 fdt->free_files = NULL; 279 fdt->free_files = NULL;
284 return fdt; 280 return fdt;
281out2:
282 nfds = fdt->max_fdset;
285out: 283out:
286 if (new_openset) 284 if (new_openset)
287 free_fdset(new_openset, nfds); 285 free_fdset(new_openset, nfds);
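
Two things happen in the alloc_fdtable() hunks above: the doubling loop is collapsed into max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1)), and a new out2 label resets nfds to fdt->max_fdset before freeing the fdsets, so a failed fd-array allocation no longer frees them with the wrong size. The one-step computation yields the same bound as the old loop: the smallest power of two strictly greater than nr, floored at 8*L1_CACHE_BYTES and capped at NR_OPEN. A userspace check of that equivalence, with illustrative values for the two constants (both vary by configuration):

#include <stdio.h>

#define NR_OPEN        (1024 * 1024)	/* assumed value for illustration */
#define L1_CACHE_BYTES 64		/* assumed value for illustration */

/* Portable stand-in for the kernel's roundup_pow_of_two(). */
static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long p = 1;
	while (p < n)
		p <<= 1;
	return p;
}

static unsigned long nfds_old(unsigned long nr)
{
	unsigned long nfds = 8 * L1_CACHE_BYTES;
	while (nfds <= nr) {		/* the removed doubling loop */
		nfds = nfds * 2;
		if (nfds > NR_OPEN)
			nfds = NR_OPEN;
	}
	return nfds;
}

static unsigned long nfds_new(unsigned long nr)
{
	unsigned long r = roundup_pow_of_two(nr + 1);
	unsigned long nfds = r > 8 * L1_CACHE_BYTES ? r : 8 * L1_CACHE_BYTES;
	if (nfds > NR_OPEN)
		nfds = NR_OPEN;
	return nfds;
}

int main(void)
{
	unsigned long probe[] = { 0, 511, 512, 513, 100000 };
	for (int i = 0; i < 5; i++)
		printf("nr=%lu old=%lu new=%lu\n", probe[i],
		       nfds_old(probe[i]), nfds_new(probe[i]));
	return 0;
}
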
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 29cce456c7ce..43886fa00a2a 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -246,6 +246,8 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler)
246 u_long page, npages, block, pblocks, nblocks, offset; 246 u_long page, npages, block, pblocks, nblocks, offset;
247 loff_t pos; 247 loff_t pos;
248 248
249 lock_kernel();
250
249 switch ((long)fp->f_pos) { 251 switch ((long)fp->f_pos) {
250 case 0: 252 case 0:
251 if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) 253 if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0)
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index a3bce3a77253..46fe60b2da23 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -105,7 +105,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
105 105
106/* 106/*
107 * Add a connection to the control filesystem (if it exists). Caller 107 * Add a connection to the control filesystem (if it exists). Caller
108 * must host fuse_mutex 108 * must hold fuse_mutex
109 */ 109 */
110int fuse_ctl_add_conn(struct fuse_conn *fc) 110int fuse_ctl_add_conn(struct fuse_conn *fc)
111{ 111{
@@ -139,7 +139,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
139 139
140/* 140/*
141 * Remove a connection from the control filesystem (if it exists). 141 * Remove a connection from the control filesystem (if it exists).
142 * Caller must host fuse_mutex 142 * Caller must hold fuse_mutex
143 */ 143 */
144void fuse_ctl_remove_conn(struct fuse_conn *fc) 144void fuse_ctl_remove_conn(struct fuse_conn *fc)
145{ 145{
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 72a74cde6de8..409ce6a7cca4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,33 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/namei.h> 15#include <linux/namei.h>
16 16
17#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
19{
20 entry->d_time = time;
21}
22
23static inline u64 fuse_dentry_time(struct dentry *entry)
24{
25 return entry->d_time;
26}
27#else
28/*
29 * On 32 bit archs store the high 32 bits of time in d_fsdata
30 */
31static void fuse_dentry_settime(struct dentry *entry, u64 time)
32{
33 entry->d_time = time;
34 entry->d_fsdata = (void *) (unsigned long) (time >> 32);
35}
36
37static u64 fuse_dentry_time(struct dentry *entry)
38{
39 return (u64) entry->d_time +
40 ((u64) (unsigned long) entry->d_fsdata << 32);
41}
42#endif
43
17/* 44/*
18 * FUSE caches dentries and attributes with separate timeout. The 45 * FUSE caches dentries and attributes with separate timeout. The
19 * time in jiffies until the dentry/attributes are valid is stored in 46 * time in jiffies until the dentry/attributes are valid is stored in
@@ -23,10 +50,13 @@
23/* 50/*
24 * Calculate the time in jiffies until a dentry/attributes are valid 51 * Calculate the time in jiffies until a dentry/attributes are valid
25 */ 52 */
26static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) 53static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
27{ 54{
28 struct timespec ts = {sec, nsec}; 55 if (sec || nsec) {
29 return jiffies + timespec_to_jiffies(&ts); 56 struct timespec ts = {sec, nsec};
57 return get_jiffies_64() + timespec_to_jiffies(&ts);
58 } else
59 return 0;
30} 60}
31 61
32/* 62/*
@@ -35,7 +65,8 @@ static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec)
35 */ 65 */
36static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) 66static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
37{ 67{
38 entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec); 68 fuse_dentry_settime(entry,
69 time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
39 if (entry->d_inode) 70 if (entry->d_inode)
40 get_fuse_inode(entry->d_inode)->i_time = 71 get_fuse_inode(entry->d_inode)->i_time =
41 time_to_jiffies(o->attr_valid, o->attr_valid_nsec); 72 time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
@@ -47,7 +78,7 @@ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o)
47 */ 78 */
48void fuse_invalidate_attr(struct inode *inode) 79void fuse_invalidate_attr(struct inode *inode)
49{ 80{
50 get_fuse_inode(inode)->i_time = jiffies - 1; 81 get_fuse_inode(inode)->i_time = 0;
51} 82}
52 83
53/* 84/*
@@ -60,7 +91,7 @@ void fuse_invalidate_attr(struct inode *inode)
60 */ 91 */
61static void fuse_invalidate_entry_cache(struct dentry *entry) 92static void fuse_invalidate_entry_cache(struct dentry *entry)
62{ 93{
63 entry->d_time = jiffies - 1; 94 fuse_dentry_settime(entry, 0);
64} 95}
65 96
66/* 97/*
@@ -102,7 +133,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
102 133
103 if (inode && is_bad_inode(inode)) 134 if (inode && is_bad_inode(inode))
104 return 0; 135 return 0;
105 else if (time_after(jiffies, entry->d_time)) { 136 else if (fuse_dentry_time(entry) < get_jiffies_64()) {
106 int err; 137 int err;
107 struct fuse_entry_out outarg; 138 struct fuse_entry_out outarg;
108 struct fuse_conn *fc; 139 struct fuse_conn *fc;
@@ -666,7 +697,7 @@ static int fuse_revalidate(struct dentry *entry)
666 if (!fuse_allow_task(fc, current)) 697 if (!fuse_allow_task(fc, current))
667 return -EACCES; 698 return -EACCES;
668 if (get_node_id(inode) != FUSE_ROOT_ID && 699 if (get_node_id(inode) != FUSE_ROOT_ID &&
669 time_before_eq(jiffies, fi->i_time)) 700 fi->i_time >= get_jiffies_64())
670 return 0; 701 return 0;
671 702
672 return fuse_do_getattr(inode); 703 return fuse_do_getattr(inode);
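
The fuse/dir.c hunks above widen dentry and attribute validity times from unsigned long jiffies to u64 jiffies_64, so the timeout cannot wrap on a 32-bit box; a stored value of 0 now means "already invalid", replacing the wrap-prone jiffies - 1. Since d_time is only an unsigned long, 32-bit kernels stash the high half in d_fsdata, as the new helpers show. A userspace round-trip of that split, with the two fields modeled as plain integers rather than the real struct dentry:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct dentry_like {
	uint32_t d_time;	/* low 32 bits (unsigned long on 32-bit) */
	void    *d_fsdata;	/* high 32 bits carried in a pointer slot */
};

static void settime(struct dentry_like *d, uint64_t t)
{
	d->d_time = (uint32_t)t;
	d->d_fsdata = (void *)(uintptr_t)(t >> 32);
}

static uint64_t gettime(const struct dentry_like *d)
{
	/* the kernel adds the halves; OR is equivalent since the low
	   word is already masked to 32 bits */
	return (uint64_t)d->d_time |
	       ((uint64_t)(uintptr_t)d->d_fsdata << 32);
}

int main(void)
{
	struct dentry_like d;
	uint64_t t = 0x123456789abcdef0ULL;

	settime(&d, t);
	assert(gettime(&d) == t);
	printf("round-trip ok: %llx\n", (unsigned long long)gettime(&d));
	return 0;
}
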
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 63614ed16336..5c4fcd1dbf59 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -395,14 +395,16 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
395 struct fuse_readpages_data data; 395 struct fuse_readpages_data data;
396 int err; 396 int err;
397 397
398 err = -EIO;
398 if (is_bad_inode(inode)) 399 if (is_bad_inode(inode))
399 return -EIO; 400 goto clean_pages_up;
400 401
401 data.file = file; 402 data.file = file;
402 data.inode = inode; 403 data.inode = inode;
403 data.req = fuse_get_req(fc); 404 data.req = fuse_get_req(fc);
405 err = PTR_ERR(data.req);
404 if (IS_ERR(data.req)) 406 if (IS_ERR(data.req))
405 return PTR_ERR(data.req); 407 goto clean_pages_up;
406 408
407 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 409 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
408 if (!err) { 410 if (!err) {
@@ -412,6 +414,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
412 fuse_put_request(fc, data.req); 414 fuse_put_request(fc, data.req);
413 } 415 }
414 return err; 416 return err;
417
418clean_pages_up:
419 put_pages_list(pages);
420 return err;
415} 421}
416 422
417static size_t fuse_send_write(struct fuse_req *req, struct file *file, 423static size_t fuse_send_write(struct fuse_req *req, struct file *file,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0dbf96621841..69c7750d55b8 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -59,7 +59,7 @@ struct fuse_inode {
59 struct fuse_req *forget_req; 59 struct fuse_req *forget_req;
60 60
61 /** Time in jiffies until the file attributes are valid */ 61 /** Time in jiffies until the file attributes are valid */
62 unsigned long i_time; 62 u64 i_time;
63}; 63};
64 64
65/** FUSE specific file data */ 65/** FUSE specific file data */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index dcaaabd3b9c4..7d25092262ae 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -51,7 +51,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
51 return NULL; 51 return NULL;
52 52
53 fi = get_fuse_inode(inode); 53 fi = get_fuse_inode(inode);
54 fi->i_time = jiffies - 1; 54 fi->i_time = 0;
55 fi->nodeid = 0; 55 fi->nodeid = 0;
56 fi->nlookup = 0; 56 fi->nlookup = 0;
57 fi->forget_req = fuse_request_alloc(); 57 fi->forget_req = fuse_request_alloc();
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6449cb697967..c3920c96dadf 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -83,8 +83,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
83 83
84 ret = -ENOMEM; 84 ret = -ENOMEM;
85 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 85 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
86 if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
87 goto out;
88 86
89 if (vma->vm_flags & VM_MAYSHARE && 87 if (vma->vm_flags & VM_MAYSHARE &&
90 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 88 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
@@ -93,7 +91,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
93 91
94 ret = 0; 92 ret = 0;
95 hugetlb_prefault_arch_hook(vma->vm_mm); 93 hugetlb_prefault_arch_hook(vma->vm_mm);
96 if (inode->i_size < len) 94 if (vma->vm_flags & VM_WRITE && inode->i_size < len)
97 inode->i_size = len; 95 inode->i_size = len;
98out: 96out:
99 mutex_unlock(&inode->i_mutex); 97 mutex_unlock(&inode->i_mutex);
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index f2386442adee..017cb0f134d6 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -187,7 +187,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
187{ 187{
188 struct inotify_kernel_event *kevent; 188 struct inotify_kernel_event *kevent;
189 189
190 kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); 190 kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
191 if (unlikely(!kevent)) 191 if (unlikely(!kevent))
192 return NULL; 192 return NULL;
193 193
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 93aa5715f224..78b1deae3fa2 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -44,6 +44,9 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
44 task->ioprio = ioprio; 44 task->ioprio = ioprio;
45 45
46 ioc = task->io_context; 46 ioc = task->io_context;
47 /* see wmb() in current_io_context() */
48 smp_read_barrier_depends();
49
47 if (ioc && ioc->set_ioprio) 50 if (ioc && ioc->set_ioprio)
48 ioc->set_ioprio(ioc, ioprio); 51 ioc->set_ioprio(ioc, ioprio);
49 52
@@ -111,9 +114,9 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
111 continue; 114 continue;
112 ret = set_task_ioprio(p, ioprio); 115 ret = set_task_ioprio(p, ioprio);
113 if (ret) 116 if (ret)
114 break; 117 goto free_uid;
115 } while_each_thread(g, p); 118 } while_each_thread(g, p);
116 119free_uid:
117 if (who) 120 if (who)
118 free_uid(user); 121 free_uid(user);
119 break; 122 break;
@@ -137,6 +140,29 @@ out:
137 return ret; 140 return ret;
138} 141}
139 142
143int ioprio_best(unsigned short aprio, unsigned short bprio)
144{
145 unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
146 unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
147
148 if (!ioprio_valid(aprio))
149 return bprio;
150 if (!ioprio_valid(bprio))
151 return aprio;
152
153 if (aclass == IOPRIO_CLASS_NONE)
154 aclass = IOPRIO_CLASS_BE;
155 if (bclass == IOPRIO_CLASS_NONE)
156 bclass = IOPRIO_CLASS_BE;
157
158 if (aclass == bclass)
159 return min(aprio, bprio);
160 if (aclass > bclass)
161 return bprio;
162 else
163 return aprio;
164}
165
140asmlinkage long sys_ioprio_get(int which, int who) 166asmlinkage long sys_ioprio_get(int which, int who)
141{ 167{
142 struct task_struct *g, *p; 168 struct task_struct *g, *p;
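
ioprio_best(), added above, merges two I/O priorities by scheduling class first (RT beats BE beats IDLE, i.e. the numerically smaller class wins) and by level within a class. A userspace copy of the helper using this era's encoding, where the class lives in the bits above IOPRIO_CLASS_SHIFT = 13:

#include <stdio.h>

#define IOPRIO_CLASS_SHIFT 13
#define IOPRIO_PRIO_CLASS(p) ((p) >> IOPRIO_CLASS_SHIFT)
#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | (data))

enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE };

#define ioprio_valid(p) (IOPRIO_PRIO_CLASS(p) != IOPRIO_CLASS_NONE)
#define min(a, b) ((a) < (b) ? (a) : (b))

static unsigned short ioprio_best(unsigned short aprio, unsigned short bprio)
{
	unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
	unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);

	if (!ioprio_valid(aprio))
		return bprio;
	if (!ioprio_valid(bprio))
		return aprio;

	if (aclass == IOPRIO_CLASS_NONE)
		aclass = IOPRIO_CLASS_BE;
	if (bclass == IOPRIO_CLASS_NONE)
		bclass = IOPRIO_CLASS_BE;

	if (aclass == bclass)
		return min(aprio, bprio);
	if (aclass > bclass)
		return bprio;
	else
		return aprio;
}

int main(void)
{
	unsigned short rt4 = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4);
	unsigned short be2 = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 2);
	unsigned short be6 = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 6);

	printf("%d\n", ioprio_best(rt4, be2) == rt4);	/* stronger class wins */
	printf("%d\n", ioprio_best(be2, be6) == be2);	/* same class: lower level */
	return 0;
}
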
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 0971814c38b8..42da60784311 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal)
261 struct buffer_head *bh = jh2bh(jh); 261 struct buffer_head *bh = jh2bh(jh);
262 262
263 jbd_lock_bh_state(bh); 263 jbd_lock_bh_state(bh);
264 kfree(jh->b_committed_data); 264 jbd_slab_free(jh->b_committed_data, bh->b_size);
265 jh->b_committed_data = NULL; 265 jh->b_committed_data = NULL;
266 jbd_unlock_bh_state(bh); 266 jbd_unlock_bh_state(bh);
267 } 267 }
@@ -745,14 +745,14 @@ restart_loop:
745 * Otherwise, we can just throw away the frozen data now. 745 * Otherwise, we can just throw away the frozen data now.
746 */ 746 */
747 if (jh->b_committed_data) { 747 if (jh->b_committed_data) {
748 kfree(jh->b_committed_data); 748 jbd_slab_free(jh->b_committed_data, bh->b_size);
749 jh->b_committed_data = NULL; 749 jh->b_committed_data = NULL;
750 if (jh->b_frozen_data) { 750 if (jh->b_frozen_data) {
751 jh->b_committed_data = jh->b_frozen_data; 751 jh->b_committed_data = jh->b_frozen_data;
752 jh->b_frozen_data = NULL; 752 jh->b_frozen_data = NULL;
753 } 753 }
754 } else if (jh->b_frozen_data) { 754 } else if (jh->b_frozen_data) {
755 kfree(jh->b_frozen_data); 755 jbd_slab_free(jh->b_frozen_data, bh->b_size);
756 jh->b_frozen_data = NULL; 756 jh->b_frozen_data = NULL;
757 } 757 }
758 758
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 8c9b28dff119..f66724ce443a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit);
84 84
85static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 85static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
86static void __journal_abort_soft (journal_t *journal, int errno); 86static void __journal_abort_soft (journal_t *journal, int errno);
87static int journal_create_jbd_slab(size_t slab_size);
87 88
88/* 89/*
89 * Helper function used to manage commit timeouts 90 * Helper function used to manage commit timeouts
@@ -328,10 +329,10 @@ repeat:
328 char *tmp; 329 char *tmp;
329 330
330 jbd_unlock_bh_state(bh_in); 331 jbd_unlock_bh_state(bh_in);
331 tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); 332 tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
332 jbd_lock_bh_state(bh_in); 333 jbd_lock_bh_state(bh_in);
333 if (jh_in->b_frozen_data) { 334 if (jh_in->b_frozen_data) {
334 kfree(tmp); 335 jbd_slab_free(tmp, bh_in->b_size);
335 goto repeat; 336 goto repeat;
336 } 337 }
337 338
@@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal)
1069int journal_load(journal_t *journal) 1070int journal_load(journal_t *journal)
1070{ 1071{
1071 int err; 1072 int err;
1073 journal_superblock_t *sb;
1072 1074
1073 err = load_superblock(journal); 1075 err = load_superblock(journal);
1074 if (err) 1076 if (err)
1075 return err; 1077 return err;
1076 1078
1079 sb = journal->j_superblock;
1077 /* If this is a V2 superblock, then we have to check the 1080 /* If this is a V2 superblock, then we have to check the
1078 * features flags on it. */ 1081 * features flags on it. */
1079 1082
1080 if (journal->j_format_version >= 2) { 1083 if (journal->j_format_version >= 2) {
1081 journal_superblock_t *sb = journal->j_superblock;
1082
1083 if ((sb->s_feature_ro_compat & 1084 if ((sb->s_feature_ro_compat &
1084 ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || 1085 ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
1085 (sb->s_feature_incompat & 1086 (sb->s_feature_incompat &
@@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal)
1090 } 1091 }
1091 } 1092 }
1092 1093
1094 /*
1095 * Create a slab for this blocksize
1096 */
1097 err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
1098 if (err)
1099 return err;
1100
1093 /* Let the recovery code check whether it needs to recover any 1101 /* Let the recovery code check whether it needs to recover any
1094 * data from the journal. */ 1102 * data from the journal. */
1095 if (journal_recover(journal)) 1103 if (journal_recover(journal))
@@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
1612} 1620}
1613 1621
1614/* 1622/*
1623 * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
1624 * and allocate frozen and commit buffers from these slabs.
1625 *
1626 * Reason for doing this is to avoid, SLAB_DEBUG - since it could
1627 * cause bh to cross page boundary.
1628 */
1629
1630#define JBD_MAX_SLABS 5
1631#define JBD_SLAB_INDEX(size) (size >> 11)
1632
1633static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
1634static const char *jbd_slab_names[JBD_MAX_SLABS] = {
1635 "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
1636};
1637
1638static void journal_destroy_jbd_slabs(void)
1639{
1640 int i;
1641
1642 for (i = 0; i < JBD_MAX_SLABS; i++) {
1643 if (jbd_slab[i])
1644 kmem_cache_destroy(jbd_slab[i]);
1645 jbd_slab[i] = NULL;
1646 }
1647}
1648
1649static int journal_create_jbd_slab(size_t slab_size)
1650{
1651 int i = JBD_SLAB_INDEX(slab_size);
1652
1653 BUG_ON(i >= JBD_MAX_SLABS);
1654
1655 /*
1656 * Check if we already have a slab created for this size
1657 */
1658 if (jbd_slab[i])
1659 return 0;
1660
1661 /*
1662 * Create a slab and force alignment to be same as slabsize -
1663 * this will make sure that allocations won't cross the page
1664 * boundary.
1665 */
1666 jbd_slab[i] = kmem_cache_create(jbd_slab_names[i],
1667 slab_size, slab_size, 0, NULL, NULL);
1668 if (!jbd_slab[i]) {
1669 printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
1670 return -ENOMEM;
1671 }
1672 return 0;
1673}
1674
1675void * jbd_slab_alloc(size_t size, gfp_t flags)
1676{
1677 int idx;
1678
1679 idx = JBD_SLAB_INDEX(size);
1680 BUG_ON(jbd_slab[idx] == NULL);
1681 return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
1682}
1683
1684void jbd_slab_free(void *ptr, size_t size)
1685{
1686 int idx;
1687
1688 idx = JBD_SLAB_INDEX(size);
1689 BUG_ON(jbd_slab[idx] == NULL);
1690 kmem_cache_free(jbd_slab[idx], ptr);
1691}
1692
1693/*
1615 * Journal_head storage management 1694 * Journal_head storage management
1616 */ 1695 */
1617static kmem_cache_t *journal_head_cache; 1696static kmem_cache_t *journal_head_cache;
@@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1799 printk(KERN_WARNING "%s: freeing " 1878 printk(KERN_WARNING "%s: freeing "
1800 "b_frozen_data\n", 1879 "b_frozen_data\n",
1801 __FUNCTION__); 1880 __FUNCTION__);
1802 kfree(jh->b_frozen_data); 1881 jbd_slab_free(jh->b_frozen_data, bh->b_size);
1803 } 1882 }
1804 if (jh->b_committed_data) { 1883 if (jh->b_committed_data) {
1805 printk(KERN_WARNING "%s: freeing " 1884 printk(KERN_WARNING "%s: freeing "
1806 "b_committed_data\n", 1885 "b_committed_data\n",
1807 __FUNCTION__); 1886 __FUNCTION__);
1808 kfree(jh->b_committed_data); 1887 jbd_slab_free(jh->b_committed_data, bh->b_size);
1809 } 1888 }
1810 bh->b_private = NULL; 1889 bh->b_private = NULL;
1811 jh->b_bh = NULL; /* debug, really */ 1890 jh->b_bh = NULL; /* debug, really */
@@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void)
1961 journal_destroy_revoke_caches(); 2040 journal_destroy_revoke_caches();
1962 journal_destroy_journal_head_cache(); 2041 journal_destroy_journal_head_cache();
1963 journal_destroy_handle_cache(); 2042 journal_destroy_handle_cache();
2043 journal_destroy_jbd_slabs();
1964} 2044}
1965 2045
1966static int __init journal_init(void) 2046static int __init journal_init(void)
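
A detail worth noting in the slab code above: JBD_SLAB_INDEX(size) is size >> 11, which maps the four supported journal block sizes to indices 0, 1, 2 and 4; index 3 can never be produced, which is why jbd_slab_names has a NULL in that slot. A quick check of the mapping:

#include <stdio.h>

#define JBD_SLAB_INDEX(size) ((size) >> 11)

int main(void)
{
	const unsigned sizes[] = { 1024, 2048, 4096, 8192 };
	for (int i = 0; i < 4; i++)
		printf("%u -> slab index %u\n", sizes[i],
		       JBD_SLAB_INDEX(sizes[i]));
	return 0;
}
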
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 508b2ea91f43..de2e4cbbf79a 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -666,8 +666,9 @@ repeat:
666 if (!frozen_buffer) { 666 if (!frozen_buffer) {
667 JBUFFER_TRACE(jh, "allocate memory for buffer"); 667 JBUFFER_TRACE(jh, "allocate memory for buffer");
668 jbd_unlock_bh_state(bh); 668 jbd_unlock_bh_state(bh);
669 frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, 669 frozen_buffer =
670 GFP_NOFS); 670 jbd_slab_alloc(jh2bh(jh)->b_size,
671 GFP_NOFS);
671 if (!frozen_buffer) { 672 if (!frozen_buffer) {
672 printk(KERN_EMERG 673 printk(KERN_EMERG
673 "%s: OOM for frozen_buffer\n", 674 "%s: OOM for frozen_buffer\n",
@@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
879 880
880repeat: 881repeat:
881 if (!jh->b_committed_data) { 882 if (!jh->b_committed_data) {
882 committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); 883 committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
883 if (!committed_data) { 884 if (!committed_data) {
884 printk(KERN_EMERG "%s: No memory for committed data\n", 885 printk(KERN_EMERG "%s: No memory for committed data\n",
885 __FUNCTION__); 886 __FUNCTION__);
@@ -906,7 +907,7 @@ repeat:
906out: 907out:
907 journal_put_journal_head(jh); 908 journal_put_journal_head(jh);
908 if (unlikely(committed_data)) 909 if (unlikely(committed_data))
909 kfree(committed_data); 910 jbd_slab_free(committed_data, bh->b_size);
910 return err; 911 return err;
911} 912}
912 913
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 43e3f566aad6..a223cf4faa9b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -168,16 +168,15 @@ void jfs_dirty_inode(struct inode *inode)
168 set_cflag(COMMIT_Dirty, inode); 168 set_cflag(COMMIT_Dirty, inode);
169} 169}
170 170
171static int 171int jfs_get_block(struct inode *ip, sector_t lblock,
172jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, 172 struct buffer_head *bh_result, int create)
173 struct buffer_head *bh_result, int create)
174{ 173{
175 s64 lblock64 = lblock; 174 s64 lblock64 = lblock;
176 int rc = 0; 175 int rc = 0;
177 xad_t xad; 176 xad_t xad;
178 s64 xaddr; 177 s64 xaddr;
179 int xflag; 178 int xflag;
180 s32 xlen = max_blocks; 179 s32 xlen = bh_result->b_size >> ip->i_blkbits;
181 180
182 /* 181 /*
183 * Take appropriate lock on inode 182 * Take appropriate lock on inode
@@ -188,7 +187,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
188 IREAD_LOCK(ip); 187 IREAD_LOCK(ip);
189 188
190 if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && 189 if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
191 (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && 190 (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) &&
192 xaddr) { 191 xaddr) {
193 if (xflag & XAD_NOTRECORDED) { 192 if (xflag & XAD_NOTRECORDED) {
194 if (!create) 193 if (!create)
@@ -255,13 +254,6 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
255 return rc; 254 return rc;
256} 255}
257 256
258static int jfs_get_block(struct inode *ip, sector_t lblock,
259 struct buffer_head *bh_result, int create)
260{
261 return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits,
262 bh_result, create);
263}
264
265static int jfs_writepage(struct page *page, struct writeback_control *wbc) 257static int jfs_writepage(struct page *page, struct writeback_control *wbc)
266{ 258{
267 return nobh_writepage(page, jfs_get_block, wbc); 259 return nobh_writepage(page, jfs_get_block, wbc);
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index b5c7da6190dc..1fc48df670c8 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t);
32extern void jfs_free_zero_link(struct inode *); 32extern void jfs_free_zero_link(struct inode *);
33extern struct dentry *jfs_get_parent(struct dentry *dentry); 33extern struct dentry *jfs_get_parent(struct dentry *dentry);
34extern void jfs_set_inode_flags(struct inode *); 34extern void jfs_set_inode_flags(struct inode *);
35extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
35 36
36extern const struct address_space_operations jfs_aops; 37extern const struct address_space_operations jfs_aops;
37extern struct inode_operations jfs_dir_inode_operations; 38extern struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 10c46231ce15..efbb586bed4b 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2944,7 +2944,7 @@ int jfs_sync(void *arg)
2944 * Inode is being freed 2944 * Inode is being freed
2945 */ 2945 */
2946 list_del_init(&jfs_ip->anon_inode_list); 2946 list_del_init(&jfs_ip->anon_inode_list);
2947 } else if (! !mutex_trylock(&jfs_ip->commit_mutex)) { 2947 } else if (mutex_trylock(&jfs_ip->commit_mutex)) {
2948 /* 2948 /*
2949 * inode will be removed from anonymous list 2949 * inode will be removed from anonymous list
2950 * when it is committed 2950 * when it is committed
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 09ea03f62277..295268ad231b 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
165 165
166 out3: 166 out3:
167 txEnd(tid); 167 txEnd(tid);
168 mutex_unlock(&JFS_IP(dip)->commit_mutex);
169 mutex_unlock(&JFS_IP(ip)->commit_mutex); 168 mutex_unlock(&JFS_IP(ip)->commit_mutex);
169 mutex_unlock(&JFS_IP(dip)->commit_mutex);
170 if (rc) { 170 if (rc) {
171 free_ea_wmap(ip); 171 free_ea_wmap(ip);
172 ip->i_nlink = 0; 172 ip->i_nlink = 0;
@@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
300 300
301 out3: 301 out3:
302 txEnd(tid); 302 txEnd(tid);
303 mutex_unlock(&JFS_IP(dip)->commit_mutex);
304 mutex_unlock(&JFS_IP(ip)->commit_mutex); 303 mutex_unlock(&JFS_IP(ip)->commit_mutex);
304 mutex_unlock(&JFS_IP(dip)->commit_mutex);
305 if (rc) { 305 if (rc) {
306 free_ea_wmap(ip); 306 free_ea_wmap(ip);
307 ip->i_nlink = 0; 307 ip->i_nlink = 0;
@@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
384 if (rc == -EIO) 384 if (rc == -EIO)
385 txAbort(tid, 1); 385 txAbort(tid, 1);
386 txEnd(tid); 386 txEnd(tid);
387 mutex_unlock(&JFS_IP(dip)->commit_mutex);
388 mutex_unlock(&JFS_IP(ip)->commit_mutex); 387 mutex_unlock(&JFS_IP(ip)->commit_mutex);
388 mutex_unlock(&JFS_IP(dip)->commit_mutex);
389 389
390 goto out2; 390 goto out2;
391 } 391 }
@@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
422 422
423 txEnd(tid); 423 txEnd(tid);
424 424
425 mutex_unlock(&JFS_IP(dip)->commit_mutex);
426 mutex_unlock(&JFS_IP(ip)->commit_mutex); 425 mutex_unlock(&JFS_IP(ip)->commit_mutex);
426 mutex_unlock(&JFS_IP(dip)->commit_mutex);
427 427
428 /* 428 /*
429 * Truncating the directory index table is not guaranteed. It 429 * Truncating the directory index table is not guaranteed. It
@@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
503 if (rc == -EIO) 503 if (rc == -EIO)
504 txAbort(tid, 1); /* Marks FS Dirty */ 504 txAbort(tid, 1); /* Marks FS Dirty */
505 txEnd(tid); 505 txEnd(tid);
506 mutex_unlock(&JFS_IP(dip)->commit_mutex);
507 mutex_unlock(&JFS_IP(ip)->commit_mutex); 506 mutex_unlock(&JFS_IP(ip)->commit_mutex);
507 mutex_unlock(&JFS_IP(dip)->commit_mutex);
508 IWRITE_UNLOCK(ip); 508 IWRITE_UNLOCK(ip);
509 goto out1; 509 goto out1;
510 } 510 }
@@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
527 if ((new_size = commitZeroLink(tid, ip)) < 0) { 527 if ((new_size = commitZeroLink(tid, ip)) < 0) {
528 txAbort(tid, 1); /* Marks FS Dirty */ 528 txAbort(tid, 1); /* Marks FS Dirty */
529 txEnd(tid); 529 txEnd(tid);
530 mutex_unlock(&JFS_IP(dip)->commit_mutex);
531 mutex_unlock(&JFS_IP(ip)->commit_mutex); 530 mutex_unlock(&JFS_IP(ip)->commit_mutex);
531 mutex_unlock(&JFS_IP(dip)->commit_mutex);
532 IWRITE_UNLOCK(ip); 532 IWRITE_UNLOCK(ip);
533 rc = new_size; 533 rc = new_size;
534 goto out1; 534 goto out1;
@@ -556,9 +556,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
556 556
557 txEnd(tid); 557 txEnd(tid);
558 558
559 mutex_unlock(&JFS_IP(dip)->commit_mutex);
560 mutex_unlock(&JFS_IP(ip)->commit_mutex); 559 mutex_unlock(&JFS_IP(ip)->commit_mutex);
561 560 mutex_unlock(&JFS_IP(dip)->commit_mutex);
562 561
563 while (new_size && (rc == 0)) { 562 while (new_size && (rc == 0)) {
564 tid = txBegin(dip->i_sb, 0); 563 tid = txBegin(dip->i_sb, 0);
@@ -847,8 +846,8 @@ static int jfs_link(struct dentry *old_dentry,
847 out: 846 out:
848 txEnd(tid); 847 txEnd(tid);
849 848
850 mutex_unlock(&JFS_IP(dir)->commit_mutex);
851 mutex_unlock(&JFS_IP(ip)->commit_mutex); 849 mutex_unlock(&JFS_IP(ip)->commit_mutex);
850 mutex_unlock(&JFS_IP(dir)->commit_mutex);
852 851
853 jfs_info("jfs_link: rc:%d", rc); 852 jfs_info("jfs_link: rc:%d", rc);
854 return rc; 853 return rc;
@@ -1037,8 +1036,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1037 1036
1038 out3: 1037 out3:
1039 txEnd(tid); 1038 txEnd(tid);
1040 mutex_unlock(&JFS_IP(dip)->commit_mutex);
1041 mutex_unlock(&JFS_IP(ip)->commit_mutex); 1039 mutex_unlock(&JFS_IP(ip)->commit_mutex);
1040 mutex_unlock(&JFS_IP(dip)->commit_mutex);
1042 if (rc) { 1041 if (rc) {
1043 free_ea_wmap(ip); 1042 free_ea_wmap(ip);
1044 ip->i_nlink = 0; 1043 ip->i_nlink = 0;
@@ -1160,10 +1159,11 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1160 if (S_ISDIR(new_ip->i_mode)) { 1159 if (S_ISDIR(new_ip->i_mode)) {
1161 new_ip->i_nlink--; 1160 new_ip->i_nlink--;
1162 if (new_ip->i_nlink) { 1161 if (new_ip->i_nlink) {
1163 mutex_unlock(&JFS_IP(new_dir)->commit_mutex); 1162 mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
1164 mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
1165 if (old_dir != new_dir) 1163 if (old_dir != new_dir)
1166 mutex_unlock(&JFS_IP(old_dir)->commit_mutex); 1164 mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
1165 mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
1166 mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
1167 if (!S_ISDIR(old_ip->i_mode) && new_ip) 1167 if (!S_ISDIR(old_ip->i_mode) && new_ip)
1168 IWRITE_UNLOCK(new_ip); 1168 IWRITE_UNLOCK(new_ip);
1169 jfs_error(new_ip->i_sb, 1169 jfs_error(new_ip->i_sb,
@@ -1281,13 +1281,12 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1281 1281
1282 out4: 1282 out4:
1283 txEnd(tid); 1283 txEnd(tid);
1284
1285 mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
1286 mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
1287 if (old_dir != new_dir)
1288 mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
1289 if (new_ip) 1284 if (new_ip)
1290 mutex_unlock(&JFS_IP(new_ip)->commit_mutex); 1285 mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
1286 if (old_dir != new_dir)
1287 mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
1288 mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
1289 mutex_unlock(&JFS_IP(new_dir)->commit_mutex);
1291 1290
1292 while (new_size && (rc == 0)) { 1291 while (new_size && (rc == 0)) {
1293 tid = txBegin(new_ip->i_sb, 0); 1292 tid = txBegin(new_ip->i_sb, 0);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4f6cfebc82db..143bcd1d5eaa 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -26,6 +26,7 @@
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
 #include <linux/posix_acl.h>
+#include <linux/buffer_head.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
 
@@ -298,7 +299,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 			break;
 		}
 
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
 		case Opt_quota:
 		case Opt_usrquota:
 			*flag |= JFS_USRQUOTA;
@@ -597,7 +598,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (sbi->flag & JFS_NOINTEGRITY)
 		seq_puts(seq, ",nointegrity");
 
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
 	if (sbi->flag & JFS_USRQUOTA)
 		seq_puts(seq, ",usrquota");
 
@@ -608,6 +609,113 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	return 0;
 }
 
+#ifdef CONFIG_QUOTA
+
+/* Read data from quotafile - avoid pagecache and such because we cannot afford
+ * acquiring the locks... As quota files are never truncated and quota code
+ * itself serializes the operations (and noone else should touch the files)
+ * we don't have to be afraid of races */
+static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
+			      size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	sector_t blk = off >> sb->s_blocksize_bits;
+	int err = 0;
+	int offset = off & (sb->s_blocksize - 1);
+	int tocopy;
+	size_t toread;
+	struct buffer_head tmp_bh;
+	struct buffer_head *bh;
+	loff_t i_size = i_size_read(inode);
+
+	if (off > i_size)
+		return 0;
+	if (off+len > i_size)
+		len = i_size-off;
+	toread = len;
+	while (toread > 0) {
+		tocopy = sb->s_blocksize - offset < toread ?
+				sb->s_blocksize - offset : toread;
+
+		tmp_bh.b_state = 0;
+		tmp_bh.b_size = 1 << inode->i_blkbits;
+		err = jfs_get_block(inode, blk, &tmp_bh, 0);
+		if (err)
+			return err;
+		if (!buffer_mapped(&tmp_bh))	/* A hole? */
+			memset(data, 0, tocopy);
+		else {
+			bh = sb_bread(sb, tmp_bh.b_blocknr);
+			if (!bh)
+				return -EIO;
+			memcpy(data, bh->b_data+offset, tocopy);
+			brelse(bh);
+		}
+		offset = 0;
+		toread -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+	return len;
+}
+
+/* Write to quotafile */
+static ssize_t jfs_quota_write(struct super_block *sb, int type,
+			       const char *data, size_t len, loff_t off)
+{
+	struct inode *inode = sb_dqopt(sb)->files[type];
+	sector_t blk = off >> sb->s_blocksize_bits;
+	int err = 0;
+	int offset = off & (sb->s_blocksize - 1);
+	int tocopy;
+	size_t towrite = len;
+	struct buffer_head tmp_bh;
+	struct buffer_head *bh;
+
+	mutex_lock(&inode->i_mutex);
+	while (towrite > 0) {
+		tocopy = sb->s_blocksize - offset < towrite ?
+				sb->s_blocksize - offset : towrite;
+
+		tmp_bh.b_state = 0;
+		tmp_bh.b_size = 1 << inode->i_blkbits;
+		err = jfs_get_block(inode, blk, &tmp_bh, 1);
+		if (err)
+			goto out;
+		if (offset || tocopy != sb->s_blocksize)
+			bh = sb_bread(sb, tmp_bh.b_blocknr);
+		else
+			bh = sb_getblk(sb, tmp_bh.b_blocknr);
+		if (!bh) {
+			err = -EIO;
+			goto out;
+		}
+		lock_buffer(bh);
+		memcpy(bh->b_data+offset, data, tocopy);
+		flush_dcache_page(bh->b_page);
+		set_buffer_uptodate(bh);
+		mark_buffer_dirty(bh);
+		unlock_buffer(bh);
+		brelse(bh);
+		offset = 0;
+		towrite -= tocopy;
+		data += tocopy;
+		blk++;
+	}
+out:
+	if (len == towrite)
+		return err;
+	if (inode->i_size < off+len-towrite)
+		i_size_write(inode, off+len-towrite);
+	inode->i_version++;
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(inode);
+	mutex_unlock(&inode->i_mutex);
+	return len - towrite;
+}
+
+#endif
+
 static struct super_operations jfs_super_operations = {
 	.alloc_inode	= jfs_alloc_inode,
 	.destroy_inode	= jfs_destroy_inode,
@@ -621,7 +729,11 @@ static struct super_operations jfs_super_operations = {
 	.unlockfs	= jfs_unlockfs,
 	.statfs		= jfs_statfs,
 	.remount_fs	= jfs_remount,
-	.show_options	= jfs_show_options
+	.show_options	= jfs_show_options,
+#ifdef CONFIG_QUOTA
+	.quota_read	= jfs_quota_read,
+	.quota_write	= jfs_quota_write,
+#endif
 };
 
 static struct export_operations jfs_export_operations = {
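
Note the design choice inside jfs_quota_write() above: a block the write only partially covers is read back first with sb_bread() (read-modify-write), while a block that will be overwritten in full is grabbed without I/O via sb_getblk(). A rough userspace sketch of that same decision, assuming a plain file stands in for the block device (BLKSIZE and the file name are invented for the example):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    #define BLKSIZE 4096

    /*
     * Write "len" bytes at "off", block by block.  A partially covered
     * block must be read first to preserve its surrounding bytes; a
     * fully overwritten block needs no read at all.
     */
    static ssize_t blockwise_write(int fd, const char *data, size_t len, off_t off)
    {
        char blk[BLKSIZE];
        size_t written = 0;

        while (written < len) {
            off_t blkstart = (off / BLKSIZE) * BLKSIZE;
            size_t inblk = (size_t)(off % BLKSIZE);
            size_t chunk = BLKSIZE - inblk;
            if (chunk > len - written)
                chunk = len - written;

            memset(blk, 0, sizeof(blk));
            if (inblk || chunk != BLKSIZE) {
                /* partial block: read-modify-write */
                if (pread(fd, blk, BLKSIZE, blkstart) < 0)
                    return -1;
            }
            memcpy(blk + inblk, data + written, chunk);
            if (pwrite(fd, blk, BLKSIZE, blkstart) < 0)
                return -1;

            written += chunk;
            off += chunk;
        }
        return (ssize_t)written;
    }

    int main(void)
    {
        int fd = open("quota.img", O_RDWR | O_CREAT, 0600);
        if (fd < 0 || blockwise_write(fd, "hello", 5, 4090) < 0)
            perror("blockwise_write");
        close(fd);
        return 0;
    }
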
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 5980c45998cc..89ba0df14c22 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -454,7 +454,7 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
 	fl->fl_ops = &nlmclnt_lock_ops;
 }
 
-static void do_vfs_lock(struct file_lock *fl)
+static int do_vfs_lock(struct file_lock *fl)
 {
 	int res = 0;
 	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
@@ -467,9 +467,7 @@ static void do_vfs_lock(struct file_lock *fl)
 		default:
 			BUG();
 	}
-	if (res < 0)
-		printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
-				__FUNCTION__);
+	return res;
 }
 
 /*
@@ -498,6 +496,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_host *host = req->a_host;
 	struct nlm_res *resp = &req->a_res;
 	struct nlm_wait *block = NULL;
+	unsigned char fl_flags = fl->fl_flags;
 	int status = -ENOLCK;
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
@@ -505,6 +504,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 				host->h_name);
 		goto out;
 	}
+	fl->fl_flags |= FL_ACCESS;
+	status = do_vfs_lock(fl);
+	if (status < 0)
+		goto out;
 
 	block = nlmclnt_prepare_block(host, fl);
 again:
@@ -539,9 +542,10 @@ again:
 			up_read(&host->h_rwsem);
 			goto again;
 		}
-		fl->fl_flags |= FL_SLEEP;
 		/* Ensure the resulting lock will get added to granted list */
-		do_vfs_lock(fl);
+		fl->fl_flags = fl_flags | FL_SLEEP;
+		if (do_vfs_lock(fl) < 0)
+			printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
 		up_read(&host->h_rwsem);
 	}
 	status = nlm_stat_to_errno(resp->status);
@@ -552,6 +556,7 @@ out_unblock:
 		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
 	nlm_release_call(req);
+	fl->fl_flags = fl_flags;
 	return status;
 }
 
@@ -606,15 +611,19 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 {
 	struct nlm_host *host = req->a_host;
 	struct nlm_res *resp = &req->a_res;
-	int status;
+	int status = 0;
 
 	/*
 	 * Note: the server is supposed to either grant us the unlock
 	 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
 	 * case, we want to unlock.
 	 */
+	fl->fl_flags |= FL_EXISTS;
 	down_read(&host->h_rwsem);
-	do_vfs_lock(fl);
+	if (do_vfs_lock(fl) == -ENOENT) {
+		up_read(&host->h_rwsem);
+		goto out;
+	}
 	up_read(&host->h_rwsem);
 
 	if (req->a_flags & RPC_TASK_ASYNC)
@@ -624,7 +633,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	if (status < 0)
 		goto out;
 
-	status = 0;
 	if (resp->status == NLM_LCK_GRANTED)
 		goto out;
 
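
The FL_ACCESS probe added to nlmclnt_lock() asks the local VFS whether the lock could be granted, without actually installing it, before any NLM traffic is generated. The closest userspace analogue is probing with F_GETLK before committing with F_SETLK; a sketch of that shape (the probe is only advisory -- a conflict can still appear before F_SETLK, so the real call is checked too):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("lockfile", O_RDWR | O_CREAT, 0600);
        struct flock lk = {
            .l_type = F_WRLCK,
            .l_whence = SEEK_SET,
            .l_start = 0,
            .l_len = 0,          /* whole file */
        };

        if (fd < 0)
            return 1;

        /* Probe only: tests for a conflict without taking the lock. */
        if (fcntl(fd, F_GETLK, &lk) < 0)
            return 1;
        if (lk.l_type != F_UNLCK) {
            fprintf(stderr, "would conflict with pid %ld\n", (long)lk.l_pid);
            return 1;
        }

        /* Probe was clean; now take the lock for real. */
        lk.l_type = F_WRLCK;
        if (fcntl(fd, F_SETLK, &lk) < 0) {
            perror("F_SETLK");
            return 1;
        }
        puts("locked");
        close(fd);
        return 0;
    }
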
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index baf5ae513481..c9d419703cf3 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -638,9 +638,6 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	if (task->tk_status < 0) {
 		/* RPC error: Re-insert for retransmission */
 		timeout = 10 * HZ;
-	} else if (block->b_done) {
-		/* Block already removed, kill it for real */
-		timeout = 0;
 	} else {
 		/* Call was successful, now wait for client callback */
 		timeout = 60 * HZ;
@@ -709,13 +706,10 @@ nlmsvc_retry_blocked(void)
 			break;
 		if (time_after(block->b_when,jiffies))
 			break;
-		dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
-			block, block->b_when, block->b_done);
+		dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
+			block, block->b_when);
 		kref_get(&block->b_count);
-		if (block->b_done)
-			nlmsvc_unlink_block(block);
-		else
-			nlmsvc_grant_blocked(block);
+		nlmsvc_grant_blocked(block);
 		nlmsvc_release_block(block);
 	}
 
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 2a4df9b3779a..01b4db9e5466 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -237,19 +237,22 @@ static int
 nlm_traverse_files(struct nlm_host *host, int action)
 {
 	struct nlm_file	*file, **fp;
-	int i;
+	int i, ret = 0;
 
 	mutex_lock(&nlm_file_mutex);
 	for (i = 0; i < FILE_NRHASH; i++) {
 		fp = nlm_files + i;
 		while ((file = *fp) != NULL) {
+			file->f_count++;
+			mutex_unlock(&nlm_file_mutex);
+
 			/* Traverse locks, blocks and shares of this file
 			 * and update file->f_locks count */
-			if (nlm_inspect_file(host, file, action)) {
-				mutex_unlock(&nlm_file_mutex);
-				return 1;
-			}
+			if (nlm_inspect_file(host, file, action))
+				ret = 1;
 
+			mutex_lock(&nlm_file_mutex);
+			file->f_count--;
 			/* No more references to this file. Let go of it. */
 			if (!file->f_blocks && !file->f_locks
 			 && !file->f_shares && !file->f_count) {
@@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action)
 		}
 	}
 	mutex_unlock(&nlm_file_mutex);
-	return 0;
+	return ret;
 }
 
 /*
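
The svcsubs.c change above is the classic pin-then-drop pattern: bump a per-object reference count while the table mutex is held, release the mutex for the long traversal (which may itself need the mutex), then retake it and drop the pin before the freeable check runs. A compact userspace sketch with a single entry and a POSIX mutex (all names invented):

    #include <pthread.h>
    #include <stdio.h>

    struct entry {
        int refcount;
        int data;
    };

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct entry the_entry = { .refcount = 1, .data = 42 };

    /* Long-running work that must not run under table_lock. */
    static void inspect(struct entry *e)
    {
        printf("data = %d\n", e->data);
    }

    static void traverse(void)
    {
        pthread_mutex_lock(&table_lock);
        struct entry *e = &the_entry;

        e->refcount++;                   /* pin: entry can't be freed */
        pthread_mutex_unlock(&table_lock);

        inspect(e);                      /* safe: we hold a reference */

        pthread_mutex_lock(&table_lock);
        e->refcount--;                   /* unpin; freeable checks may run now */
        pthread_mutex_unlock(&table_lock);
    }

    int main(void)
    {
        traverse();
        return 0;
    }
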
diff --git a/fs/locks.c b/fs/locks.c
index 1ad29c9b6252..d7c53392cac1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -725,6 +725,10 @@ next_task:
 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
  * at the head of the list, but that's secret knowledge known only to
  * flock_lock_file and posix_lock_file.
+ *
+ * Note that if called with an FL_EXISTS argument, the caller may determine
+ * whether or not a lock was successfully freed by testing the return
+ * value for -ENOENT.
  */
 static int flock_lock_file(struct file *filp, struct file_lock *request)
 {
@@ -735,6 +739,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	int found = 0;
 
 	lock_kernel();
+	if (request->fl_flags & FL_ACCESS)
+		goto find_conflict;
 	for_each_lock(inode, before) {
 		struct file_lock *fl = *before;
 		if (IS_POSIX(fl))
@@ -750,8 +756,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 		break;
 	}
 
-	if (request->fl_type == F_UNLCK)
+	if (request->fl_type == F_UNLCK) {
+		if ((request->fl_flags & FL_EXISTS) && !found)
+			error = -ENOENT;
 		goto out;
+	}
 
 	error = -ENOMEM;
 	new_fl = locks_alloc_lock();
@@ -764,6 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	if (found)
 		cond_resched();
 
+find_conflict:
 	for_each_lock(inode, before) {
 		struct file_lock *fl = *before;
 		if (IS_POSIX(fl))
@@ -777,6 +787,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 			locks_insert_block(fl, request);
 			goto out;
 		}
+		if (request->fl_flags & FL_ACCESS)
+			goto out;
 		locks_copy_lock(new_fl, request);
 		locks_insert_lock(&inode->i_flock, new_fl);
 		new_fl = NULL;
@@ -948,8 +960,11 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 
 	error = 0;
 	if (!added) {
-		if (request->fl_type == F_UNLCK)
+		if (request->fl_type == F_UNLCK) {
+			if (request->fl_flags & FL_EXISTS)
+				error = -ENOENT;
 			goto out;
+		}
 
 		if (!new_fl) {
 			error = -ENOLCK;
@@ -996,6 +1011,10 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 * Add a POSIX style lock to a file.
 * We merge adjacent & overlapping locks whenever possible.
 * POSIX locks are sorted by owner task, then by starting address
+ *
+ * Note that if called with an FL_EXISTS argument, the caller may determine
+ * whether or not a lock was successfully freed by testing the return
+ * value for -ENOENT.
 */
 int posix_lock_file(struct file *filp, struct file_lock *fl)
 {
@@ -1402,8 +1421,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
 	if (!leases_enable)
 		goto out;
 
-	error = lease_alloc(filp, arg, &fl);
-	if (error)
+	error = -ENOMEM;
+	fl = locks_alloc_lock();
+	if (fl == NULL)
 		goto out;
 
 	locks_copy_lock(fl, lease);
@@ -1411,6 +1431,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
 	locks_insert_lock(before, fl);
 
 	*flp = fl;
+	error = 0;
 out:
 	return error;
 }
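
With FL_EXISTS, an F_UNLCK request now reports -ENOENT when there was no lock to free instead of silently succeeding -- exactly the signal the NLM and NFSv4 callers above use to detect that the VFS and the lock manager disagree. A toy lock table showing the same return-value contract (all names invented):

    #include <errno.h>
    #include <stdio.h>

    #define MAX_LOCKS 8

    static int locks[MAX_LOCKS];    /* 0 = free slot, nonzero = owner id */

    /*
     * Remove "owner"'s lock.  Like an F_UNLCK request with FL_EXISTS
     * set, return -ENOENT when there was nothing to remove, so the
     * caller can tell "freed a lock" apart from "no lock was there".
     */
    static int unlock(int owner)
    {
        for (int i = 0; i < MAX_LOCKS; i++) {
            if (locks[i] == owner) {
                locks[i] = 0;
                return 0;
            }
        }
        return -ENOENT;
    }

    int main(void)
    {
        locks[0] = 7;
        printf("first unlock: %d\n", unlock(7));   /* 0 */
        printf("second unlock: %d\n", unlock(7));  /* -ENOENT */
        return 0;
    }
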
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 9ea91c5eeb7b..330ff9fc7cf0 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	/*
 	 * Allocate the buffer map to keep the superblock small.
 	 */
+	if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+		goto out_illegal_sb;
 	i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
 	map = kmalloc(i, GFP_KERNEL);
 	if (!map)
@@ -263,7 +265,7 @@ out_no_root:
 
 out_no_bitmap:
 	printk("MINIX-fs: bad superblock or unable to read bitmaps\n");
- out_freemap:
+out_freemap:
 	for (i = 0; i < sbi->s_imap_blocks; i++)
 		brelse(sbi->s_imap[i]);
 	for (i = 0; i < sbi->s_zmap_blocks; i++)
@@ -276,11 +278,16 @@ out_no_map:
 	printk("MINIX-fs: can't allocate map\n");
 	goto out_release;
 
+out_illegal_sb:
+	if (!silent)
+		printk("MINIX-fs: bad superblock\n");
+	goto out_release;
+
 out_no_fs:
 	if (!silent)
 		printk("VFS: Can't find a Minix or Minix V2 filesystem "
 		       "on device %s\n", s->s_id);
- out_release:
+out_release:
 	brelse(bh);
 	goto out;
 
@@ -290,7 +297,7 @@ out_bad_hblock:
 
 out_bad_sb:
 	printk("MINIX-fs: unable to read superblock\n");
- out:
+out:
 	s->s_fs_info = NULL;
 	kfree(sbi);
 	return -EINVAL;
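
The minix fix rejects a superblock whose bitmap counts are zero before those counts size a kmalloc(). The general rule -- validate on-disk, untrusted fields before they feed an allocation -- in a small userspace sketch; the zero check mirrors the patch, while the upper bound is an extra illustrative guard, not something the patch adds:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct ondisk_sb {
        uint32_t imap_blocks;
        uint32_t zmap_blocks;
    };

    /* Reject nonsense values read from disk before sizing an allocation. */
    static void **alloc_maps(const struct ondisk_sb *sb)
    {
        if (sb->imap_blocks == 0 || sb->zmap_blocks == 0)
            return NULL;                /* corrupt superblock */
        if (sb->imap_blocks > 1024 || sb->zmap_blocks > 1024)
            return NULL;                /* implausibly large: refuse */

        size_t n = (size_t)sb->imap_blocks + sb->zmap_blocks;
        return calloc(n, sizeof(void *));
    }

    int main(void)
    {
        struct ondisk_sb bad = { 0, 16 };
        printf("%s\n", alloc_maps(&bad) ? "mounted" : "bad superblock");
        return 0;
    }
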
diff --git a/fs/namei.c b/fs/namei.c
index c9750d755aff..432d6bc6fab0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -159,7 +159,7 @@ char * getname(const char __user * filename)
 #ifdef CONFIG_AUDITSYSCALL
 void putname(const char *name)
 {
-	if (unlikely(current->audit_context))
+	if (unlikely(!audit_dummy_context()))
 		audit_putname(name);
 	else
 		__putname(name);
@@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask,
 
 int permission(struct inode *inode, int mask, struct nameidata *nd)
 {
+	umode_t mode = inode->i_mode;
 	int retval, submask;
 
 	if (mask & MAY_WRITE) {
-		umode_t mode = inode->i_mode;
 
 		/*
 		 * Nobody gets write access to a read-only fs.
@@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 
+	/*
+	 * MAY_EXEC on regular files requires special handling: We override
+	 * filesystem execute permissions if the mode bits aren't set.
+	 */
+	if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+		return -EACCES;
+
 	/* Ordinary permission routines do not understand MAY_APPEND. */
 	submask = mask & ~MAY_APPEND;
 	if (inode->i_op && inode->i_op->permission)
@@ -1125,7 +1132,7 @@ static int fastcall do_path_lookup(int dfd, const char *name,
 	retval = link_path_walk(name, nd);
 out:
 	if (likely(retval == 0)) {
-		if (unlikely(current->audit_context && nd && nd->dentry &&
+		if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
 				nd->dentry->d_inode))
 		audit_inode(name, nd->dentry->d_inode);
 	}
@@ -1357,7 +1364,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
 		return -ENOENT;
 
 	BUG_ON(victim->d_parent->d_inode != dir);
-	audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino);
+	audit_inode_child(victim->d_name.name, victim->d_inode, dir);
 
 	error = permission(dir,MAY_WRITE | MAY_EXEC, NULL);
 	if (error)
@@ -1659,6 +1666,7 @@ do_last:
 	 * It already exists.
 	 */
 	mutex_unlock(&dir->d_inode->i_mutex);
+	audit_inode_update(path.dentry->d_inode);
 
 	error = -EEXIST;
 	if (flag & O_EXCL)
@@ -1669,6 +1677,7 @@ do_last:
 		if (flag & O_NOFOLLOW)
 			goto exit_dput;
 	}
+
 	error = -ENOENT;
 	if (!path.dentry->d_inode)
 		goto exit_dput;
@@ -1712,8 +1721,14 @@ do_link:
 	if (error)
 		goto exit_dput;
 	error = __do_follow_link(&path, nd);
-	if (error)
+	if (error) {
+		/* Does someone understand code flow here? Or it is only
+		 * me so stupid? Anathema to whoever designed this non-sense
+		 * with "intent.open".
+		 */
+		release_open_intent(nd);
 		return error;
+	}
 	nd->flags &= ~LOOKUP_PARENT;
 	if (nd->last_type == LAST_BIND)
 		goto ok;
@@ -1759,6 +1774,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 	if (nd->last_type != LAST_NORM)
 		goto fail;
 	nd->flags &= ~LOOKUP_PARENT;
+	nd->flags |= LOOKUP_CREATE;
+	nd->intent.open.flags = O_EXCL;
 
 	/*
 	 * Do the final lookup.
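
The new permission() check above refuses MAY_EXEC on a regular file when no execute bit is set anywhere in the mode, before any filesystem-specific ->permission() hook gets a chance to say otherwise. The predicate is simple enough to lift into a standalone sketch:

    #include <stdio.h>
    #include <sys/stat.h>

    #ifndef S_IXUGO
    #define S_IXUGO (S_IXUSR | S_IXGRP | S_IXOTH)
    #endif

    /*
     * Mirror of the check: an execute request on a regular file fails
     * outright when no x bit is set, regardless of finer-grained logic.
     */
    static int may_exec(mode_t mode)
    {
        if (S_ISREG(mode) && !(mode & S_IXUGO))
            return -1;      /* -EACCES in the kernel */
        return 0;
    }

    int main(void)
    {
        printf("0644: %s\n", may_exec(S_IFREG | 0644) ? "EACCES" : "ok");
        printf("0755: %s\n", may_exec(S_IFREG | 0755) ? "EACCES" : "ok");
        return 0;
    }
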
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3ddda6f7ecc2..e7ffb4deb3e5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -690,7 +690,9 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
 			goto out_force;
 		/* This is an open(2) */
 		if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 &&
-				!(server->flags & NFS_MOUNT_NOCTO))
+				!(server->flags & NFS_MOUNT_NOCTO) &&
+				(S_ISREG(inode->i_mode) ||
+				 S_ISDIR(inode->i_mode)))
 			goto out_force;
 	}
 	return nfs_revalidate_inode(server, inode);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4cdd1b499e35..fecd3b095deb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -67,25 +67,19 @@ struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
 	/* I/O parameters */
-	struct list_head	list,		/* nfs_read/write_data structs */
-				rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_open_context	*ctx;		/* file open context info */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	struct inode *		inode;		/* target file of i/o */
-	unsigned long		user_addr;	/* location of user's buffer */
-	size_t			user_count;	/* total bytes to move */
-	loff_t			pos;		/* starting offset in file */
-	struct page **		pages;		/* pages in our buffer */
-	unsigned int		npages;		/* count of pages */
 
 	/* completion state */
+	atomic_t		io_count;	/* i/os we're waiting for */
 	spinlock_t		lock;		/* protect completion state */
-	int			outstanding;	/* i/os we're waiting for */
 	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
 	struct completion	completion;	/* wait for i/o completion */
 
 	/* commit state */
+	struct list_head	rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_write_data *	commit_data;	/* special write_data for commits */
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
@@ -93,8 +87,37 @@ struct nfs_direct_req {
 	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
+static const struct rpc_call_ops nfs_write_direct_ops;
+
+static inline void get_dreq(struct nfs_direct_req *dreq)
+{
+	atomic_inc(&dreq->io_count);
+}
+
+static inline int put_dreq(struct nfs_direct_req *dreq)
+{
+	return atomic_dec_and_test(&dreq->io_count);
+}
+
+/*
+ * "size" is never larger than rsize or wsize.
+ */
+static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
+{
+	int page_count;
+
+	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	page_count -= user_addr >> PAGE_SHIFT;
+	BUG_ON(page_count < 0);
+
+	return page_count;
+}
+
+static inline unsigned int nfs_max_pages(unsigned int size)
+{
+	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+}
 
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
@@ -118,50 +141,21 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static void nfs_direct_dirty_pages(struct page **pages, int npages)
 {
 	int i;
 	for (i = 0; i < npages; i++) {
 		struct page *page = pages[i];
-		if (do_dirty && !PageCompound(page))
+		if (!PageCompound(page))
 			set_page_dirty_lock(page);
-		page_cache_release(page);
 	}
-	kfree(pages);
 }
 
-static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
+static void nfs_direct_release_pages(struct page **pages, int npages)
 {
-	int result = -ENOMEM;
-	unsigned long page_count;
-	size_t array_size;
-
-	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	page_count -= user_addr >> PAGE_SHIFT;
-
-	array_size = (page_count * sizeof(struct page *));
-	*pages = kmalloc(array_size, GFP_KERNEL);
-	if (*pages) {
-		down_read(&current->mm->mmap_sem);
-		result = get_user_pages(current, current->mm, user_addr,
-					page_count, (rw == READ), 0,
-					*pages, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (result != page_count) {
-			/*
-			 * If we got fewer pages than expected from
-			 * get_user_pages(), the user buffer runs off the
-			 * end of a mapping; return EFAULT.
-			 */
-			if (result >= 0) {
-				nfs_free_user_pages(*pages, result, 0);
-				result = -EFAULT;
-			} else
-				kfree(*pages);
-			*pages = NULL;
-		}
-	}
-	return result;
+	int i;
+	for (i = 0; i < npages; i++)
+		page_cache_release(pages[i]);
 }
 
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
@@ -173,13 +167,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 		return NULL;
 
 	kref_init(&dreq->kref);
+	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
-	INIT_LIST_HEAD(&dreq->list);
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	dreq->ctx = NULL;
 	spin_lock_init(&dreq->lock);
-	dreq->outstanding = 0;
+	atomic_set(&dreq->io_count, 0);
 	dreq->count = 0;
 	dreq->error = 0;
 	dreq->flags = 0;
@@ -220,18 +214,11 @@ out:
 }
 
 /*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- *
- * In addition, synchronous I/O uses a stack-allocated iocb. Thus we
- * can't trust the iocb is still valid here if this is a synchronous
- * request. If the waiter is woken prematurely, the iocb is long gone.
+ * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
+ * the iocb is still valid here if this is a synchronous request.
 */
 static void nfs_direct_complete(struct nfs_direct_req *dreq)
 {
-	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
-
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
 		if (!res)
@@ -244,48 +231,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 }
 
 /*
- * Note we also set the number of requests we have in the dreq when we are
- * done. This prevents races with I/O completion so we will always wait
- * until all requests have been dispatched and completed.
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete. This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
 */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						  struct nfs_read_data, pages);
-				list_del(&data->pages);
-				nfs_readdata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		dreq->outstanding++;
-		if (nbytes <= rsize)
-			break;
-		nbytes -= rsize;
-	}
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
@@ -294,6 +243,9 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 	if (nfs_readpage_result(task, data) != 0)
 		return;
 
+	nfs_direct_dirty_pages(data->pagevec, data->npages);
+	nfs_direct_release_pages(data->pagevec, data->npages);
+
 	spin_lock(&dreq->lock);
 
 	if (likely(task->tk_status >= 0))
@@ -301,13 +253,10 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 	else
 		dreq->error = task->tk_status;
 
-	if (--dreq->outstanding) {
-		spin_unlock(&dreq->lock);
-		return;
-	}
-
 	spin_unlock(&dreq->lock);
-	nfs_direct_complete(dreq);
+
+	if (put_dreq(dreq))
+		nfs_direct_complete(dreq);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -316,41 +265,60 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 };
 
 /*
- * For each nfs_read_data struct that was allocated on the list, dispatch
- * an NFS READ operation
+ * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+ * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
+ * bail and stop sending more reads.  Read length accounting is
+ * handled automatically by nfs_direct_read_result().  Otherwise, if
+ * no requests have been sent, just return an error.
 */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
+static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
-	struct page **pages = dreq->pages;
-	size_t count = dreq->user_count;
-	loff_t pos = dreq->pos;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int curpage, pgbase;
+	unsigned int rpages = nfs_max_pages(rsize);
+	unsigned int pgbase;
+	int result;
+	ssize_t started = 0;
+
+	get_dreq(dreq);
 
-	curpage = 0;
-	pgbase = dreq->user_addr & ~PAGE_MASK;
+	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_readdata_alloc(rpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = rsize;
 		if (count < rsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_read_data, pages);
-		list_del_init(&data->pages);
+		data->npages = nfs_direct_count_pages(user_addr, bytes);
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					data->npages, 1, 0, data->pagevec, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_readdata_release(data);
+			break;
+		}
+
+		get_dreq(dreq);
 
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = &pages[curpage];
+		data->args.pages = data->pagevec;
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
@@ -373,33 +341,35 @@
 				bytes,
 				(unsigned long long)data->args.offset);
 
+		started += bytes;
+		user_addr += bytes;
 		pos += bytes;
 		pgbase += bytes;
-		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
+
+	if (put_dreq(dreq))
+		nfs_direct_complete(dreq);
+
+	if (started)
+		return 0;
+	return result < 0 ? (ssize_t) result : -EFAULT;
 }
 
-static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
-	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
 
-	dreq->user_addr = user_addr;
-	dreq->user_count = count;
-	dreq->pos = pos;
-	dreq->pages = pages;
-	dreq->npages = nr_pages;
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
@@ -407,8 +377,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq);
-	result = nfs_direct_wait(dreq);
+	result = nfs_direct_read_schedule(dreq, user_addr, count, pos);
+	if (!result)
+		result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
@@ -416,10 +387,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-	list_splice_init(&dreq->rewrite_list, &dreq->list);
-	while (!list_empty(&dreq->list)) {
-		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+	while (!list_empty(&dreq->rewrite_list)) {
+		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
+		nfs_direct_release_pages(data->pagevec, data->npages);
 		nfs_writedata_release(data);
 	}
 }
@@ -427,14 +398,51 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
-	struct list_head *pos;
+	struct inode *inode = dreq->inode;
+	struct list_head *p;
+	struct nfs_write_data *data;
 
-	list_splice_init(&dreq->rewrite_list, &dreq->list);
-	list_for_each(pos, &dreq->list)
-		dreq->outstanding++;
 	dreq->count = 0;
+	get_dreq(dreq);
+
+	list_for_each(p, &dreq->rewrite_list) {
+		data = list_entry(p, struct nfs_write_data, pages);
+
+		get_dreq(dreq);
+
+		/*
+		 * Reset data->res.
+		 */
+		nfs_fattr_init(&data->fattr);
+		data->res.count = data->args.count;
+		memset(&data->verf, 0, sizeof(data->verf));
+
+		/*
+		 * Reuse data->task; data->args should not have changed
+		 * since the original request was sent.
+		 */
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_write_direct_ops, data);
+		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
+
+		data->task.tk_priority = RPC_PRIORITY_NORMAL;
+		data->task.tk_cookie = (unsigned long) inode;
+
+		/*
+		 * We're called via an RPC callback, so BKL is already held.
+		 */
+		rpc_execute(&data->task);
+
+		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				data->args.count,
+				(unsigned long long)data->args.offset);
+	}
 
-	nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, inode);
 }
 
 static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
@@ -471,8 +479,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->cred = dreq->ctx->cred;
 
 	data->args.fh = NFS_FH(data->inode);
-	data->args.offset = dreq->pos;
-	data->args.count = dreq->user_count;
+	data->args.offset = 0;
+	data->args.count = 0;
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
@@ -534,47 +542,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						  struct nfs_write_data, pages);
-				list_del(&data->pages);
-				nfs_writedata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		dreq->outstanding++;
-		if (nbytes <= wsize)
-			break;
-		nbytes -= wsize;
-	}
-
-	nfs_alloc_commit_data(dreq);
-
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -604,8 +571,6 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 			}
 		}
 	}
-	/* In case we have to resend */
-	data->args.stable = NFS_FILE_SYNC;
 
 	spin_unlock(&dreq->lock);
 }
@@ -619,14 +584,8 @@ static void nfs_direct_write_release(void *calldata)
 	struct nfs_write_data *data = calldata;
 	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	spin_lock(&dreq->lock);
-	if (--dreq->outstanding) {
-		spin_unlock(&dreq->lock);
-		return;
-	}
-	spin_unlock(&dreq->lock);
-
-	nfs_direct_write_complete(dreq, data->inode);
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, data->inode);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
@@ -635,41 +594,62 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 };
 
 /*
- * For each nfs_write_data struct that was allocated on the list, dispatch
- * an NFS WRITE operation
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
 */
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
-	struct page **pages = dreq->pages;
-	size_t count = dreq->user_count;
-	loff_t pos = dreq->pos;
 	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int curpage, pgbase;
+	unsigned int wpages = nfs_max_pages(wsize);
+	unsigned int pgbase;
+	int result;
+	ssize_t started = 0;
 
-	curpage = 0;
-	pgbase = dreq->user_addr & ~PAGE_MASK;
+	get_dreq(dreq);
+
+	pgbase = user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_write_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_writedata_alloc(wpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = wsize;
 		if (count < wsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_write_data, pages);
+		data->npages = nfs_direct_count_pages(user_addr, bytes);
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					data->npages, 0, 0, data->pagevec, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_writedata_release(data);
+			break;
+		}
+
+		get_dreq(dreq);
+
 		list_move_tail(&data->pages, &dreq->rewrite_list);
 
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = &pages[curpage];
+		data->args.pages = data->pagevec;
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.count = bytes;
@@ -693,19 +673,26 @@
 				bytes,
 				(unsigned long long)data->args.offset);
 
+		started += bytes;
+		user_addr += bytes;
 		pos += bytes;
 		pgbase += bytes;
-		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
+
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, inode);
+
+	if (started)
+		return 0;
+	return result < 0 ? (ssize_t) result : -EFAULT;
 }
 
-static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -713,17 +700,14 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	int sync = 0;
 
-	dreq = nfs_direct_write_alloc(count, wsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
+	nfs_alloc_commit_data(dreq);
+
 	if (dreq->commit_data == NULL || count < wsize)
 		sync = FLUSH_STABLE;
 
-	dreq->user_addr = user_addr;
-	dreq->user_count = count;
-	dreq->pos = pos;
-	dreq->pages = pages;
-	dreq->npages = nr_pages;
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
@@ -734,8 +718,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_write_schedule(dreq, sync);
-	result = nfs_direct_wait(dreq);
+	result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
+	if (!result)
+		result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
@@ -765,8 +750,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
-	int page_count;
-	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 
@@ -788,14 +771,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 	if (retval)
 		goto out;
 
-	retval = nfs_get_user_pages(READ, (unsigned long) buf,
-						count, &pages);
-	if (retval < 0)
-		goto out;
-	page_count = retval;
-
-	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
-						pages, page_count);
+	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos);
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
@@ -831,8 +807,6 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval;
-	int page_count;
-	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 
@@ -860,14 +834,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 	if (retval)
 		goto out;
 
-	retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
-						count, &pages);
-	if (retval < 0)
-		goto out;
-	page_count = retval;
-
-	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
-						pos, pages, page_count);
+	retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos);
 
 	/*
 	 * XXX: nfs_end_data_update() already ensures this file's
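
The heart of this direct.c rewrite is replacing the pre-counted dreq->outstanding (guarded by a spinlock) with an atomic io_count: the submitter holds one reference, each dispatched chunk takes another, and whichever put drops the count to zero runs completion exactly once -- whether that is the last RPC callback or the submission path itself. A self-contained userspace sketch of the same pattern with C11 atomics and threads (compile with -pthread; all names invented):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int io_count;

    static void get_req(void)
    {
        atomic_fetch_add(&io_count, 1);
    }

    static int put_req(void)
    {
        /* true for whoever released the final reference */
        return atomic_fetch_sub(&io_count, 1) == 1;
    }

    static void *chunk_done(void *arg)
    {
        (void)arg;
        if (put_req())
            puts("complete (from a worker)");
        return NULL;
    }

    int main(void)
    {
        pthread_t t[3];

        atomic_store(&io_count, 1);     /* submitter's own reference */
        for (int i = 0; i < 3; i++) {
            get_req();                  /* one per dispatched chunk */
            pthread_create(&t[i], NULL, chunk_done, NULL);
        }
        if (put_req())                  /* drop the submitter's reference */
            puts("complete (from the submitter)");
        for (int i = 0; i < 3; i++)
            pthread_join(t[i], NULL);
        return 0;
    }
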
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cc2b874ad5a4..48e892880d5b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	return !nfs_wb_page(page->mapping->host, page);
+	if (gfp & __GFP_FS)
+		return !nfs_wb_page(page->mapping->host, page);
+	else
+		/*
+		 * Avoid deadlock on nfs_wait_on_request().
+		 */
+		return 0;
 }
 
 const struct address_space_operations nfs_file_aops = {
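
nfs_release_page() now writes the page back only when the allocation context carries __GFP_FS, i.e. when the caller can tolerate re-entering the filesystem; otherwise it declines rather than risk deadlocking on nfs_wait_on_request(). The shape of that guard, reduced to a userspace sketch with an invented flag:

    #include <stdio.h>

    #define MAY_ENTER_FS 0x1    /* stand-in for __GFP_FS */

    /*
     * A reclaim-style callback only does work that can re-enter the
     * filesystem when the caller says that is safe; otherwise it
     * declines and keeps the page.
     */
    static int release_page(unsigned flags)
    {
        if (flags & MAY_ENTER_FS) {
            puts("writing page back");
            return 1;   /* released */
        }
        return 0;       /* can't risk the deadlock */
    }

    int main(void)
    {
        printf("fs allowed: %d\n", release_page(MAY_ENTER_FS));
        printf("fs forbidden: %d\n", release_page(0));
        return 0;
    }
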
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b81e7ed3c902..07a5dd57646e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -130,9 +130,7 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
-	dput(idmap->idmap_dentry);
-	idmap->idmap_dentry = NULL;
-	rpc_unlink(idmap->idmap_path);
+	rpc_unlink(idmap->idmap_dentry);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 19b98ca468eb..86b3169c8cac 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -51,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry,
 		namelen = dentry->d_name.len;
 		buflen -= namelen + 1;
 		if (buflen < 0)
-			goto Elong;
+			goto Elong_unlock;
 		end -= namelen;
 		memcpy(end, dentry->d_name.name, namelen);
 		*--end = '/';
@@ -68,6 +68,8 @@ char *nfs_path(const char *base, const struct dentry *dentry,
 	end -= namelen;
 	memcpy(end, base, namelen);
 	return end;
+Elong_unlock:
+	spin_unlock(&dcache_lock);
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
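
nfs_path() assembles the path right to left in a fixed buffer, and the fix above makes sure dcache_lock is released on the overflow path. The buffer-filling technique stands on its own; a sketch that builds the same way and fails cleanly when the components don't fit (names and sizes invented):

    #include <stdio.h>
    #include <string.h>

    /*
     * Build "/a/b/c" back to front in a fixed buffer, returning a
     * pointer into the buffer, or NULL when it would overflow
     * (the ENAMETOOLONG case).
     */
    static char *join_path(const char *comp[], int n, char *buf, size_t buflen)
    {
        char *end = buf + buflen;
        long left = (long)buflen;

        *--end = '\0';
        left--;
        for (int i = n - 1; i >= 0; i--) {
            size_t namelen = strlen(comp[i]);
            left -= (long)namelen + 1;
            if (left < 0)
                return NULL;        /* ENAMETOOLONG */
            end -= namelen;
            memcpy(end, comp[i], namelen);
            *--end = '/';
        }
        return end;
    }

    int main(void)
    {
        const char *comp[] = { "export", "home", "alice" };
        char buf[32];
        char *p = join_path(comp, 3, buf, sizeof(buf));
        puts(p ? p : "ENAMETOOLONG");
        return 0;
    }
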
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b4916b092194..153898e1331f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2668,7 +2668,7 @@ out:
2668 nfs4_set_cached_acl(inode, acl); 2668 nfs4_set_cached_acl(inode, acl);
2669} 2669}
2670 2670
2671static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) 2671static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
2672{ 2672{
2673 struct page *pages[NFS4ACL_MAXPAGES]; 2673 struct page *pages[NFS4ACL_MAXPAGES];
2674 struct nfs_getaclargs args = { 2674 struct nfs_getaclargs args = {
@@ -2721,6 +2721,19 @@ out_free:
2721 return ret; 2721 return ret;
2722} 2722}
2723 2723
2724static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
2725{
2726 struct nfs4_exception exception = { };
2727 ssize_t ret;
2728 do {
2729 ret = __nfs4_get_acl_uncached(inode, buf, buflen);
2730 if (ret >= 0)
2731 break;
2732 ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
2733 } while (exception.retry);
2734 return ret;
2735}
2736
2724static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) 2737static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
2725{ 2738{
2726 struct nfs_server *server = NFS_SERVER(inode); 2739 struct nfs_server *server = NFS_SERVER(inode);
@@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
2737 return nfs4_get_acl_uncached(inode, buf, buflen); 2750 return nfs4_get_acl_uncached(inode, buf, buflen);
2738} 2751}
2739 2752
2740static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) 2753static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
2741{ 2754{
2742 struct nfs_server *server = NFS_SERVER(inode); 2755 struct nfs_server *server = NFS_SERVER(inode);
2743 struct page *pages[NFS4ACL_MAXPAGES]; 2756 struct page *pages[NFS4ACL_MAXPAGES];
@@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
2763 return ret; 2776 return ret;
2764} 2777}
2765 2778
2779static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
2780{
2781 struct nfs4_exception exception = { };
2782 int err;
2783 do {
2784 err = nfs4_handle_exception(NFS_SERVER(inode),
2785 __nfs4_proc_set_acl(inode, buf, buflen),
2786 &exception);
2787 } while (exception.retry);
2788 return err;
2789}
2790
2766static int 2791static int
2767nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) 2792nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
2768{ 2793{
@@ -3144,9 +3169,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
3144 default: 3169 default:
3145 BUG(); 3170 BUG();
3146 } 3171 }
3147 if (res < 0)
3148 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n",
3149 __FUNCTION__);
3150 return res; 3172 return res;
3151} 3173}
3152 3174
@@ -3258,8 +3280,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
3258 return ERR_PTR(-ENOMEM); 3280 return ERR_PTR(-ENOMEM);
3259 } 3281 }
3260 3282
3261 /* Unlock _before_ we do the RPC call */
3262 do_vfs_lock(fl->fl_file, fl);
3263 return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); 3283 return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
3264} 3284}
3265 3285
@@ -3270,30 +3290,28 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3270 struct rpc_task *task; 3290 struct rpc_task *task;
3271 int status = 0; 3291 int status = 0;
3272 3292
3273 /* Is this a delegated lock? */
3274 if (test_bit(NFS_DELEGATED_STATE, &state->flags))
3275 goto out_unlock;
3276 /* Is this open_owner holding any locks on the server? */
3277 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
3278 goto out_unlock;
3279
3280 status = nfs4_set_lock_state(state, request); 3293 status = nfs4_set_lock_state(state, request);
3294 /* Unlock _before_ we do the RPC call */
3295 request->fl_flags |= FL_EXISTS;
3296 if (do_vfs_lock(request->fl_file, request) == -ENOENT)
3297 goto out;
3281 if (status != 0) 3298 if (status != 0)
3282 goto out_unlock; 3299 goto out;
3300 /* Is this a delegated lock? */
3301 if (test_bit(NFS_DELEGATED_STATE, &state->flags))
3302 goto out;
3283 lsp = request->fl_u.nfs4_fl.owner; 3303 lsp = request->fl_u.nfs4_fl.owner;
3284 status = -ENOMEM;
3285 seqid = nfs_alloc_seqid(&lsp->ls_seqid); 3304 seqid = nfs_alloc_seqid(&lsp->ls_seqid);
3305 status = -ENOMEM;
3286 if (seqid == NULL) 3306 if (seqid == NULL)
3287 goto out_unlock; 3307 goto out;
3288 task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); 3308 task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid);
3289 status = PTR_ERR(task); 3309 status = PTR_ERR(task);
3290 if (IS_ERR(task)) 3310 if (IS_ERR(task))
3291 goto out_unlock; 3311 goto out;
3292 status = nfs4_wait_for_completion_rpc_task(task); 3312 status = nfs4_wait_for_completion_rpc_task(task);
3293 rpc_release_task(task); 3313 rpc_release_task(task);
3294 return status; 3314out:
3295out_unlock:
3296 do_vfs_lock(request->fl_file, request);
3297 return status; 3315 return status;
3298} 3316}
3299 3317
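The unlock rework above releases the VFS-level lock before issuing the LOCKU RPC, and uses the FL_EXISTS flag so that do_vfs_lock() reports -ENOENT when no matching lock was actually held, letting the client skip the RPC entirely. A minimal userspace sketch of that shape (the FL_EXISTS semantics and helper names are stand-ins, not the kernel API):

    #include <stdio.h>
    #include <errno.h>

    #define FL_EXISTS 0x1

    /* Models do_vfs_lock() on an unlock request: drops the local lock
     * and, with FL_EXISTS set, reports -ENOENT if none was held. */
    static int vfs_unlock(int *held, unsigned int flags)
    {
        if ((flags & FL_EXISTS) && !*held)
            return -ENOENT;
        *held = 0;
        return 0;
    }

    static int proc_unlck(int *held)
    {
        /* Release locally _before_ the RPC: past this point user space
         * can no longer rely on the lock, so a lost or late server
         * reply cannot leave the VFS and the server out of sync. */
        if (vfs_unlock(held, FL_EXISTS) == -ENOENT)
            return 0;   /* nothing was held: skip the RPC */
        /* ... send LOCKU to the server here ... */
        return 0;
    }

    int main(void)
    {
        int held = 1;

        proc_unlck(&held);
        printf("held=%d\n", held);  /* 0: dropped before the RPC */
        proc_unlck(&held);          /* -ENOENT path, RPC skipped */
        return 0;
    }
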
@@ -3461,10 +3479,10 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
3461 struct nfs4_exception exception = { }; 3479 struct nfs4_exception exception = { };
3462 int err; 3480 int err;
3463 3481
3464 /* Cache the lock if possible... */
3465 if (test_bit(NFS_DELEGATED_STATE, &state->flags))
3466 return 0;
3467 do { 3482 do {
3483 /* Cache the lock if possible... */
3484 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
3485 return 0;
3468 err = _nfs4_do_setlk(state, F_SETLK, request, 1); 3486 err = _nfs4_do_setlk(state, F_SETLK, request, 1);
3469 if (err != -NFS4ERR_DELAY) 3487 if (err != -NFS4ERR_DELAY)
3470 break; 3488 break;
@@ -3483,6 +3501,8 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
3483 if (err != 0) 3501 if (err != 0)
3484 return err; 3502 return err;
3485 do { 3503 do {
3504 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
3505 return 0;
3486 err = _nfs4_do_setlk(state, F_SETLK, request, 0); 3506 err = _nfs4_do_setlk(state, F_SETLK, request, 0);
3487 if (err != -NFS4ERR_DELAY) 3507 if (err != -NFS4ERR_DELAY)
3488 break; 3508 break;
@@ -3494,29 +3514,42 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
3494static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 3514static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
3495{ 3515{
3496 struct nfs4_client *clp = state->owner->so_client; 3516 struct nfs4_client *clp = state->owner->so_client;
3517 unsigned char fl_flags = request->fl_flags;
3497 int status; 3518 int status;
3498 3519
3499 /* Is this a delegated open? */ 3520 /* Is this a delegated open? */
3500 if (NFS_I(state->inode)->delegation_state != 0) {
3501 /* Yes: cache locks! */
3502 status = do_vfs_lock(request->fl_file, request);
3503 /* ...but avoid races with delegation recall... */
3504 if (status < 0 || test_bit(NFS_DELEGATED_STATE, &state->flags))
3505 return status;
3506 }
3507 down_read(&clp->cl_sem);
3508 status = nfs4_set_lock_state(state, request); 3521 status = nfs4_set_lock_state(state, request);
3509 if (status != 0) 3522 if (status != 0)
3510 goto out; 3523 goto out;
3524 request->fl_flags |= FL_ACCESS;
3525 status = do_vfs_lock(request->fl_file, request);
3526 if (status < 0)
3527 goto out;
3528 down_read(&clp->cl_sem);
3529 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
3530 struct nfs_inode *nfsi = NFS_I(state->inode);
3531 /* Yes: cache locks! */
3532 down_read(&nfsi->rwsem);
3533 /* ...but avoid races with delegation recall... */
3534 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
3535 request->fl_flags = fl_flags & ~FL_SLEEP;
3536 status = do_vfs_lock(request->fl_file, request);
3537 up_read(&nfsi->rwsem);
3538 goto out_unlock;
3539 }
3540 up_read(&nfsi->rwsem);
3541 }
3511 status = _nfs4_do_setlk(state, cmd, request, 0); 3542 status = _nfs4_do_setlk(state, cmd, request, 0);
3512 if (status != 0) 3543 if (status != 0)
3513 goto out; 3544 goto out_unlock;
3514 /* Note: we always want to sleep here! */ 3545 /* Note: we always want to sleep here! */
3515 request->fl_flags |= FL_SLEEP; 3546 request->fl_flags = fl_flags | FL_SLEEP;
3516 if (do_vfs_lock(request->fl_file, request) < 0) 3547 if (do_vfs_lock(request->fl_file, request) < 0)
3517 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); 3548 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
3518out: 3549out_unlock:
3519 up_read(&clp->cl_sem); 3550 up_read(&clp->cl_sem);
3551out:
3552 request->fl_flags = fl_flags;
3520 return status; 3553 return status;
3521} 3554}
3522 3555
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1750d996f49f..730ec8fb31c6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3355 struct kvec *iov = rcvbuf->head; 3355 struct kvec *iov = rcvbuf->head;
3356 unsigned int nr, pglen = rcvbuf->page_len; 3356 unsigned int nr, pglen = rcvbuf->page_len;
3357 uint32_t *end, *entry, *p, *kaddr; 3357 uint32_t *end, *entry, *p, *kaddr;
3358 uint32_t len, attrlen; 3358 uint32_t len, attrlen, xlen;
3359 int hdrlen, recvd, status; 3359 int hdrlen, recvd, status;
3360 3360
3361 status = decode_op_hdr(xdr, OP_READDIR); 3361 status = decode_op_hdr(xdr, OP_READDIR);
@@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3377 3377
3378 BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); 3378 BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
3379 kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); 3379 kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
3380 end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); 3380 end = p + ((pglen + readdir->pgbase) >> 2);
3381 entry = p; 3381 entry = p;
3382 for (nr = 0; *p++; nr++) { 3382 for (nr = 0; *p++; nr++) {
3383 if (p + 3 > end) 3383 if (end - p < 3)
3384 goto short_pkt; 3384 goto short_pkt;
3385 dprintk("cookie = %Lu, ", *((unsigned long long *)p)); 3385 dprintk("cookie = %Lu, ", *((unsigned long long *)p));
3386 p += 2; /* cookie */ 3386 p += 2; /* cookie */
@@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3389 printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); 3389 printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
3390 goto err_unmap; 3390 goto err_unmap;
3391 } 3391 }
3392 dprintk("filename = %*s\n", len, (char *)p); 3392 xlen = XDR_QUADLEN(len);
3393 p += XDR_QUADLEN(len); 3393 if (end - p < xlen + 1)
3394 if (p + 1 > end)
3395 goto short_pkt; 3394 goto short_pkt;
3395 dprintk("filename = %*s\n", len, (char *)p);
3396 p += xlen;
3396 len = ntohl(*p++); /* bitmap length */ 3397 len = ntohl(*p++); /* bitmap length */
3397 p += len; 3398 if (end - p < len + 1)
3398 if (p + 1 > end)
3399 goto short_pkt; 3399 goto short_pkt;
3400 p += len;
3400 attrlen = XDR_QUADLEN(ntohl(*p++)); 3401 attrlen = XDR_QUADLEN(ntohl(*p++));
3401 p += attrlen; /* attributes */ 3402 if (end - p < attrlen + 2)
3402 if (p + 2 > end)
3403 goto short_pkt; 3403 goto short_pkt;
3404 p += attrlen; /* attributes */
3404 entry = p; 3405 entry = p;
3405 } 3406 }
3406 if (!nr && (entry[0] != 0 || entry[1] == 0)) 3407 if (!nr && (entry[0] != 0 || entry[1] == 0))
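The decode_readdir() hunks replace every check of the form p + n > end with end - p < n. Since len, xlen and attrlen come off the wire, a huge value can make p + n wrap around and sneak past the old comparison, whereas the subtraction form compares against the space genuinely remaining. A standalone illustration (hypothetical buffer, not the XDR stream):

    #include <stdint.h>
    #include <stdio.h>

    /* Returns 1 if n words starting at p fit before end. end - p is
     * the space that is really left and cannot be fooled by pointer
     * overflow; p + n may wrap for a huge untrusted n. */
    static int fits(const uint32_t *p, const uint32_t *end, size_t n)
    {
        return (size_t)(end - p) >= n;
    }

    int main(void)
    {
        uint32_t buf[16];
        const uint32_t *p = buf, *end = buf + 16;

        printf("%d %d\n", fits(p, end, 16), fits(p, end, 17)); /* 1 0 */
        return 0;
    }
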
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 52bf634260a1..da9cf11c326f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -63,7 +63,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
63 return p; 63 return p;
64} 64}
65 65
66void nfs_readdata_free(struct nfs_read_data *p) 66static void nfs_readdata_free(struct nfs_read_data *p)
67{ 67{
68 if (p && (p->pagevec != &p->page_array[0])) 68 if (p && (p->pagevec != &p->page_array[0]))
69 kfree(p->pagevec); 69 kfree(p->pagevec);
@@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
116 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 116 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
117 base &= ~PAGE_CACHE_MASK; 117 base &= ~PAGE_CACHE_MASK;
118 pglen = PAGE_CACHE_SIZE - base; 118 pglen = PAGE_CACHE_SIZE - base;
119 if (pglen < remainder) 119 for (;;) {
120 if (remainder <= pglen) {
121 memclear_highpage_flush(*pages, base, remainder);
122 break;
123 }
120 memclear_highpage_flush(*pages, base, pglen); 124 memclear_highpage_flush(*pages, base, pglen);
121 else 125 pages++;
122 memclear_highpage_flush(*pages, base, remainder); 126 remainder -= pglen;
127 pglen = PAGE_CACHE_SIZE;
128 base = 0;
129 }
123} 130}
124 131
125/* 132/*
@@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
476 unsigned int base = data->args.pgbase; 483 unsigned int base = data->args.pgbase;
477 struct page **pages; 484 struct page **pages;
478 485
486 if (data->res.eof)
487 count = data->args.count;
479 if (unlikely(count == 0)) 488 if (unlikely(count == 0))
480 return; 489 return;
481 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 490 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
@@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
483 count += base; 492 count += base;
484 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) 493 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
485 SetPageUptodate(*pages); 494 SetPageUptodate(*pages);
486 /* 495 if (count != 0)
487 * Was this an eof or a short read? If the latter, don't mark the page
488 * as uptodate yet.
489 */
490 if (count > 0 && (data->res.eof || data->args.count == data->res.count))
491 SetPageUptodate(*pages); 496 SetPageUptodate(*pages);
492} 497}
493 498
@@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
502 count += base; 507 count += base;
503 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) 508 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
504 SetPageError(*pages); 509 SetPageError(*pages);
510 if (count != 0)
511 SetPageError(*pages);
505} 512}
506 513
507/* 514/*
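The truncate helper previously zeroed within at most one page; the new for(;;) loop clears a partial first page and then whole pages until the remainder is consumed. The same walk, modeled over plain byte buffers with memset standing in for memclear_highpage_flush():

    #include <string.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* Zero 'remainder' bytes starting at offset 'base' of pages[0],
     * spilling into the following pages - the shape of the new loop. */
    static void clear_tail(unsigned char **pages, size_t base, size_t remainder)
    {
        size_t pglen = PAGE_SIZE - base;

        for (;;) {
            if (remainder <= pglen) {
                memset(*pages + base, 0, remainder);
                break;
            }
            memset(*pages + base, 0, pglen);
            pages++;
            remainder -= pglen;
            pglen = PAGE_SIZE;
            base = 0;
        }
    }

    int main(void)
    {
        static unsigned char a[PAGE_SIZE], b[PAGE_SIZE];
        unsigned char *pages[] = { a, b };

        memset(a, 0xff, sizeof(a));
        memset(b, 0xff, sizeof(b));
        clear_tail(pages, 100, PAGE_SIZE);  /* spans into second page */
        printf("a[99]=%d a[100]=%d b[99]=%d b[100]=%d\n",
               a[99], a[100], b[99], b[100]);  /* 255 0 0 255 */
        return 0;
    }
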
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bca5734ca9fb..50774991f8d5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -137,7 +137,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
137 return p; 137 return p;
138} 138}
139 139
140void nfs_writedata_free(struct nfs_write_data *p) 140static void nfs_writedata_free(struct nfs_write_data *p)
141{ 141{
142 if (p && (p->pagevec != &p->page_array[0])) 142 if (p && (p->pagevec != &p->page_array[0]))
143 kfree(p->pagevec); 143 kfree(p->pagevec);
@@ -578,7 +578,7 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un
578 return ret; 578 return ret;
579} 579}
580 580
581static void nfs_cancel_requests(struct list_head *head) 581static void nfs_cancel_dirty_list(struct list_head *head)
582{ 582{
583 struct nfs_page *req; 583 struct nfs_page *req;
584 while(!list_empty(head)) { 584 while(!list_empty(head)) {
@@ -589,6 +589,19 @@ static void nfs_cancel_requests(struct list_head *head)
589 } 589 }
590} 590}
591 591
592static void nfs_cancel_commit_list(struct list_head *head)
593{
594 struct nfs_page *req;
595
596 while(!list_empty(head)) {
597 req = nfs_list_entry(head->next);
598 nfs_list_remove_request(req);
599 nfs_inode_remove_request(req);
600 nfs_clear_page_writeback(req);
601 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
602 }
603}
604
592/* 605/*
593 * nfs_scan_dirty - Scan an inode for dirty requests 606 * nfs_scan_dirty - Scan an inode for dirty requests
594 * @inode: NFS inode to scan 607 * @inode: NFS inode to scan
@@ -1381,6 +1394,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1381 nfs_list_remove_request(req); 1394 nfs_list_remove_request(req);
1382 nfs_mark_request_commit(req); 1395 nfs_mark_request_commit(req);
1383 nfs_clear_page_writeback(req); 1396 nfs_clear_page_writeback(req);
1397 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1384 } 1398 }
1385 return -ENOMEM; 1399 return -ENOMEM;
1386} 1400}
@@ -1499,7 +1513,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
1499 if (pages != 0) { 1513 if (pages != 0) {
1500 spin_unlock(&nfsi->req_lock); 1514 spin_unlock(&nfsi->req_lock);
1501 if (how & FLUSH_INVALIDATE) 1515 if (how & FLUSH_INVALIDATE)
1502 nfs_cancel_requests(&head); 1516 nfs_cancel_dirty_list(&head);
1503 else 1517 else
1504 ret = nfs_flush_list(inode, &head, pages, how); 1518 ret = nfs_flush_list(inode, &head, pages, how);
1505 spin_lock(&nfsi->req_lock); 1519 spin_lock(&nfsi->req_lock);
@@ -1512,7 +1526,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
1512 break; 1526 break;
1513 if (how & FLUSH_INVALIDATE) { 1527 if (how & FLUSH_INVALIDATE) {
1514 spin_unlock(&nfsi->req_lock); 1528 spin_unlock(&nfsi->req_lock);
1515 nfs_cancel_requests(&head); 1529 nfs_cancel_commit_list(&head);
1516 spin_lock(&nfsi->req_lock); 1530 spin_lock(&nfsi->req_lock);
1517 continue; 1531 continue;
1518 } 1532 }
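nfs_cancel_requests() is split in two because commit-list requests carry accounting that dirty requests do not: each page counted in NR_UNSTABLE_NFS must be uncounted when its request is destroyed, and the -ENOMEM unwind in nfs_commit_list() needs the same decrement. A toy model of draining such a list while keeping the counter balanced:

    #include <stdio.h>
    #include <stdlib.h>

    struct req { struct req *next; };

    static struct req *head;
    static long nr_unstable;    /* models the NR_UNSTABLE_NFS counter */

    static void add_commit(struct req *r)
    {
        r->next = head;
        head = r;
        nr_unstable++;  /* page became "unstable" when queued */
    }

    /* The shape of nfs_cancel_commit_list(): every destroyed request
     * must give back its NR_UNSTABLE_NFS count, or the counter leaks
     * and writeback throttling misbehaves. */
    static void cancel_commit_list(void)
    {
        while (head) {
            struct req *r = head;
            head = r->next;
            nr_unstable--;
            free(r);
        }
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++)
            add_commit(malloc(sizeof(struct req)));
        cancel_commit_list();
        printf("nr_unstable=%ld\n", nr_unstable);   /* 0: balanced */
        return 0;
    }
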
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b0e095ea0c03..ee4eff27aedc 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -721,6 +721,12 @@ nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
721 return nfs_ok; 721 return nfs_ok;
722} 722}
723 723
724static inline void nfsd4_increment_op_stats(u32 opnum)
725{
726 if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
727 nfsdstats.nfs4_opcount[opnum]++;
728}
729
724 730
725/* 731/*
726 * COMPOUND call. 732 * COMPOUND call.
@@ -930,6 +936,8 @@ encode_op:
930 /* XXX Ugh, we need to get rid of this kind of special case: */ 936 /* XXX Ugh, we need to get rid of this kind of special case: */
931 if (op->opnum == OP_READ && op->u.read.rd_filp) 937 if (op->opnum == OP_READ && op->u.read.rd_filp)
932 fput(op->u.read.rd_filp); 938 fput(op->u.read.rd_filp);
939
940 nfsd4_increment_op_stats(op->opnum);
933 } 941 }
934 942
935out: 943out:
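nfsd4_increment_op_stats() range-checks the opcode before indexing, since a compound can carry an arbitrary opnum off the wire. The guarded-counter shape, with illustrative bounds standing in for FIRST_NFS4_OP and LAST_NFS4_OP:

    #include <stdio.h>

    #define FIRST_OP 3
    #define LAST_OP  39

    static unsigned int opcount[LAST_OP + 1];

    /* Never index the array with a wire-supplied opcode without a
     * range check - unknown ops are simply not counted. */
    static void count_op(unsigned int opnum)
    {
        if (opnum >= FIRST_OP && opnum <= LAST_OP)
            opcount[opnum]++;
    }

    int main(void)
    {
        count_op(3);
        count_op(1000); /* ignored, no out-of-bounds write */
        printf("op3=%u\n", opcount[3]);
        return 0;
    }
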
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ecc439d2565f..501d83884530 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -187,6 +187,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
187 goto out; 187 goto out;
188 } 188 }
189 189
190 /* Set user creds for this exportpoint */
191 error = nfserrno(nfsd_setuser(rqstp, exp));
192 if (error)
193 goto out;
194
190 /* 195 /*
191 * Look up the dentry using the NFS file handle. 196 * Look up the dentry using the NFS file handle.
192 */ 197 */
@@ -241,16 +246,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
241 dprintk("nfsd: fh_verify - just checking\n"); 246 dprintk("nfsd: fh_verify - just checking\n");
242 dentry = fhp->fh_dentry; 247 dentry = fhp->fh_dentry;
243 exp = fhp->fh_export; 248 exp = fhp->fh_export;
249 /* Set user creds for this exportpoint; necessary even
250 * in the "just checking" case because this may be a
251 * filehandle that was created by fh_compose, and that
252 * is about to be used in another nfsv4 compound
253 * operation */
254 error = nfserrno(nfsd_setuser(rqstp, exp));
255 if (error)
256 goto out;
244 } 257 }
245 cache_get(&exp->h); 258 cache_get(&exp->h);
246 259
247 /* Set user creds for this exportpoint; necessary even in the "just
248 * checking" case because this may be a filehandle that was created by
249 * fh_compose, and that is about to be used in another nfsv4 compound
250 * operation */
251 error = nfserrno(nfsd_setuser(rqstp, exp));
252 if (error)
253 goto out;
254 260
255 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); 261 error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
256 if (error) 262 if (error)
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 57265d563804..71944cddf680 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -72,6 +72,16 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
72 /* show my rpc info */ 72 /* show my rpc info */
73 svc_seq_show(seq, &nfsd_svcstats); 73 svc_seq_show(seq, &nfsd_svcstats);
74 74
75#ifdef CONFIG_NFSD_V4
76 /* Show count for individual nfsv4 operations */
 77 /* Include counts for operation numbers 0, 1 and 2 as well, for uniformity */
78 seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
79 for (i = 0; i <= LAST_NFS4_OP; i++)
80 seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]);
81
82 seq_putc(seq, '\n');
83#endif
84
75 return 0; 85 return 0;
76} 86}
77 87
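The stats hunk appends one line of the form "proc4ops <n> <c0> <c1> ..." to the nfsd stats file, the leading count telling parsers how many columns follow. The same output shape in plain C (counter values invented):

    #include <stdio.h>

    int main(void)
    {
        unsigned int opcount[4] = { 7, 0, 0, 12 }; /* illustrative */
        unsigned int n = sizeof(opcount) / sizeof(opcount[0]);

        /* Leading column count lets consumers parse a line whose
         * width can grow as new operations are added. */
        printf("proc4ops %u", n);
        for (unsigned int i = 0; i < n; i++)
            printf(" %u", opcount[i]);
        putchar('\n');
        return 0;
    }
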
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 1b8346dd0572..9503240ef0e5 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2375,7 +2375,6 @@ leave:
2375 mlog(0, "returning %d\n", ret); 2375 mlog(0, "returning %d\n", ret);
2376 return ret; 2376 return ret;
2377} 2377}
2378EXPORT_SYMBOL_GPL(dlm_migrate_lockres);
2379 2378
2380int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) 2379int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
2381{ 2380{
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b0c3134f4f70..37be4b2e0d4a 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
155 else 155 else
156 status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); 156 status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
157 157
158 if (status != DLM_NORMAL) 158 if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
159 goto leave; 159 goto leave;
160 160
161 /* By now this has been masked out of cancel requests. */ 161 /* By now this has been masked out of cancel requests. */
@@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
183 spin_lock(&lock->spinlock); 183 spin_lock(&lock->spinlock);
184 /* if the master told us the lock was already granted, 184 /* if the master told us the lock was already granted,
185 * let the ast handle all of these actions */ 185 * let the ast handle all of these actions */
186 if (status == DLM_NORMAL && 186 if (status == DLM_CANCELGRANT) {
187 lksb->status == DLM_CANCELGRANT) {
188 actions &= ~(DLM_UNLOCK_REMOVE_LOCK| 187 actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
189 DLM_UNLOCK_REGRANT_LOCK| 188 DLM_UNLOCK_REGRANT_LOCK|
190 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 189 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
@@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
349 vec, veclen, owner, &status); 348 vec, veclen, owner, &status);
350 if (tmpret >= 0) { 349 if (tmpret >= 0) {
351 // successfully sent and received 350 // successfully sent and received
352 if (status == DLM_CANCELGRANT) 351 if (status == DLM_FORWARD)
353 ret = DLM_NORMAL;
354 else if (status == DLM_FORWARD) {
355 mlog(0, "master was in-progress. retry\n"); 352 mlog(0, "master was in-progress. retry\n");
356 ret = DLM_FORWARD; 353 ret = status;
357 } else
358 ret = status;
359 lksb->status = status;
360 } else { 354 } else {
361 mlog_errno(tmpret); 355 mlog_errno(tmpret);
362 if (dlm_is_host_down(tmpret)) { 356 if (dlm_is_host_down(tmpret)) {
@@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
372 /* something bad. this will BUG in ocfs2 */ 366 /* something bad. this will BUG in ocfs2 */
373 ret = dlm_err_to_dlm_status(tmpret); 367 ret = dlm_err_to_dlm_status(tmpret);
374 } 368 }
375 lksb->status = ret;
376 } 369 }
377 370
378 return ret; 371 return ret;
@@ -483,6 +476,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
483 476
484 /* lock was found on queue */ 477 /* lock was found on queue */
485 lksb = lock->lksb; 478 lksb = lock->lksb;
479 if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
480 lock->ml.type != LKM_EXMODE)
481 flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
482
486 /* unlockast only called on originating node */ 483 /* unlockast only called on originating node */
487 if (flags & LKM_PUT_LVB) { 484 if (flags & LKM_PUT_LVB) {
488 lksb->flags |= DLM_LKSB_PUT_LVB; 485 lksb->flags |= DLM_LKSB_PUT_LVB;
@@ -507,11 +504,8 @@ not_found:
507 "cookie=%u:%llu\n", 504 "cookie=%u:%llu\n",
508 dlm_get_lock_cookie_node(unlock->cookie), 505 dlm_get_lock_cookie_node(unlock->cookie),
509 dlm_get_lock_cookie_seq(unlock->cookie)); 506 dlm_get_lock_cookie_seq(unlock->cookie));
510 else { 507 else
511 /* send the lksb->status back to the other node */
512 status = lksb->status;
513 dlm_lock_put(lock); 508 dlm_lock_put(lock);
514 }
515 509
516leave: 510leave:
517 if (res) 511 if (res)
@@ -533,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
533 527
534 if (dlm_lock_on_list(&res->blocked, lock)) { 528 if (dlm_lock_on_list(&res->blocked, lock)) {
535 /* cancel this outright */ 529 /* cancel this outright */
536 lksb->status = DLM_NORMAL;
537 status = DLM_NORMAL; 530 status = DLM_NORMAL;
538 *actions = (DLM_UNLOCK_CALL_AST | 531 *actions = (DLM_UNLOCK_CALL_AST |
539 DLM_UNLOCK_REMOVE_LOCK); 532 DLM_UNLOCK_REMOVE_LOCK);
540 } else if (dlm_lock_on_list(&res->converting, lock)) { 533 } else if (dlm_lock_on_list(&res->converting, lock)) {
541 /* cancel the request, put back on granted */ 534 /* cancel the request, put back on granted */
542 lksb->status = DLM_NORMAL;
543 status = DLM_NORMAL; 535 status = DLM_NORMAL;
544 *actions = (DLM_UNLOCK_CALL_AST | 536 *actions = (DLM_UNLOCK_CALL_AST |
545 DLM_UNLOCK_REMOVE_LOCK | 537 DLM_UNLOCK_REMOVE_LOCK |
546 DLM_UNLOCK_REGRANT_LOCK | 538 DLM_UNLOCK_REGRANT_LOCK |
547 DLM_UNLOCK_CLEAR_CONVERT_TYPE); 539 DLM_UNLOCK_CLEAR_CONVERT_TYPE);
548 } else if (dlm_lock_on_list(&res->granted, lock)) { 540 } else if (dlm_lock_on_list(&res->granted, lock)) {
549 /* too late, already granted. DLM_CANCELGRANT */ 541 /* too late, already granted. */
550 lksb->status = DLM_CANCELGRANT; 542 status = DLM_CANCELGRANT;
551 status = DLM_NORMAL;
552 *actions = DLM_UNLOCK_CALL_AST; 543 *actions = DLM_UNLOCK_CALL_AST;
553 } else { 544 } else {
554 mlog(ML_ERROR, "lock to cancel is not on any list!\n"); 545 mlog(ML_ERROR, "lock to cancel is not on any list!\n");
555 lksb->status = DLM_IVLOCKID;
556 status = DLM_IVLOCKID; 546 status = DLM_IVLOCKID;
557 *actions = 0; 547 *actions = 0;
558 } 548 }
@@ -569,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
569 559
570 /* unlock request */ 560 /* unlock request */
571 if (!dlm_lock_on_list(&res->granted, lock)) { 561 if (!dlm_lock_on_list(&res->granted, lock)) {
572 lksb->status = DLM_DENIED;
573 status = DLM_DENIED; 562 status = DLM_DENIED;
574 dlm_error(status); 563 dlm_error(status);
575 *actions = 0; 564 *actions = 0;
576 } else { 565 } else {
577 /* unlock granted lock */ 566 /* unlock granted lock */
578 lksb->status = DLM_NORMAL;
579 status = DLM_NORMAL; 567 status = DLM_NORMAL;
580 *actions = (DLM_UNLOCK_FREE_LOCK | 568 *actions = (DLM_UNLOCK_FREE_LOCK |
581 DLM_UNLOCK_CALL_AST | 569 DLM_UNLOCK_CALL_AST |
@@ -632,6 +620,8 @@ retry:
632 620
633 spin_lock(&res->spinlock); 621 spin_lock(&res->spinlock);
634 is_master = (res->owner == dlm->node_num); 622 is_master = (res->owner == dlm->node_num);
623 if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
624 flags &= ~LKM_VALBLK;
635 spin_unlock(&res->spinlock); 625 spin_unlock(&res->spinlock);
636 626
637 if (is_master) { 627 if (is_master) {
@@ -665,7 +655,7 @@ retry:
665 } 655 }
666 656
667 if (call_ast) { 657 if (call_ast) {
668 mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status); 658 mlog(0, "calling unlockast(%p, %d)\n", data, status);
669 if (is_master) { 659 if (is_master) {
670 /* it is possible that there is one last bast 660 /* it is possible that there is one last bast
671 * pending. make sure it is flushed, then 661 * pending. make sure it is flushed, then
@@ -677,9 +667,12 @@ retry:
677 wait_event(dlm->ast_wq, 667 wait_event(dlm->ast_wq,
678 dlm_lock_basts_flushed(dlm, lock)); 668 dlm_lock_basts_flushed(dlm, lock));
679 } 669 }
680 (*unlockast)(data, lksb->status); 670 (*unlockast)(data, status);
681 } 671 }
682 672
673 if (status == DLM_CANCELGRANT)
674 status = DLM_NORMAL;
675
683 if (status == DLM_NORMAL) { 676 if (status == DLM_NORMAL) {
684 mlog(0, "kicking the thread\n"); 677 mlog(0, "kicking the thread\n");
685 dlm_kick_thread(dlm, res); 678 dlm_kick_thread(dlm, res);
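The dlmunlock.c changes stop shuttling the result through lksb->status: DLM_CANCELGRANT now travels as the ordinary status value, reaches the unlockast unchanged, and is only folded back to DLM_NORMAL afterwards for the caller. A reduced model of that pipeline (enum values illustrative):

    #include <stdio.h>

    enum status { NORMAL, CANCELGRANT, DENIED };

    /* The ast sees the real outcome, including CANCELGRANT. */
    static void unlockast(enum status st)
    {
        printf("ast got %d\n", st);
    }

    /* Cancel arrived after the lock was granted: report that
     * distinctly instead of hiding it in a side channel. */
    static enum status do_unlock(int already_granted)
    {
        return already_granted ? CANCELGRANT : NORMAL;
    }

    int main(void)
    {
        enum status st = do_unlock(1);

        unlockast(st);
        /* Only after the callback do we collapse it for the caller. */
        if (st == CANCELGRANT)
            st = NORMAL;
        printf("caller sees %d\n", st);
        return 0;
    }
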
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0d1973ea32b0..1f17a4d08287 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
840 840
841 mlog(0, "Allocating %u clusters for a new window.\n", 841 mlog(0, "Allocating %u clusters for a new window.\n",
842 ocfs2_local_alloc_window_bits(osb)); 842 ocfs2_local_alloc_window_bits(osb));
843
844 /* Instruct the allocation code to try the most recently used
845 * cluster group. We'll re-record the group used this pass
846 * below. */
847 ac->ac_last_group = osb->la_last_gd;
848
843 /* we used the generic suballoc reserve function, but we set 849 /* we used the generic suballoc reserve function, but we set
844 * everything up nicely, so there's no reason why we can't use 850 * everything up nicely, so there's no reason why we can't use
845 * the more specific cluster api to claim bits. */ 851 * the more specific cluster api to claim bits. */
@@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
852 goto bail; 858 goto bail;
853 } 859 }
854 860
861 osb->la_last_gd = ac->ac_last_group;
862
855 la->la_bm_off = cpu_to_le32(cluster_off); 863 la->la_bm_off = cpu_to_le32(cluster_off);
856 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 864 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
857 /* just in case... In the future when we find space ourselves, 865 /* just in case... In the future when we find space ourselves,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index cd4a6f253d13..0462a7f4e21b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -197,7 +197,6 @@ struct ocfs2_super
197 struct ocfs2_node_map recovery_map; 197 struct ocfs2_node_map recovery_map;
198 struct ocfs2_node_map umount_map; 198 struct ocfs2_node_map umount_map;
199 199
200 u32 num_clusters;
201 u64 root_blkno; 200 u64 root_blkno;
202 u64 system_dir_blkno; 201 u64 system_dir_blkno;
203 u64 bitmap_blkno; 202 u64 bitmap_blkno;
@@ -237,6 +236,7 @@ struct ocfs2_super
237 236
238 enum ocfs2_local_alloc_state local_alloc_state; 237 enum ocfs2_local_alloc_state local_alloc_state;
239 struct buffer_head *local_alloc_bh; 238 struct buffer_head *local_alloc_bh;
239 u64 la_last_gd;
240 240
241 /* Next two fields are for local node slot recovery during 241 /* Next two fields are for local node slot recovery during
242 * mount. */ 242 * mount. */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 195523090c87..9d91e66f51a9 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode,
70 struct buffer_head *group_bh, 70 struct buffer_head *group_bh,
71 u32 bits_wanted, u32 min_bits, 71 u32 bits_wanted, u32 min_bits,
72 u16 *bit_off, u16 *bits_found); 72 u16 *bit_off, u16 *bits_found);
73static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
74 u32 bits_wanted,
75 u32 min_bits,
76 u16 *bit_off,
77 unsigned int *num_bits,
78 u64 *bg_blkno);
79static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 73static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
80 struct ocfs2_alloc_context *ac, 74 struct ocfs2_alloc_context *ac,
81 u32 bits_wanted, 75 u32 bits_wanted,
@@ -85,11 +79,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
85 u64 *bg_blkno); 79 u64 *bg_blkno);
86static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 80static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
87 int nr); 81 int nr);
88static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
89 struct buffer_head *bg_bh,
90 unsigned int bits_wanted,
91 u16 *bit_off,
92 u16 *bits_found);
93static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 82static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
94 struct inode *alloc_inode, 83 struct inode *alloc_inode,
95 struct ocfs2_group_desc *bg, 84 struct ocfs2_group_desc *bg,
@@ -143,6 +132,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
143 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); 132 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
144} 133}
145 134
135/* somewhat more expensive than our other checks, so use sparingly. */
136static int ocfs2_check_group_descriptor(struct super_block *sb,
137 struct ocfs2_dinode *di,
138 struct ocfs2_group_desc *gd)
139{
140 unsigned int max_bits;
141
142 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
143 OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
144 return -EIO;
145 }
146
147 if (di->i_blkno != gd->bg_parent_dinode) {
148 ocfs2_error(sb, "Group descriptor # %llu has bad parent "
149 "pointer (%llu, expected %llu)",
150 (unsigned long long)le64_to_cpu(gd->bg_blkno),
151 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
152 (unsigned long long)le64_to_cpu(di->i_blkno));
153 return -EIO;
154 }
155
156 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
157 if (le16_to_cpu(gd->bg_bits) > max_bits) {
158 ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
159 (unsigned long long)le64_to_cpu(gd->bg_blkno),
160 le16_to_cpu(gd->bg_bits));
161 return -EIO;
162 }
163
164 if (le16_to_cpu(gd->bg_chain) >=
165 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
166 ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
167 (unsigned long long)le64_to_cpu(gd->bg_blkno),
168 le16_to_cpu(gd->bg_chain));
169 return -EIO;
170 }
171
172 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
173 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
174 "claims that %u are free",
175 (unsigned long long)le64_to_cpu(gd->bg_blkno),
176 le16_to_cpu(gd->bg_bits),
177 le16_to_cpu(gd->bg_free_bits_count));
178 return -EIO;
179 }
180
181 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
182 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
183 "max bitmap bits of %u",
184 (unsigned long long)le64_to_cpu(gd->bg_blkno),
185 le16_to_cpu(gd->bg_bits),
186 8 * le16_to_cpu(gd->bg_size));
187 return -EIO;
188 }
189
190 return 0;
191}
192
146static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 193static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
147 struct inode *alloc_inode, 194 struct inode *alloc_inode,
148 struct buffer_head *bg_bh, 195 struct buffer_head *bg_bh,
@@ -663,6 +710,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
663static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 710static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
664 struct buffer_head *bg_bh, 711 struct buffer_head *bg_bh,
665 unsigned int bits_wanted, 712 unsigned int bits_wanted,
713 unsigned int total_bits,
666 u16 *bit_off, 714 u16 *bit_off,
667 u16 *bits_found) 715 u16 *bits_found)
668{ 716{
@@ -679,10 +727,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
679 found = start = best_offset = best_size = 0; 727 found = start = best_offset = best_size = 0;
680 bitmap = bg->bg_bitmap; 728 bitmap = bg->bg_bitmap;
681 729
682 while((offset = ocfs2_find_next_zero_bit(bitmap, 730 while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
683 le16_to_cpu(bg->bg_bits), 731 if (offset == total_bits)
684 start)) != -1) {
685 if (offset == le16_to_cpu(bg->bg_bits))
686 break; 732 break;
687 733
688 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { 734 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -911,14 +957,35 @@ static int ocfs2_cluster_group_search(struct inode *inode,
911{ 957{
912 int search = -ENOSPC; 958 int search = -ENOSPC;
913 int ret; 959 int ret;
914 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 960 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
915 u16 tmp_off, tmp_found; 961 u16 tmp_off, tmp_found;
962 unsigned int max_bits, gd_cluster_off;
916 963
917 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 964 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
918 965
919 if (bg->bg_free_bits_count) { 966 if (gd->bg_free_bits_count) {
967 max_bits = le16_to_cpu(gd->bg_bits);
968
969 /* Tail groups in cluster bitmaps which aren't cpg
 970 * aligned are prone to partial extension by a failed
971 * fs resize. If the file system resize never got to
972 * update the dinode cluster count, then we don't want
973 * to trust any clusters past it, regardless of what
974 * the group descriptor says. */
975 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
976 le64_to_cpu(gd->bg_blkno));
977 if ((gd_cluster_off + max_bits) >
978 OCFS2_I(inode)->ip_clusters) {
979 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
980 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
981 (unsigned long long)le64_to_cpu(gd->bg_blkno),
982 le16_to_cpu(gd->bg_bits),
983 OCFS2_I(inode)->ip_clusters, max_bits);
984 }
985
920 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 986 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
921 group_bh, bits_wanted, 987 group_bh, bits_wanted,
988 max_bits,
922 &tmp_off, &tmp_found); 989 &tmp_off, &tmp_found);
923 if (ret) 990 if (ret)
924 return ret; 991 return ret;
@@ -951,17 +1018,109 @@ static int ocfs2_block_group_search(struct inode *inode,
951 if (bg->bg_free_bits_count) 1018 if (bg->bg_free_bits_count)
952 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1019 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
953 group_bh, bits_wanted, 1020 group_bh, bits_wanted,
1021 le16_to_cpu(bg->bg_bits),
954 bit_off, bits_found); 1022 bit_off, bits_found);
955 1023
956 return ret; 1024 return ret;
957} 1025}
958 1026
1027static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1028 struct ocfs2_journal_handle *handle,
1029 struct buffer_head *di_bh,
1030 u32 num_bits,
1031 u16 chain)
1032{
1033 int ret;
1034 u32 tmp_used;
1035 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1036 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1037
1038 ret = ocfs2_journal_access(handle, inode, di_bh,
1039 OCFS2_JOURNAL_ACCESS_WRITE);
1040 if (ret < 0) {
1041 mlog_errno(ret);
1042 goto out;
1043 }
1044
1045 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1046 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1047 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1048
1049 ret = ocfs2_journal_dirty(handle, di_bh);
1050 if (ret < 0)
1051 mlog_errno(ret);
1052
1053out:
1054 return ret;
1055}
1056
1057static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1058 u32 bits_wanted,
1059 u32 min_bits,
1060 u16 *bit_off,
1061 unsigned int *num_bits,
1062 u64 gd_blkno,
1063 u16 *bits_left)
1064{
1065 int ret;
1066 u16 found;
1067 struct buffer_head *group_bh = NULL;
1068 struct ocfs2_group_desc *gd;
1069 struct inode *alloc_inode = ac->ac_inode;
1070 struct ocfs2_journal_handle *handle = ac->ac_handle;
1071
1072 ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
1073 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1074 if (ret < 0) {
1075 mlog_errno(ret);
1076 return ret;
1077 }
1078
1079 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1080 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
1081 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
1082 ret = -EIO;
1083 goto out;
1084 }
1085
1086 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1087 bit_off, &found);
1088 if (ret < 0) {
1089 if (ret != -ENOSPC)
1090 mlog_errno(ret);
1091 goto out;
1092 }
1093
1094 *num_bits = found;
1095
1096 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1097 *num_bits,
1098 le16_to_cpu(gd->bg_chain));
1099 if (ret < 0) {
1100 mlog_errno(ret);
1101 goto out;
1102 }
1103
1104 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1105 *bit_off, *num_bits);
1106 if (ret < 0)
1107 mlog_errno(ret);
1108
1109 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1110
1111out:
1112 brelse(group_bh);
1113
1114 return ret;
1115}
1116
959static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 1117static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
960 u32 bits_wanted, 1118 u32 bits_wanted,
961 u32 min_bits, 1119 u32 min_bits,
962 u16 *bit_off, 1120 u16 *bit_off,
963 unsigned int *num_bits, 1121 unsigned int *num_bits,
964 u64 *bg_blkno) 1122 u64 *bg_blkno,
1123 u16 *bits_left)
965{ 1124{
966 int status; 1125 int status;
967 u16 chain, tmp_bits; 1126 u16 chain, tmp_bits;
@@ -988,9 +1147,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
988 goto bail; 1147 goto bail;
989 } 1148 }
990 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1149 bg = (struct ocfs2_group_desc *) group_bh->b_data;
991 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1150 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
992 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1151 if (status) {
993 status = -EIO; 1152 mlog_errno(status);
994 goto bail; 1153 goto bail;
995 } 1154 }
996 1155
@@ -1018,9 +1177,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1018 goto bail; 1177 goto bail;
1019 } 1178 }
1020 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1179 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1021 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1180 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1022 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1181 if (status) {
1023 status = -EIO; 1182 mlog_errno(status);
1024 goto bail; 1183 goto bail;
1025 } 1184 }
1026 } 1185 }
@@ -1099,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1099 (unsigned long long)fe->i_blkno); 1258 (unsigned long long)fe->i_blkno);
1100 1259
1101 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1260 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1261 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1102bail: 1262bail:
1103 if (group_bh) 1263 if (group_bh)
1104 brelse(group_bh); 1264 brelse(group_bh);
@@ -1120,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1120{ 1280{
1121 int status; 1281 int status;
1122 u16 victim, i; 1282 u16 victim, i;
1283 u16 bits_left = 0;
1284 u64 hint_blkno = ac->ac_last_group;
1123 struct ocfs2_chain_list *cl; 1285 struct ocfs2_chain_list *cl;
1124 struct ocfs2_dinode *fe; 1286 struct ocfs2_dinode *fe;
1125 1287
@@ -1146,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1146 goto bail; 1308 goto bail;
1147 } 1309 }
1148 1310
1311 if (hint_blkno) {
1312 /* Attempt to short-circuit the usual search mechanism
1313 * by jumping straight to the most recently used
1314 * allocation group. This helps us maintain some
1315 * contiguity across allocations. */
1316 status = ocfs2_search_one_group(ac, bits_wanted, min_bits,
1317 bit_off, num_bits,
1318 hint_blkno, &bits_left);
1319 if (!status) {
1320 /* Be careful to update *bg_blkno here as the
1321 * caller is expecting it to be filled in, and
1322 * ocfs2_search_one_group() won't do that for
1323 * us. */
1324 *bg_blkno = hint_blkno;
1325 goto set_hint;
1326 }
1327 if (status < 0 && status != -ENOSPC) {
1328 mlog_errno(status);
1329 goto bail;
1330 }
1331 }
1332
1149 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 1333 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1150 1334
1151 victim = ocfs2_find_victim_chain(cl); 1335 victim = ocfs2_find_victim_chain(cl);
@@ -1153,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1153 ac->ac_allow_chain_relink = 1; 1337 ac->ac_allow_chain_relink = 1;
1154 1338
1155 status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, 1339 status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
1156 num_bits, bg_blkno); 1340 num_bits, bg_blkno, &bits_left);
1157 if (!status) 1341 if (!status)
1158 goto bail; 1342 goto set_hint;
1159 if (status < 0 && status != -ENOSPC) { 1343 if (status < 0 && status != -ENOSPC) {
1160 mlog_errno(status); 1344 mlog_errno(status);
1161 goto bail; 1345 goto bail;
@@ -1177,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1177 1361
1178 ac->ac_chain = i; 1362 ac->ac_chain = i;
1179 status = ocfs2_search_chain(ac, bits_wanted, min_bits, 1363 status = ocfs2_search_chain(ac, bits_wanted, min_bits,
1180 bit_off, num_bits, 1364 bit_off, num_bits, bg_blkno,
1181 bg_blkno); 1365 &bits_left);
1182 if (!status) 1366 if (!status)
1183 break; 1367 break;
1184 if (status < 0 && status != -ENOSPC) { 1368 if (status < 0 && status != -ENOSPC) {
@@ -1186,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1186 goto bail; 1370 goto bail;
1187 } 1371 }
1188 } 1372 }
1189bail:
1190 1373
1374set_hint:
1375 if (status != -ENOSPC) {
1376 /* If the next search of this group is not likely to
1377 * yield a suitable extent, then we reset the last
1378 * group hint so as to not waste a disk read */
1379 if (bits_left < min_bits)
1380 ac->ac_last_group = 0;
1381 else
1382 ac->ac_last_group = *bg_blkno;
1383 }
1384
1385bail:
1191 mlog_exit(status); 1386 mlog_exit(status);
1192 return status; 1387 return status;
1193} 1388}
@@ -1341,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
1341{ 1536{
1342 int status; 1537 int status;
1343 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 1538 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1344 u64 bg_blkno; 1539 u64 bg_blkno = 0;
1345 u16 bg_bit_off; 1540 u16 bg_bit_off;
1346 1541
1347 mlog_entry_void(); 1542 mlog_entry_void();
@@ -1494,9 +1689,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
1494 } 1689 }
1495 1690
1496 group = (struct ocfs2_group_desc *) group_bh->b_data; 1691 group = (struct ocfs2_group_desc *) group_bh->b_data;
1497 if (!OCFS2_IS_VALID_GROUP_DESC(group)) { 1692 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
1498 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); 1693 if (status) {
1499 status = -EIO; 1694 mlog_errno(status);
1500 goto bail; 1695 goto bail;
1501 } 1696 }
1502 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1697 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
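Together with the la_last_gd field added above, ocfs2_claim_suballoc_bits() now tries the hinted group via ocfs2_search_one_group() first, falls back to the chain walk on -ENOSPC, and then refreshes or clears the hint depending on how many free bits the winning group has left. The control flow, reduced to a userspace sketch with invented numbers:

    #include <stdio.h>

    #define ENOSPC 28

    static unsigned long long last_group;   /* models ac->ac_last_group */

    static int search_one_group(unsigned long long gd, unsigned int want,
                                unsigned int *bits_left)
    {
        /* Pretend the hinted group has 8 free bits. */
        if (gd && want <= 8) {
            *bits_left = 8 - want;
            return 0;
        }
        return -ENOSPC;
    }

    static int search_chains(unsigned int want, unsigned long long *gd,
                             unsigned int *bits_left)
    {
        *gd = 2048;     /* some other group found by the full walk */
        *bits_left = 100 - want;
        return 0;
    }

    static int claim_bits(unsigned int want, unsigned int min_bits)
    {
        unsigned long long gd = last_group;
        unsigned int bits_left = 0;
        int status;

        status = search_one_group(gd, want, &bits_left);
        if (status == -ENOSPC)  /* hint exhausted: full search */
            status = search_chains(want, &gd, &bits_left);
        if (status)
            return status;
        /* Keep the hint only if the group can serve us again;
         * otherwise drop it and save the wasted disk read. */
        last_group = (bits_left < min_bits) ? 0 : gd;
        return 0;
    }

    int main(void)
    {
        last_group = 1024;
        claim_bits(4, 1);
        printf("hint=%llu\n", last_group);  /* 1024: reuse same group */
        claim_bits(16, 8);
        printf("hint=%llu\n", last_group);  /* 2048: refreshed by walk */
        return 0;
    }
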
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a76c82a7ceac..c787838d1052 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,6 +49,8 @@ struct ocfs2_alloc_context {
49 u16 ac_chain; 49 u16 ac_chain;
50 int ac_allow_chain_relink; 50 int ac_allow_chain_relink;
51 group_search_t *ac_group_search; 51 group_search_t *ac_group_search;
52
53 u64 ac_last_group;
52}; 54};
53 55
54void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); 56void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 382706a67ffd..d17e33e66a1e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 1442
1443 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 1443 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
1444 1444
1445 /* We don't have a cluster lock on the bitmap here because
1446 * we're only interested in static information and the extra
1447 * complexity at mount time isn't worth it. Don't pass the
1448 * inode into the read function though, as we don't want it to
1449 * be put in the cache. */
1445 status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, 1450 status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
1446 inode); 1451 NULL);
1447 iput(inode); 1452 iput(inode);
1448 if (status < 0) { 1453 if (status < 0) {
1449 mlog_errno(status); 1454 mlog_errno(status);
@@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1452 1457
1453 di = (struct ocfs2_dinode *) bitmap_bh->b_data; 1458 di = (struct ocfs2_dinode *) bitmap_bh->b_data;
1454 osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); 1459 osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
1455 osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
1456 brelse(bitmap_bh); 1460 brelse(bitmap_bh);
1457 mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", 1461 mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
1458 (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); 1462 (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index c9a478099281..e478f1941831 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -99,7 +99,7 @@ config IBM_PARTITION
99 99
100config MAC_PARTITION 100config MAC_PARTITION
101 bool "Macintosh partition map support" if PARTITION_ADVANCED 101 bool "Macintosh partition map support" if PARTITION_ADVANCED
102 default y if MAC 102 default y if (MAC || PPC_PMAC)
103 help 103 help
104 Say Y here if you would like to use hard disks under Linux which 104 Say Y here if you would like to use hard disks under Linux which
105 were partitioned on a Macintosh. 105 were partitioned on a Macintosh.
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 839634026eb5..51c6a748df49 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -339,6 +339,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
339 p->start_sect = start; 339 p->start_sect = start;
340 p->nr_sects = len; 340 p->nr_sects = len;
341 p->partno = part; 341 p->partno = part;
342 p->policy = disk->policy;
342 343
343 if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1])) 344 if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1]))
344 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part); 345 snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part);
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index abe91ca03edf..0a5927c806ca 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -74,7 +74,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
74 spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); 74 spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
75 for (i = 0; i < 8; i++, p++) { 75 for (i = 0; i < 8; i++, p++) {
76 unsigned long st_sector; 76 unsigned long st_sector;
77 int num_sectors; 77 unsigned int num_sectors;
78 78
79 st_sector = be32_to_cpu(p->start_cylinder) * spc; 79 st_sector = be32_to_cpu(p->start_cylinder) * spc;
80 num_sectors = be32_to_cpu(p->num_sectors); 80 num_sectors = be32_to_cpu(p->num_sectors);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7495d3e20775..0b615d62a159 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -74,6 +74,7 @@
74#include <linux/times.h> 74#include <linux/times.h>
75#include <linux/cpuset.h> 75#include <linux/cpuset.h>
76#include <linux/rcupdate.h> 76#include <linux/rcupdate.h>
77#include <linux/delayacct.h>
77 78
78#include <asm/uaccess.h> 79#include <asm/uaccess.h>
79#include <asm/pgtable.h> 80#include <asm/pgtable.h>
@@ -411,7 +412,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
411 412
412 res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ 413 res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
413%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 414%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
414%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", 415%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n",
415 task->pid, 416 task->pid,
416 tcomm, 417 tcomm,
417 state, 418 state,
@@ -455,7 +456,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
455 task->exit_signal, 456 task->exit_signal,
456 task_cpu(task), 457 task_cpu(task),
457 task->rt_priority, 458 task->rt_priority,
458 task->policy); 459 task->policy,
460 (unsigned long long)delayacct_blkio_ticks(task));
459 if(mm) 461 if(mm)
460 mmput(mm); 462 mmput(mm);
461 return res; 463 return res;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 243a94af0427..fe8d55fb17cc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -551,6 +551,27 @@ static int proc_fd_access_allowed(struct inode *inode)
551 return allowed; 551 return allowed;
552} 552}
553 553
554static int proc_setattr(struct dentry *dentry, struct iattr *attr)
555{
556 int error;
557 struct inode *inode = dentry->d_inode;
558
559 if (attr->ia_valid & ATTR_MODE)
560 return -EPERM;
561
562 error = inode_change_ok(inode, attr);
563 if (!error) {
564 error = security_inode_setattr(dentry, attr);
565 if (!error)
566 error = inode_setattr(inode, attr);
567 }
568 return error;
569}
570
571static struct inode_operations proc_def_inode_operations = {
572 .setattr = proc_setattr,
573};
574
554extern struct seq_operations mounts_op; 575extern struct seq_operations mounts_op;
555struct proc_mounts { 576struct proc_mounts {
556 struct seq_file m; 577 struct seq_file m;
@@ -1111,7 +1132,8 @@ out:
1111 1132
1112static struct inode_operations proc_pid_link_inode_operations = { 1133static struct inode_operations proc_pid_link_inode_operations = {
1113 .readlink = proc_pid_readlink, 1134 .readlink = proc_pid_readlink,
1114 .follow_link = proc_pid_follow_link 1135 .follow_link = proc_pid_follow_link,
1136 .setattr = proc_setattr,
1115}; 1137};
1116 1138
1117static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1139static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
@@ -1285,6 +1307,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1285 ei = PROC_I(inode); 1307 ei = PROC_I(inode);
1286 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1287 inode->i_ino = fake_ino(task->pid, ino); 1309 inode->i_ino = fake_ino(task->pid, ino);
1310 inode->i_op = &proc_def_inode_operations;
1288 1311
1289 /* 1312 /*
1290 * grab the reference to task. 1313 * grab the reference to task.
@@ -1339,6 +1362,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1339 inode->i_uid = 0; 1362 inode->i_uid = 0;
1340 inode->i_gid = 0; 1363 inode->i_gid = 0;
1341 } 1364 }
1365 inode->i_mode &= ~(S_ISUID | S_ISGID);
1342 security_task_to_inode(task, inode); 1366 security_task_to_inode(task, inode);
1343 put_task_struct(task); 1367 put_task_struct(task);
1344 return 1; 1368 return 1;
@@ -1389,6 +1413,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1389 inode->i_uid = 0; 1413 inode->i_uid = 0;
1390 inode->i_gid = 0; 1414 inode->i_gid = 0;
1391 } 1415 }
1416 inode->i_mode &= ~(S_ISUID | S_ISGID);
1392 security_task_to_inode(task, inode); 1417 security_task_to_inode(task, inode);
1393 put_task_struct(task); 1418 put_task_struct(task);
1394 return 1; 1419 return 1;
@@ -1527,11 +1552,13 @@ static struct file_operations proc_task_operations = {
1527 */ 1552 */
1528static struct inode_operations proc_fd_inode_operations = { 1553static struct inode_operations proc_fd_inode_operations = {
1529 .lookup = proc_lookupfd, 1554 .lookup = proc_lookupfd,
1555 .setattr = proc_setattr,
1530}; 1556};
1531 1557
1532static struct inode_operations proc_task_inode_operations = { 1558static struct inode_operations proc_task_inode_operations = {
1533 .lookup = proc_task_lookup, 1559 .lookup = proc_task_lookup,
1534 .getattr = proc_task_getattr, 1560 .getattr = proc_task_getattr,
1561 .setattr = proc_setattr,
1535}; 1562};
1536 1563
1537#ifdef CONFIG_SECURITY 1564#ifdef CONFIG_SECURITY
@@ -1845,11 +1872,13 @@ static struct file_operations proc_tid_base_operations = {
1845static struct inode_operations proc_tgid_base_inode_operations = { 1872static struct inode_operations proc_tgid_base_inode_operations = {
1846 .lookup = proc_tgid_base_lookup, 1873 .lookup = proc_tgid_base_lookup,
1847 .getattr = pid_getattr, 1874 .getattr = pid_getattr,
1875 .setattr = proc_setattr,
1848}; 1876};
1849 1877
1850static struct inode_operations proc_tid_base_inode_operations = { 1878static struct inode_operations proc_tid_base_inode_operations = {
1851 .lookup = proc_tid_base_lookup, 1879 .lookup = proc_tid_base_lookup,
1852 .getattr = pid_getattr, 1880 .getattr = pid_getattr,
1881 .setattr = proc_setattr,
1853}; 1882};
1854 1883
1855#ifdef CONFIG_SECURITY 1884#ifdef CONFIG_SECURITY
@@ -1892,11 +1921,13 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1892static struct inode_operations proc_tgid_attr_inode_operations = { 1921static struct inode_operations proc_tgid_attr_inode_operations = {
1893 .lookup = proc_tgid_attr_lookup, 1922 .lookup = proc_tgid_attr_lookup,
1894 .getattr = pid_getattr, 1923 .getattr = pid_getattr,
1924 .setattr = proc_setattr,
1895}; 1925};
1896 1926
1897static struct inode_operations proc_tid_attr_inode_operations = { 1927static struct inode_operations proc_tid_attr_inode_operations = {
1898 .lookup = proc_tid_attr_lookup, 1928 .lookup = proc_tid_attr_lookup,
1899 .getattr = pid_getattr, 1929 .getattr = pid_getattr,
1930 .setattr = proc_setattr,
1900}; 1931};
1901#endif 1932#endif
1902 1933
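The new proc_setattr() rejects ATTR_MODE outright, so chmod on per-pid proc entries fails cleanly instead of appearing to succeed and being silently reverted on revalidate; the revalidate paths additionally strip setuid/setgid bits. The gatekeeping order, sketched with stand-in types:

    #include <stdio.h>
    #include <errno.h>

    #define ATTR_MODE 0x1
    #define ATTR_UID  0x2

    struct iattr { unsigned int ia_valid; };

    /* The shape of proc_setattr(): refuse mode changes up front,
     * before the generic validity and security checks run. */
    static int proc_setattr(struct iattr *attr)
    {
        if (attr->ia_valid & ATTR_MODE)
            return -EPERM;
        /* ... inode_change_ok() + security hook + inode_setattr() ... */
        return 0;
    }

    int main(void)
    {
        struct iattr chmod_req = { ATTR_MODE };
        struct iattr chown_req = { ATTR_UID };

        printf("chmod -> %d, chown -> %d\n",
               proc_setattr(&chmod_req), proc_setattr(&chown_req));
        return 0;
    }
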
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6dcef089e18e..49dfb2ab783e 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -192,7 +192,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent)
192{ 192{
193 struct inode * root_inode; 193 struct inode * root_inode;
194 194
195 s->s_flags |= MS_NODIRATIME; 195 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
196 s->s_blocksize = 1024; 196 s->s_blocksize = 1024;
197 s->s_blocksize_bits = 10; 197 s->s_blocksize_bits = 10;
198 s->s_magic = PROC_SUPER_MAGIC; 198 s->s_magic = PROC_SUPER_MAGIC;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 036d14d83627..6a984f64edd7 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -42,8 +42,6 @@ const struct file_operations proc_kcore_operations = {
42#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) 42#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
43#endif 43#endif
44 44
45#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
46
47/* An ELF note in memory */ 45/* An ELF note in memory */
48struct memelfnote 46struct memelfnote
49{ 47{
@@ -384,7 +382,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
384 */ 382 */
385 if (n) { 383 if (n) {
386 if (clear_user(buffer + tsz - n, 384 if (clear_user(buffer + tsz - n,
387 tsz - n)) 385 n))
388 return -EFAULT; 386 return -EFAULT;
389 } 387 }
390 } else { 388 } else {
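The kcore fix is the classic partial-copy remainder bug: clear_user()-style helpers return the number of bytes not copied, so after a short copy the unwritten region is the last n bytes of the window, and it is those n bytes, not tsz - n, that must be cleared. Modeled with an artificial short copy:

    #include <stdio.h>
    #include <string.h>

    /* Models a copy that fails partway: returns bytes NOT copied. */
    static size_t copy_out(char *dst, const char *src, size_t len)
    {
        size_t copied = len > 5 ? 5 : len;  /* only 5 bytes land */

        memcpy(dst, src, copied);
        return len - copied;
    }

    int main(void)
    {
        char dst[16];
        size_t tsz = 10, n;

        memset(dst, '?', sizeof(dst));
        n = copy_out(dst, "ABCDEFGHIJ", tsz);
        if (n)  /* zero the uncopied tail: n bytes, not tsz - n */
            memset(dst + tsz - n, 0, n);
        printf("copied=%zu tail_zeroed=%zu\n", tsz - n, n);
        return 0;
    }
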
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 9f2cfc30f9cf..942156225447 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
169 "Mapped: %8lu kB\n" 169 "Mapped: %8lu kB\n"
170 "Slab: %8lu kB\n" 170 "Slab: %8lu kB\n"
171 "PageTables: %8lu kB\n" 171 "PageTables: %8lu kB\n"
172 "NFS Unstable: %8lu kB\n" 172 "NFS_Unstable: %8lu kB\n"
173 "Bounce: %8lu kB\n" 173 "Bounce: %8lu kB\n"
174 "CommitLimit: %8lu kB\n" 174 "CommitLimit: %8lu kB\n"
175 "Committed_AS: %8lu kB\n" 175 "Committed_AS: %8lu kB\n"
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 99fffc9e1bfd..677139b48e00 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -283,9 +283,9 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
283 283
284/*****************************************************************************/ 284/*****************************************************************************/
285/* 285/*
286 * set up a mapping 286 * set up a mapping for shared memory segments
287 */ 287 */
288int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) 288int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
289{ 289{
290 return 0; 290 return vma->vm_flags & VM_SHARED ? 0 : -ENOSYS;
291} 291}
diff --git a/fs/read_write.c b/fs/read_write.c
index 5bc0e9234f9d..d4cb3183c99c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -436,7 +436,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
436 return seg; 436 return seg;
437} 437}
438 438
439EXPORT_SYMBOL(iov_shorten); 439EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */
440 440
441/* A write operation does a read from user space and vice versa */ 441/* A write operation does a read from user space and vice versa */
442#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 442#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
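Only the export of iov_shorten() changes here (marked unused as of June 2006), but for context: the helper clips an iovec array so that at most `to` bytes remain, returning the surviving segment count. A hedged re-implementation for illustration, not the kernel's exact code:

    /* Hedged sketch of what iov_shorten() does: clip the iovec array
     * to at most 'to' bytes and return how many segments survive. */
    #include <assert.h>
    #include <stddef.h>
    #include <sys/uio.h>

    static unsigned long shorten(struct iovec *iov,
                                 unsigned long nr_segs, size_t to)
    {
            unsigned long seg = 0;
            size_t len = 0;

            while (seg < nr_segs) {
                    seg++;
                    if (len + iov->iov_len >= to) {
                            iov->iov_len = to - len; /* clip last one */
                            break;
                    }
                    len += iov->iov_len;
                    iov++;
            }
            return seg;
    }

    int main(void)
    {
            char a[8], b[8];
            struct iovec v[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

            /* 12 bytes: one full segment plus 4 bytes of the second */
            assert(shorten(v, 2, 12) == 2 && v[1].iov_len == 4);
            return 0;
    }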
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 752cea12e30f..1627edd50810 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -48,8 +48,8 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
48 return 0; 48 return 0;
49 } 49 }
50 50
51 reiserfs_write_lock(inode->i_sb);
52 mutex_lock(&inode->i_mutex); 51 mutex_lock(&inode->i_mutex);
52 reiserfs_write_lock(inode->i_sb);
53 /* freeing preallocation only involves relogging blocks that 53 /* freeing preallocation only involves relogging blocks that
54 * are already in the current transaction. preallocation gets 54 * are already in the current transaction. preallocation gets
55 * freed at the end of each transaction, so it is impossible for 55 * freed at the end of each transaction, so it is impossible for
@@ -860,8 +860,12 @@ static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_han
860 // this sets the proper flags for O_SYNC to trigger a commit 860 // this sets the proper flags for O_SYNC to trigger a commit
861 mark_inode_dirty(inode); 861 mark_inode_dirty(inode);
862 reiserfs_write_unlock(inode->i_sb); 862 reiserfs_write_unlock(inode->i_sb);
863 } else 863 } else {
864 reiserfs_write_lock(inode->i_sb);
865 reiserfs_update_inode_transaction(inode);
864 mark_inode_dirty(inode); 866 mark_inode_dirty(inode);
867 reiserfs_write_unlock(inode->i_sb);
868 }
865 869
866 sd_update = 1; 870 sd_update = 1;
867 } 871 }
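This hunk, like the reiserfs_unpack() change further down, swaps the acquisition order so i_mutex is always taken before the per-superblock reiserfs write lock; a single global ordering is what rules out ABBA deadlocks between two lock holders. A hypothetical pthread sketch of the discipline:

    /* Hypothetical pthread sketch of the ordering rule enforced
     * above: every path takes lock 1 before lock 2, so two threads
     * can never each hold one lock and wait on the other (ABBA). */
    #include <pthread.h>

    static pthread_mutex_t i_mutex = PTHREAD_MUTEX_INITIALIZER; /* 1 */
    static pthread_mutex_t sb_lock = PTHREAD_MUTEX_INITIALIZER; /* 2 */

    static void do_locked_work(void (*work)(void))
    {
            /* Rule: i_mutex first, superblock lock second, always. */
            pthread_mutex_lock(&i_mutex);
            pthread_mutex_lock(&sb_lock);
            work();
            pthread_mutex_unlock(&sb_lock);
            pthread_mutex_unlock(&i_mutex);
    }

    static void noop(void) { }

    int main(void)
    {
            do_locked_work(noop);
            return 0;
    }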
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 12dfdcfbee3d..52f1e2136546 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -39,14 +39,10 @@ void reiserfs_delete_inode(struct inode *inode)
39 39
40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
41 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 41 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
42 mutex_lock(&inode->i_mutex);
43
44 reiserfs_delete_xattrs(inode); 42 reiserfs_delete_xattrs(inode);
45 43
46 if (journal_begin(&th, inode->i_sb, jbegin_count)) { 44 if (journal_begin(&th, inode->i_sb, jbegin_count))
47 mutex_unlock(&inode->i_mutex);
48 goto out; 45 goto out;
49 }
50 reiserfs_update_inode_transaction(inode); 46 reiserfs_update_inode_transaction(inode);
51 47
52 err = reiserfs_delete_object(&th, inode); 48 err = reiserfs_delete_object(&th, inode);
@@ -57,12 +53,8 @@ void reiserfs_delete_inode(struct inode *inode)
57 if (!err) 53 if (!err)
58 DQUOT_FREE_INODE(inode); 54 DQUOT_FREE_INODE(inode);
59 55
60 if (journal_end(&th, inode->i_sb, jbegin_count)) { 56 if (journal_end(&th, inode->i_sb, jbegin_count))
61 mutex_unlock(&inode->i_mutex);
62 goto out; 57 goto out;
63 }
64
65 mutex_unlock(&inode->i_mutex);
66 58
67 /* check return value from reiserfs_delete_object after 59 /* check return value from reiserfs_delete_object after
68 * ending the transaction 60 * ending the transaction
@@ -2348,6 +2340,7 @@ static int reiserfs_write_full_page(struct page *page,
2348 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; 2340 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2349 int error = 0; 2341 int error = 0;
2350 unsigned long block; 2342 unsigned long block;
2343 sector_t last_block;
2351 struct buffer_head *head, *bh; 2344 struct buffer_head *head, *bh;
2352 int partial = 0; 2345 int partial = 0;
2353 int nr = 0; 2346 int nr = 0;
@@ -2395,10 +2388,19 @@ static int reiserfs_write_full_page(struct page *page,
2395 } 2388 }
2396 bh = head; 2389 bh = head;
2397 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); 2390 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2391 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
2398 /* first map all the buffers, logging any direct items we find */ 2392 /* first map all the buffers, logging any direct items we find */
2399 do { 2393 do {
2400 if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || 2394 if (block > last_block) {
2401 (buffer_mapped(bh) 2395 /*
2396 * This can happen when the block size is less than
2397 * the page size. The corresponding bytes in the page
2398 * were zero filled above
2399 */
2400 clear_buffer_dirty(bh);
2401 set_buffer_uptodate(bh);
2402 } else if ((checked || buffer_dirty(bh)) &&
2403 (!buffer_mapped(bh) || (buffer_mapped(bh)
2402 && bh->b_blocknr == 2404 && bh->b_blocknr ==
2403 0))) { 2405 0))) {
2404 /* not mapped yet, or it points to a direct item, search 2406 /* not mapped yet, or it points to a direct item, search
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 745c88100895..a986b5e1e288 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -116,12 +116,12 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
116 if (REISERFS_I(inode)->i_flags & i_nopack_mask) { 116 if (REISERFS_I(inode)->i_flags & i_nopack_mask) {
117 return 0; 117 return 0;
118 } 118 }
119 reiserfs_write_lock(inode->i_sb);
120 119
121 /* we need to make sure nobody is changing the file size beneath 120 /* we need to make sure nobody is changing the file size beneath
122 ** us 121 ** us
123 */ 122 */
124 mutex_lock(&inode->i_mutex); 123 mutex_lock(&inode->i_mutex);
124 reiserfs_write_lock(inode->i_sb);
125 125
126 write_from = inode->i_size & (blocksize - 1); 126 write_from = inode->i_size & (blocksize - 1);
127 /* if we are on a block boundary, we are already unpacked. */ 127 /* if we are on a block boundary, we are already unpacked. */
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 5d8a8cfebc70..c533ec1bcaec 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -492,9 +492,17 @@ static void add_file(struct super_block *sb, char *name,
492 492
493int reiserfs_proc_info_init(struct super_block *sb) 493int reiserfs_proc_info_init(struct super_block *sb)
494{ 494{
495 char b[BDEVNAME_SIZE];
496 char *s;
497
498 /* Some block devices use /'s */
499 strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
500 s = strchr(b, '/');
501 if (s)
502 *s = '!';
503
495 spin_lock_init(&__PINFO(sb).lock); 504 spin_lock_init(&__PINFO(sb).lock);
496 REISERFS_SB(sb)->procdir = 505 REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
497 proc_mkdir(reiserfs_bdevname(sb), proc_info_root);
498 if (REISERFS_SB(sb)->procdir) { 506 if (REISERFS_SB(sb)->procdir) {
499 REISERFS_SB(sb)->procdir->owner = THIS_MODULE; 507 REISERFS_SB(sb)->procdir->owner = THIS_MODULE;
500 REISERFS_SB(sb)->procdir->data = sb; 508 REISERFS_SB(sb)->procdir->data = sb;
@@ -508,13 +516,22 @@ int reiserfs_proc_info_init(struct super_block *sb)
508 return 0; 516 return 0;
509 } 517 }
510 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", 518 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s",
511 proc_info_root_name, reiserfs_bdevname(sb)); 519 proc_info_root_name, b);
512 return 1; 520 return 1;
513} 521}
514 522
515int reiserfs_proc_info_done(struct super_block *sb) 523int reiserfs_proc_info_done(struct super_block *sb)
516{ 524{
517 struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; 525 struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
526 char b[BDEVNAME_SIZE];
527 char *s;
528
529 /* Some block devices use /'s */
530 strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
531 s = strchr(b, '/');
532 if (s)
533 *s = '!';
534
518 if (de) { 535 if (de) {
519 remove_proc_entry("journal", de); 536 remove_proc_entry("journal", de);
520 remove_proc_entry("oidmap", de); 537 remove_proc_entry("oidmap", de);
@@ -528,7 +545,7 @@ int reiserfs_proc_info_done(struct super_block *sb)
528 __PINFO(sb).exiting = 1; 545 __PINFO(sb).exiting = 1;
529 spin_unlock(&__PINFO(sb).lock); 546 spin_unlock(&__PINFO(sb).lock);
530 if (proc_info_root) { 547 if (proc_info_root) {
531 remove_proc_entry(reiserfs_bdevname(sb), proc_info_root); 548 remove_proc_entry(b, proc_info_root);
532 REISERFS_SB(sb)->procdir = NULL; 549 REISERFS_SB(sb)->procdir = NULL;
533 } 550 }
534 return 0; 551 return 0;
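procfs entry names may not contain '/', but some block device names ("cciss/c0d0", for example) do, so both hunks rewrite the first slash to '!' before creating or removing the entry. A standalone sketch of the sanitisation:

    /* Standalone sketch of the name sanitisation added above:
     * "cciss/c0d0" becomes "cciss!c0d0". Only the first slash is
     * replaced, matching the patch; snprintf stands in for strlcpy. */
    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    #define BDEVNAME_SIZE 32

    static void sanitize_bdevname(char *b, const char *name)
    {
            char *s;

            snprintf(b, BDEVNAME_SIZE, "%s", name);
            s = strchr(b, '/');
            if (s)
                    *s = '!';
    }

    int main(void)
    {
            char b[BDEVNAME_SIZE];

            sanitize_bdevname(b, "cciss/c0d0");
            assert(strcmp(b, "cciss!c0d0") == 0);
            return 0;
    }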
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 39fedaa88a0c..d935fb9394e3 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
424 int res = -ENOTDIR; 424 int res = -ENOTDIR;
425 if (!file->f_op || !file->f_op->readdir) 425 if (!file->f_op || !file->f_op->readdir)
426 goto out; 426 goto out;
427 mutex_lock(&inode->i_mutex); 427 mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
428// down(&inode->i_zombie); 428// down(&inode->i_zombie);
429 res = -ENOENT; 429 res = -ENOENT;
430 if (!IS_DEADDIR(inode)) { 430 if (!IS_DEADDIR(inode)) {
diff --git a/fs/splice.c b/fs/splice.c
index 05fd2787be98..684bca3d3a10 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1307,6 +1307,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
1307} 1307}
1308 1308
1309/* 1309/*
1310 * Make sure there's data to read. Wait for input if we can, otherwise
1311 * return an appropriate error.
1312 */
1313static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1314{
1315 int ret;
1316
1317 /*
1318 * Check ->nrbufs without the inode lock first. This function
1319 * is speculative anyways, so missing one is ok.
1320 */
1321 if (pipe->nrbufs)
1322 return 0;
1323
1324 ret = 0;
1325 mutex_lock(&pipe->inode->i_mutex);
1326
1327 while (!pipe->nrbufs) {
1328 if (signal_pending(current)) {
1329 ret = -ERESTARTSYS;
1330 break;
1331 }
1332 if (!pipe->writers)
1333 break;
1334 if (!pipe->waiting_writers) {
1335 if (flags & SPLICE_F_NONBLOCK) {
1336 ret = -EAGAIN;
1337 break;
1338 }
1339 }
1340 pipe_wait(pipe);
1341 }
1342
1343 mutex_unlock(&pipe->inode->i_mutex);
1344 return ret;
1345}
1346
1347/*
1348 * Make sure there's writeable room. Wait for room if we can, otherwise
1349 * return an appropriate error.
1350 */
1351static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1352{
1353 int ret;
1354
1355 /*
1356 * Check ->nrbufs without the inode lock first. This function
1357 * is speculative anyways, so missing one is ok.
1358 */
1359 if (pipe->nrbufs < PIPE_BUFFERS)
1360 return 0;
1361
1362 ret = 0;
1363 mutex_lock(&pipe->inode->i_mutex);
1364
1365 while (pipe->nrbufs >= PIPE_BUFFERS) {
1366 if (!pipe->readers) {
1367 send_sig(SIGPIPE, current, 0);
1368 ret = -EPIPE;
1369 break;
1370 }
1371 if (flags & SPLICE_F_NONBLOCK) {
1372 ret = -EAGAIN;
1373 break;
1374 }
1375 if (signal_pending(current)) {
1376 ret = -ERESTARTSYS;
1377 break;
1378 }
1379 pipe->waiting_writers++;
1380 pipe_wait(pipe);
1381 pipe->waiting_writers--;
1382 }
1383
1384 mutex_unlock(&pipe->inode->i_mutex);
1385 return ret;
1386}
1387
1388/*
1310 * Link contents of ipipe to opipe. 1389 * Link contents of ipipe to opipe.
1311 */ 1390 */
1312static int link_pipe(struct pipe_inode_info *ipipe, 1391static int link_pipe(struct pipe_inode_info *ipipe,
@@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1314 size_t len, unsigned int flags) 1393 size_t len, unsigned int flags)
1315{ 1394{
1316 struct pipe_buffer *ibuf, *obuf; 1395 struct pipe_buffer *ibuf, *obuf;
1317 int ret, do_wakeup, i, ipipe_first; 1396 int ret = 0, i = 0, nbuf;
1318
1319 ret = do_wakeup = ipipe_first = 0;
1320 1397
1321 /* 1398 /*
1322 * Potential ABBA deadlock, work around it by ordering lock 1399 * Potential ABBA deadlock, work around it by ordering lock
@@ -1324,126 +1401,62 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1324 * could deadlock (one doing tee from A -> B, the other from B -> A). 1401 * could deadlock (one doing tee from A -> B, the other from B -> A).
1325 */ 1402 */
1326 if (ipipe->inode < opipe->inode) { 1403 if (ipipe->inode < opipe->inode) {
1327 ipipe_first = 1; 1404 mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
1328 mutex_lock(&ipipe->inode->i_mutex); 1405 mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
1329 mutex_lock(&opipe->inode->i_mutex);
1330 } else { 1406 } else {
1331 mutex_lock(&opipe->inode->i_mutex); 1407 mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
1332 mutex_lock(&ipipe->inode->i_mutex); 1408 mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
1333 } 1409 }
1334 1410
1335 for (i = 0;; i++) { 1411 do {
1336 if (!opipe->readers) { 1412 if (!opipe->readers) {
1337 send_sig(SIGPIPE, current, 0); 1413 send_sig(SIGPIPE, current, 0);
1338 if (!ret) 1414 if (!ret)
1339 ret = -EPIPE; 1415 ret = -EPIPE;
1340 break; 1416 break;
1341 } 1417 }
1342 if (ipipe->nrbufs - i) {
1343 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1344 1418
1345 /* 1419 /*
1346 * If we have room, fill this buffer 1420 * If we have iterated all input buffers or ran out of
1347 */ 1421 * output room, break.
1348 if (opipe->nrbufs < PIPE_BUFFERS) { 1422 */
1349 int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); 1423 if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
1350 1424 break;
1351 /*
1352 * Get a reference to this pipe buffer,
1353 * so we can copy the contents over.
1354 */
1355 ibuf->ops->get(ipipe, ibuf);
1356
1357 obuf = opipe->bufs + nbuf;
1358 *obuf = *ibuf;
1359
1360 /*
1361 * Don't inherit the gift flag, we need to
1362 * prevent multiple steals of this page.
1363 */
1364 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1365
1366 if (obuf->len > len)
1367 obuf->len = len;
1368
1369 opipe->nrbufs++;
1370 do_wakeup = 1;
1371 ret += obuf->len;
1372 len -= obuf->len;
1373
1374 if (!len)
1375 break;
1376 if (opipe->nrbufs < PIPE_BUFFERS)
1377 continue;
1378 }
1379
1380 /*
1381 * We have input available, but no output room.
1382 * If we already copied data, return that. If we
1383 * need to drop the opipe lock, it must be ordered
1384 * last to avoid deadlocks.
1385 */
1386 if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
1387 if (!ret)
1388 ret = -EAGAIN;
1389 break;
1390 }
1391 if (signal_pending(current)) {
1392 if (!ret)
1393 ret = -ERESTARTSYS;
1394 break;
1395 }
1396 if (do_wakeup) {
1397 smp_mb();
1398 if (waitqueue_active(&opipe->wait))
1399 wake_up_interruptible(&opipe->wait);
1400 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1401 do_wakeup = 0;
1402 }
1403 1425
1404 opipe->waiting_writers++; 1426 ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
1405 pipe_wait(opipe); 1427 nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
1406 opipe->waiting_writers--;
1407 continue;
1408 }
1409 1428
1410 /* 1429 /*
1411 * No input buffers, do the usual checks for available 1430 * Get a reference to this pipe buffer,
1412 * writers and blocking and wait if necessary 1431 * so we can copy the contents over.
1413 */ 1432 */
1414 if (!ipipe->writers) 1433 ibuf->ops->get(ipipe, ibuf);
1415 break; 1434
1416 if (!ipipe->waiting_writers) { 1435 obuf = opipe->bufs + nbuf;
1417 if (ret) 1436 *obuf = *ibuf;
1418 break; 1437
1419 }
1420 /* 1438 /*
1421 * pipe_wait() drops the ipipe mutex. To avoid deadlocks 1439 * Don't inherit the gift flag, we need to
1422 * with another process, we can only safely do that if 1440 * prevent multiple steals of this page.
1423 * the ipipe lock is ordered last.
1424 */ 1441 */
1425 if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { 1442 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1426 if (!ret)
1427 ret = -EAGAIN;
1428 break;
1429 }
1430 if (signal_pending(current)) {
1431 if (!ret)
1432 ret = -ERESTARTSYS;
1433 break;
1434 }
1435 1443
1436 if (waitqueue_active(&ipipe->wait)) 1444 if (obuf->len > len)
1437 wake_up_interruptible_sync(&ipipe->wait); 1445 obuf->len = len;
1438 kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
1439 1446
1440 pipe_wait(ipipe); 1447 opipe->nrbufs++;
1441 } 1448 ret += obuf->len;
1449 len -= obuf->len;
1450 i++;
1451 } while (len);
1442 1452
1443 mutex_unlock(&ipipe->inode->i_mutex); 1453 mutex_unlock(&ipipe->inode->i_mutex);
1444 mutex_unlock(&opipe->inode->i_mutex); 1454 mutex_unlock(&opipe->inode->i_mutex);
1445 1455
1446 if (do_wakeup) { 1456 /*
1457 * If we put data in the output pipe, wakeup any potential readers.
1458 */
1459 if (ret > 0) {
1447 smp_mb(); 1460 smp_mb();
1448 if (waitqueue_active(&opipe->wait)) 1461 if (waitqueue_active(&opipe->wait))
1449 wake_up_interruptible(&opipe->wait); 1462 wake_up_interruptible(&opipe->wait);
@@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len,
1464{ 1477{
1465 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; 1478 struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
1466 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; 1479 struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
1480 int ret = -EINVAL;
1467 1481
1468 /* 1482 /*
1469 * Link ipipe to the two output pipes, consuming as we go along. 1483 * Duplicate the contents of ipipe to opipe without actually
1484 * copying the data.
1470 */ 1485 */
1471 if (ipipe && opipe) 1486 if (ipipe && opipe && ipipe != opipe) {
1472 return link_pipe(ipipe, opipe, len, flags); 1487 /*
1488 * Keep going, unless we encounter an error. The ipipe/opipe
1489 * ordering doesn't really matter.
1490 */
1491 ret = link_ipipe_prep(ipipe, flags);
1492 if (!ret) {
1493 ret = link_opipe_prep(opipe, flags);
1494 if (!ret) {
1495 ret = link_pipe(ipipe, opipe, len, flags);
1496 if (!ret && (flags & SPLICE_F_NONBLOCK))
1497 ret = -EAGAIN;
1498 }
1499 }
1500 }
1473 1501
1474 return -EINVAL; 1502 return ret;
1475} 1503}
1476 1504
1477asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) 1505asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
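The reworked do_tee() first ensures the input pipe has data and the output pipe has room — waiting, or failing with -EAGAIN under SPLICE_F_NONBLOCK — and only then calls the simplified link_pipe(), which duplicates buffer references rather than copying data. A hypothetical userspace caller of the resulting tee(2) behaviour (run it between two pipes, e.g. `cat file | ./a.out | wc -c`):

    /* Hypothetical userspace sketch of tee(2), whose kernel side the
     * hunks above rework: duplicate up to 64 KiB from stdin's pipe to
     * stdout's pipe without consuming the input. Both fds must be
     * pipes; needs a glibc new enough to expose tee(). */
    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            ssize_t n;

            /* SPLICE_F_NONBLOCK as the flags argument would return
             * -1/EAGAIN instead of sleeping when there is no input or
             * no output room — the cases link_ipipe_prep() and
             * link_opipe_prep() now handle up front. */
            n = tee(STDIN_FILENO, STDOUT_FILENO, 65536, 0);
            if (n < 0) {
                    perror("tee");
                    return 1;
            }
            fprintf(stderr, "duplicated %zd bytes\n", n);
            return 0;
    }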
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 5e0e31cc46f5..9889e54e1f13 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -109,6 +109,17 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
109 inode->i_ctime = iattr->ia_ctime; 109 inode->i_ctime = iattr->ia_ctime;
110} 110}
111 111
112
113/*
114 * sysfs has a different i_mutex lock order behavior for i_mutex than other
115 * filesystems; sysfs i_mutex is called in many places with subsystem locks
116 * held. At the same time, many of the VFS locking rules do not apply to
117 * sysfs at all (cross directory rename for example). To untangle this mess
118 * (which gives false positives in lockdep), we're giving sysfs inodes their
119 * own class for i_mutex.
120 */
121static struct lock_class_key sysfs_inode_imutex_key;
122
112struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) 123struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
113{ 124{
114 struct inode * inode = new_inode(sysfs_sb); 125 struct inode * inode = new_inode(sysfs_sb);
@@ -118,6 +129,7 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
118 inode->i_mapping->a_ops = &sysfs_aops; 129 inode->i_mapping->a_ops = &sysfs_aops;
119 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 130 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
120 inode->i_op = &sysfs_inode_operations; 131 inode->i_op = &sysfs_inode_operations;
132 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
121 133
122 if (sd->s_iattr) { 134 if (sd->s_iattr) {
123 /* sysfs_dirent has non-default attributes 135 /* sysfs_dirent has non-default attributes
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 3873c672cb4c..33323473e3c4 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -75,6 +75,12 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
75 } 75 }
76 *err = -ENOSPC; 76 *err = -ENOSPC;
77 77
78 UDF_I_UNIQUE(inode) = 0;
79 UDF_I_LENEXTENTS(inode) = 0;
80 UDF_I_NEXT_ALLOC_BLOCK(inode) = 0;
81 UDF_I_NEXT_ALLOC_GOAL(inode) = 0;
82 UDF_I_STRAT4096(inode) = 0;
83
78 block = udf_new_block(dir->i_sb, NULL, UDF_I_LOCATION(dir).partitionReferenceNum, 84 block = udf_new_block(dir->i_sb, NULL, UDF_I_LOCATION(dir).partitionReferenceNum,
79 start, err); 85 start, err);
80 if (*err) 86 if (*err)
@@ -84,11 +90,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err)
84 } 90 }
85 91
86 mutex_lock(&sbi->s_alloc_mutex); 92 mutex_lock(&sbi->s_alloc_mutex);
87 UDF_I_UNIQUE(inode) = 0;
88 UDF_I_LENEXTENTS(inode) = 0;
89 UDF_I_NEXT_ALLOC_BLOCK(inode) = 0;
90 UDF_I_NEXT_ALLOC_GOAL(inode) = 0;
91 UDF_I_STRAT4096(inode) = 0;
92 if (UDF_SB_LVIDBH(sb)) 93 if (UDF_SB_LVIDBH(sb))
93 { 94 {
94 struct logicalVolHeaderDesc *lvhd; 95 struct logicalVolHeaderDesc *lvhd;
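Moving the UDF_I_*() initialisation above udf_new_block() matters because the error path hands a partially set-up inode to iput(), which may read those fields; the matching udf_alloc_inode() hunk below gives freshly allocated in-core inodes the same safe defaults. A standalone sketch of the init-before-first-error-exit pattern:

    /* Standalone sketch of the pattern the two UDF hunks apply: set
     * every field to a safe default before the first operation that
     * can fail, so cleanup paths never see garbage. */
    #include <stdlib.h>

    struct object {
            int a, b;
    };

    static int may_fail(void) { return -1; } /* udf_new_block stand-in */
    static void destroy(struct object *o) { (void)o->a; free(o); }

    static struct object *object_new(void)
    {
            struct object *o = malloc(sizeof(*o));

            if (!o)
                    return NULL;
            o->a = 0;               /* initialise first ... */
            o->b = 0;
            if (may_fail() < 0) {   /* ... so this error path is safe */
                    destroy(o);     /* cleanup may inspect o->a */
                    return NULL;
            }
            return o;
    }

    int main(void)
    {
            struct object *o = object_new();

            free(o);        /* free(NULL) is a no-op */
            return 0;
    }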
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4df822c881b6..fcce1a21a51b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -115,6 +115,13 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
115 ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL); 115 ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL);
116 if (!ei) 116 if (!ei)
117 return NULL; 117 return NULL;
118
119 ei->i_unique = 0;
120 ei->i_lenExtents = 0;
121 ei->i_next_alloc_block = 0;
122 ei->i_next_alloc_goal = 0;
123 ei->i_strat4096 = 0;
124
118 return &ei->vfs_inode; 125 return &ei->vfs_inode;
119} 126}
120 127
@@ -1652,7 +1659,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1652 iput(inode); 1659 iput(inode);
1653 goto error_out; 1660 goto error_out;
1654 } 1661 }
1655 sb->s_maxbytes = MAX_LFS_FILESIZE; 1662 sb->s_maxbytes = 1<<30;
1656 return 0; 1663 return 0;
1657 1664
1658error_out: 1665error_out:
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index e1b0e8cfecb4..0abd66ce36ea 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode)
239 { 239 {
240 if (offset) 240 if (offset)
241 { 241 {
242 extoffset -= adsize; 242 /*
243 etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); 243 * OK, there is not extent covering inode->i_size and
244 if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) 244 * no extent above inode->i_size => truncate is
245 { 245 * extending the file by 'offset'.
246 extoffset -= adsize; 246 */
247 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); 247 if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) ||
248 udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); 248 (bh && extoffset == sizeof(struct allocExtDesc))) {
249 /* File has no extents at all! */
250 memset(&eloc, 0x00, sizeof(kernel_lb_addr));
251 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
252 udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
249 } 253 }
250 else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) 254 else {
251 {
252 kernel_lb_addr neloc = { 0, 0 };
253 extoffset -= adsize; 255 extoffset -= adsize;
254 nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | 256 etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
255 ((elen + offset + inode->i_sb->s_blocksize - 1) & 257 if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
256 ~(inode->i_sb->s_blocksize - 1)); 258 {
257 udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); 259 extoffset -= adsize;
258 udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); 260 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
259 } 261 udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
260 else 262 }
261 { 263 else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
262 if (elen & (inode->i_sb->s_blocksize - 1))
263 { 264 {
265 kernel_lb_addr neloc = { 0, 0 };
264 extoffset -= adsize; 266 extoffset -= adsize;
265 elen = EXT_RECORDED_ALLOCATED | 267 nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
266 ((elen + inode->i_sb->s_blocksize - 1) & 268 ((elen + offset + inode->i_sb->s_blocksize - 1) &
267 ~(inode->i_sb->s_blocksize - 1)); 269 ~(inode->i_sb->s_blocksize - 1));
268 udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); 270 udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
271 udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
272 }
273 else
274 {
275 if (elen & (inode->i_sb->s_blocksize - 1))
276 {
277 extoffset -= adsize;
278 elen = EXT_RECORDED_ALLOCATED |
279 ((elen + inode->i_sb->s_blocksize - 1) &
280 ~(inode->i_sb->s_blocksize - 1));
281 udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
282 }
283 memset(&eloc, 0x00, sizeof(kernel_lb_addr));
284 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
285 udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
269 } 286 }
270 memset(&eloc, 0x00, sizeof(kernel_lb_addr));
271 elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
272 udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
273 } 287 }
274 } 288 }
275 } 289 }
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index b01804baa120..b82381475779 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -248,7 +248,7 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk,
248 248
249 if (likely(cur_index != index)) { 249 if (likely(cur_index != index)) {
250 page = ufs_get_locked_page(mapping, index); 250 page = ufs_get_locked_page(mapping, index);
251 if (IS_ERR(page)) 251 if (!page || IS_ERR(page)) /* it was truncated or EIO */
252 continue; 252 continue;
253 } else 253 } else
254 page = locked_page; 254 page = locked_page;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e7c8615beb65..30c6e8a9446c 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
169 169
170static struct buffer_head * 170static struct buffer_head *
171ufs_clear_frags(struct inode *inode, sector_t beg, 171ufs_clear_frags(struct inode *inode, sector_t beg,
172 unsigned int n) 172 unsigned int n, sector_t want)
173{ 173{
174 struct buffer_head *res, *bh; 174 struct buffer_head *res = NULL, *bh;
175 sector_t end = beg + n; 175 sector_t end = beg + n;
176 176
177 res = sb_getblk(inode->i_sb, beg); 177 for (; beg < end; ++beg) {
178 ufs_clear_frag(inode, res);
179 for (++beg; beg < end; ++beg) {
180 bh = sb_getblk(inode->i_sb, beg); 178 bh = sb_getblk(inode->i_sb, beg);
181 ufs_clear_frag(inode, bh); 179 ufs_clear_frag(inode, bh);
182 brelse(bh); 180 if (want != beg)
181 brelse(bh);
182 else
183 res = bh;
183 } 184 }
185 BUG_ON(!res);
184 return res; 186 return res;
185} 187}
186 188
@@ -265,7 +267,9 @@ repeat:
265 lastfrag = ufsi->i_lastfrag; 267 lastfrag = ufsi->i_lastfrag;
266 268
267 } 269 }
268 goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb; 270 tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]);
271 if (tmp)
272 goal = tmp + uspi->s_fpb;
269 tmp = ufs_new_fragments (inode, p, fragment - blockoff, 273 tmp = ufs_new_fragments (inode, p, fragment - blockoff,
270 goal, required + blockoff, 274 goal, required + blockoff,
271 err, locked_page); 275 err, locked_page);
@@ -277,13 +281,15 @@ repeat:
277 tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), 281 tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
278 fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), 282 fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff),
279 err, locked_page); 283 err, locked_page);
280 } 284 } else /* (lastblock > block) */ {
281 /* 285 /*
282 * We will allocate new block before last allocated block 286 * We will allocate new block before last allocated block
283 */ 287 */
284 else /* (lastblock > block) */ { 288 if (block) {
285 if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1]))) 289 tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]);
286 goal = tmp + uspi->s_fpb; 290 if (tmp)
291 goal = tmp + uspi->s_fpb;
292 }
287 tmp = ufs_new_fragments(inode, p, fragment - blockoff, 293 tmp = ufs_new_fragments(inode, p, fragment - blockoff,
288 goal, uspi->s_fpb, err, locked_page); 294 goal, uspi->s_fpb, err, locked_page);
289 } 295 }
@@ -296,7 +302,7 @@ repeat:
296 } 302 }
297 303
298 if (!phys) { 304 if (!phys) {
299 result = ufs_clear_frags(inode, tmp + blockoff, required); 305 result = ufs_clear_frags(inode, tmp, required, tmp + blockoff);
300 } else { 306 } else {
301 *phys = tmp + blockoff; 307 *phys = tmp + blockoff;
302 result = NULL; 308 result = NULL;
@@ -383,7 +389,7 @@ repeat:
383 } 389 }
384 } 390 }
385 391
386 if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb)) 392 if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1])))
387 goal = tmp + uspi->s_fpb; 393 goal = tmp + uspi->s_fpb;
388 else 394 else
389 goal = bh->b_blocknr + uspi->s_fpb; 395 goal = bh->b_blocknr + uspi->s_fpb;
@@ -397,7 +403,8 @@ repeat:
397 403
398 404
399 if (!phys) { 405 if (!phys) {
400 result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb); 406 result = ufs_clear_frags(inode, tmp, uspi->s_fpb,
407 tmp + blockoff);
401 } else { 408 } else {
402 *phys = tmp + blockoff; 409 *phys = tmp + blockoff;
403 *new = 1; 410 *new = 1;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index abd5f23a426d..d344b411e261 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -129,7 +129,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
129 struct inode * inode; 129 struct inode * inode;
130 130
131 if (l > sb->s_blocksize) 131 if (l > sb->s_blocksize)
132 goto out; 132 goto out_notlocked;
133 133
134 lock_kernel(); 134 lock_kernel();
135 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); 135 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
@@ -155,6 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
155 err = ufs_add_nondir(dentry, inode); 155 err = ufs_add_nondir(dentry, inode);
156out: 156out:
157 unlock_kernel(); 157 unlock_kernel();
158out_notlocked:
158 return err; 159 return err;
159 160
160out_fail: 161out_fail:
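The early length check jumped to `out:`, which runs unlock_kernel() before lock_kernel() was ever taken; the new out_notlocked label exits above the unlock. A standalone sketch of the layered error-label idiom, with a pthread mutex standing in for the BKL:

    /* Standalone sketch of the layered error-label idiom the
     * ufs_symlink() fix restores: each label undoes only what was
     * acquired before the jump, so an early failure never releases
     * a lock it never took. */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

    static int do_op(int len, int max)
    {
            int err = -1;

            if (len > max)
                    goto out_notlocked;     /* lock not held yet */

            pthread_mutex_lock(&big_lock);
            err = 0;                /* work that can fail goes here */
    /* out: */
            pthread_mutex_unlock(&big_lock);
    out_notlocked:
            return err;
    }

    int main(void)
    {
            printf("%d %d\n", do_op(1, 8), do_op(9, 8));
            return 0;
    }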
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index c9b55872079b..ea11d04c41a0 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode)
375 int err = 0; 375 int err = 0;
376 struct address_space *mapping = inode->i_mapping; 376 struct address_space *mapping = inode->i_mapping;
377 struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; 377 struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
378 struct ufs_inode_info *ufsi = UFS_I(inode);
379 unsigned lastfrag, i, end; 378 unsigned lastfrag, i, end;
380 struct page *lastpage; 379 struct page *lastpage;
381 struct buffer_head *bh; 380 struct buffer_head *bh;
382 381
383 lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; 382 lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
384 383
385 if (!lastfrag) { 384 if (!lastfrag)
386 ufsi->i_lastfrag = 0;
387 goto out; 385 goto out;
388 } 386
389 lastfrag--; 387 lastfrag--;
390 388
391 lastpage = ufs_get_locked_page(mapping, lastfrag >> 389 lastpage = ufs_get_locked_page(mapping, lastfrag >>
@@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode)
400 for (i = 0; i < end; ++i) 398 for (i = 0; i < end; ++i)
401 bh = bh->b_this_page; 399 bh = bh->b_this_page;
402 400
403 if (!buffer_mapped(bh)) { 401
404 err = ufs_getfrag_block(inode, lastfrag, bh, 1); 402 err = ufs_getfrag_block(inode, lastfrag, bh, 1);
405 403
406 if (unlikely(err)) 404 if (unlikely(err))
407 goto out_unlock; 405 goto out_unlock;
408 406
409 if (buffer_new(bh)) { 407 if (buffer_new(bh)) {
410 clear_buffer_new(bh); 408 clear_buffer_new(bh);
411 unmap_underlying_metadata(bh->b_bdev, 409 unmap_underlying_metadata(bh->b_bdev,
412 bh->b_blocknr); 410 bh->b_blocknr);
413 /* 411 /*
414 * we do not zeroize fragment, because of 412 * we do not zeroize fragment, because of
415 * if it maped to hole, it already contains zeroes 413 * if it maped to hole, it already contains zeroes
416 */ 414 */
417 set_buffer_uptodate(bh); 415 set_buffer_uptodate(bh);
418 mark_buffer_dirty(bh); 416 mark_buffer_dirty(bh);
419 set_page_dirty(lastpage); 417 set_page_dirty(lastpage);
420 }
421 } 418 }
419
422out_unlock: 420out_unlock:
423 ufs_put_locked_page(lastpage); 421 ufs_put_locked_page(lastpage);
424out: 422out:
@@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
440 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 438 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
441 return -EPERM; 439 return -EPERM;
442 440
443 if (inode->i_size > old_i_size) { 441 err = ufs_alloc_lastblock(inode);
444 /*
445 * if we expand file we should care about
446 * allocation of block for last byte first of all
447 */
448 err = ufs_alloc_lastblock(inode);
449 442
450 if (err) { 443 if (err) {
451 i_size_write(inode, old_i_size); 444 i_size_write(inode, old_i_size);
452 goto out; 445 goto out;
453 }
454 /*
455 * go away, because of we expand file, and we do not
456 * need free blocks, and zeroizes page
457 */
458 lock_kernel();
459 goto almost_end;
460 } 446 }
461 447
462 block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); 448 block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
@@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
477 yield(); 463 yield();
478 } 464 }
479 465
480 if (inode->i_size < old_i_size) {
481 /*
482 * now we should have enough space
483 * to allocate block for last byte
484 */
485 err = ufs_alloc_lastblock(inode);
486 if (err)
487 /*
488 * looks like all the same - we have no space,
489 * but we truncate file already
490 */
491 inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize;
492 }
493almost_end:
494 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 466 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
467 ufsi->i_lastfrag = DIRECT_FRAGMENT;
495 unlock_kernel(); 468 unlock_kernel();
496 mark_inode_dirty(inode); 469 mark_inode_dirty(inode);
497out: 470out:
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 337cf2c46d10..22f820a9b15c 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -251,12 +251,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
251{ 251{
252 struct page *page; 252 struct page *page;
253 253
254try_again:
255 page = find_lock_page(mapping, index); 254 page = find_lock_page(mapping, index);
256 if (!page) { 255 if (!page) {
257 page = read_cache_page(mapping, index, 256 page = read_cache_page(mapping, index,
258 (filler_t*)mapping->a_ops->readpage, 257 (filler_t*)mapping->a_ops->readpage,
259 NULL); 258 NULL);
259
260 if (IS_ERR(page)) { 260 if (IS_ERR(page)) {
261 printk(KERN_ERR "ufs_change_blocknr: " 261 printk(KERN_ERR "ufs_change_blocknr: "
262 "read_cache_page error: ino %lu, index: %lu\n", 262 "read_cache_page error: ino %lu, index: %lu\n",
@@ -266,6 +266,14 @@ try_again:
266 266
267 lock_page(page); 267 lock_page(page);
268 268
269 if (unlikely(page->mapping == NULL)) {
270 /* Truncate got there first */
271 unlock_page(page);
272 page_cache_release(page);
273 page = NULL;
274 goto out;
275 }
276
269 if (!PageUptodate(page) || PageError(page)) { 277 if (!PageUptodate(page) || PageError(page)) {
270 unlock_page(page); 278 unlock_page(page);
271 page_cache_release(page); 279 page_cache_release(page);
@@ -275,15 +283,8 @@ try_again:
275 mapping->host->i_ino, index); 283 mapping->host->i_ino, index);
276 284
277 page = ERR_PTR(-EIO); 285 page = ERR_PTR(-EIO);
278 goto out;
279 } 286 }
280 } 287 }
281
282 if (unlikely(!page->mapping || !page_has_buffers(page))) {
283 unlock_page(page);
284 page_cache_release(page);
285 goto try_again;/*we really need these buffers*/
286 }
287out: 288out:
288 return page; 289 return page;
289} 290}
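Between read_cache_page() returning and lock_page() succeeding, truncate can detach the page from its mapping; the added page->mapping check catches that window and returns NULL, which is why the ufs_change_blocknr() caller above now tolerates a NULL page. A pthread sketch of the general recheck-under-lock pattern:

    /* Standalone sketch of the recheck-under-lock pattern added
     * above: a lookup done without the lock must be revalidated once
     * the lock is held, because the object can be invalidated in the
     * window between. */
    #include <pthread.h>
    #include <stddef.h>

    struct page_like {
            pthread_mutex_t lock;
            void *mapping;          /* NULL once "truncated" */
    };

    static struct page_like *get_locked(struct page_like *p)
    {
            pthread_mutex_lock(&p->lock);
            if (p->mapping == NULL) {       /* truncate got there first */
                    pthread_mutex_unlock(&p->lock);
                    return NULL;            /* caller must handle NULL */
            }
            return p;                       /* locked and still valid */
    }

    int main(void)
    {
            struct page_like p = { PTHREAD_MUTEX_INITIALIZER, &p };

            if (get_locked(&p))
                    pthread_mutex_unlock(&p.lock);
            return 0;
    }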
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index ceda3a2859d2..7858703ed84c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -246,8 +246,8 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
246#define BUF_BUSY XBF_DONT_BLOCK 246#define BUF_BUSY XBF_DONT_BLOCK
247 247
248#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) 248#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
249#define XFS_BUF_ZEROFLAGS(bp) \ 249#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
250 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI)) 250 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
251 251
252#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) 252#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE)
253#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) 253#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE)
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8c021dc57d1f..a13f75c1a936 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -215,7 +215,6 @@ BUFFER_FNS(PrivateStart, unwritten);
215#define MIN(a,b) (min(a,b)) 215#define MIN(a,b) (min(a,b))
216#define MAX(a,b) (max(a,b)) 216#define MAX(a,b) (max(a,b))
217#define howmany(x, y) (((x)+((y)-1))/(y)) 217#define howmany(x, y) (((x)+((y)-1))/(y))
218#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
219 218
220/* 219/*
221 * Various platform dependent calls that don't fit anywhere else 220 * Various platform dependent calls that don't fit anywhere else
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9bdef9d51900..4754f342a5d3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -314,6 +314,13 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
314 return; 314 return;
315 } 315 }
316 316
317 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
318 xfs_fs_cmn_err(CE_NOTE, mp,
319 "Disabling barriers, underlying device is readonly");
320 mp->m_flags &= ~XFS_MOUNT_BARRIER;
321 return;
322 }
323
317 error = xfs_barrier_test(mp); 324 error = xfs_barrier_test(mp);
318 if (error) { 325 if (error) {
319 xfs_fs_cmn_err(CE_NOTE, mp, 326 xfs_fs_cmn_err(CE_NOTE, mp,
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index e95e99f7168f..f137856c3261 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -217,17 +217,24 @@ xfs_qm_statvfs(
217 return 0; 217 return 0;
218 dp = &dqp->q_core; 218 dp = &dqp->q_core;
219 219
220 limit = dp->d_blk_softlimit ? dp->d_blk_softlimit : dp->d_blk_hardlimit; 220 limit = dp->d_blk_softlimit ?
221 be64_to_cpu(dp->d_blk_softlimit) :
222 be64_to_cpu(dp->d_blk_hardlimit);
221 if (limit && statp->f_blocks > limit) { 223 if (limit && statp->f_blocks > limit) {
222 statp->f_blocks = limit; 224 statp->f_blocks = limit;
223 statp->f_bfree = (statp->f_blocks > dp->d_bcount) ? 225 statp->f_bfree =
224 (statp->f_blocks - dp->d_bcount) : 0; 226 (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
227 (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
225 } 228 }
226 limit = dp->d_ino_softlimit ? dp->d_ino_softlimit : dp->d_ino_hardlimit; 229
230 limit = dp->d_ino_softlimit ?
231 be64_to_cpu(dp->d_ino_softlimit) :
232 be64_to_cpu(dp->d_ino_hardlimit);
227 if (limit && statp->f_files > limit) { 233 if (limit && statp->f_files > limit) {
228 statp->f_files = limit; 234 statp->f_files = limit;
229 statp->f_ffree = (statp->f_files > dp->d_icount) ? 235 statp->f_ffree =
230 (statp->f_ffree - dp->d_icount) : 0; 236 (statp->f_files > be64_to_cpu(dp->d_icount)) ?
237 (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
231 } 238 }
232 239
233 xfs_qm_dqput(dqp); 240 xfs_qm_dqput(dqp);
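The dquot fields live on disk in big-endian form, so comparing them raw against CPU-endian values is only correct on big-endian hosts; each use now goes through be64_to_cpu() first. A standalone demonstration of the bug class (be64toh() is the userspace spelling of the same conversion):

    /* Standalone sketch of the endianness bug fixed above: on-disk
     * big-endian fields must be converted before comparison. On a
     * little-endian host the raw comparison gives the wrong answer. */
    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t disk_limit = htobe64(100); /* as stored on disk */
            uint64_t used = 150;                /* CPU-endian value */

            /* Buggy: raw on-disk value vs. a native integer */
            printf("raw:       over limit? %d\n", used > disk_limit);
            /* Fixed: convert first, as the patch does */
            printf("converted: over limit? %d\n",
                   used > be64toh(disk_limit));
            return 0;
    }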
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index eef6763f3a67..d2bbcd882a69 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1835,40 +1835,47 @@ xfs_alloc_fix_freelist(
1835 &agbp))) 1835 &agbp)))
1836 return error; 1836 return error;
1837 if (!pag->pagf_init) { 1837 if (!pag->pagf_init) {
1838 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1839 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1838 args->agbp = NULL; 1840 args->agbp = NULL;
1839 return 0; 1841 return 0;
1840 } 1842 }
1841 } else 1843 } else
1842 agbp = NULL; 1844 agbp = NULL;
1843 1845
1844 /* If this is a metadata preferred pag and we are user data 1846 /*
1847 * If this is a metadata preferred pag and we are user data
1845 * then try somewhere else if we are not being asked to 1848 * then try somewhere else if we are not being asked to
1846 * try harder at this point 1849 * try harder at this point
1847 */ 1850 */
1848 if (pag->pagf_metadata && args->userdata && flags) { 1851 if (pag->pagf_metadata && args->userdata &&
1852 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1853 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1849 args->agbp = NULL; 1854 args->agbp = NULL;
1850 return 0; 1855 return 0;
1851 } 1856 }
1852 1857
1853 need = XFS_MIN_FREELIST_PAG(pag, mp); 1858 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1854 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; 1859 need = XFS_MIN_FREELIST_PAG(pag, mp);
1855 /* 1860 delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
1856 * If it looks like there isn't a long enough extent, or enough 1861 /*
1857 * total blocks, reject it. 1862 * If it looks like there isn't a long enough extent, or enough
1858 */ 1863 * total blocks, reject it.
1859 longest = (pag->pagf_longest > delta) ? 1864 */
1860 (pag->pagf_longest - delta) : 1865 longest = (pag->pagf_longest > delta) ?
1861 (pag->pagf_flcount > 0 || pag->pagf_longest > 0); 1866 (pag->pagf_longest - delta) :
1862 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1867 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1863 (!(flags & XFS_ALLOC_FLAG_FREEING) && 1868 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1864 (int)(pag->pagf_freeblks + pag->pagf_flcount - 1869 longest ||
1865 need - args->total) < 1870 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1866 (int)args->minleft)) { 1871 need - args->total) < (int)args->minleft)) {
1867 if (agbp) 1872 if (agbp)
1868 xfs_trans_brelse(tp, agbp); 1873 xfs_trans_brelse(tp, agbp);
1869 args->agbp = NULL; 1874 args->agbp = NULL;
1870 return 0; 1875 return 0;
1876 }
1871 } 1877 }
1878
1872 /* 1879 /*
1873 * Get the a.g. freespace buffer. 1880 * Get the a.g. freespace buffer.
1874 * Can fail if we're not blocking on locks, and it's held. 1881 * Can fail if we're not blocking on locks, and it's held.
@@ -1878,6 +1885,8 @@ xfs_alloc_fix_freelist(
1878 &agbp))) 1885 &agbp)))
1879 return error; 1886 return error;
1880 if (agbp == NULL) { 1887 if (agbp == NULL) {
1888 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1889 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1881 args->agbp = NULL; 1890 args->agbp = NULL;
1882 return 0; 1891 return 0;
1883 } 1892 }
@@ -1887,22 +1896,24 @@ xfs_alloc_fix_freelist(
1887 */ 1896 */
1888 agf = XFS_BUF_TO_AGF(agbp); 1897 agf = XFS_BUF_TO_AGF(agbp);
1889 need = XFS_MIN_FREELIST(agf, mp); 1898 need = XFS_MIN_FREELIST(agf, mp);
1890 delta = need > be32_to_cpu(agf->agf_flcount) ?
1891 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1892 /* 1899 /*
1893 * If there isn't enough total or single-extent, reject it. 1900 * If there isn't enough total or single-extent, reject it.
1894 */ 1901 */
1895 longest = be32_to_cpu(agf->agf_longest); 1902 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1896 longest = (longest > delta) ? (longest - delta) : 1903 delta = need > be32_to_cpu(agf->agf_flcount) ?
1897 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); 1904 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1898 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1905 longest = be32_to_cpu(agf->agf_longest);
1899 (!(flags & XFS_ALLOC_FLAG_FREEING) && 1906 longest = (longest > delta) ? (longest - delta) :
1900 (int)(be32_to_cpu(agf->agf_freeblks) + 1907 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1901 be32_to_cpu(agf->agf_flcount) - need - args->total) < 1908 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1902 (int)args->minleft)) { 1909 longest ||
1903 xfs_trans_brelse(tp, agbp); 1910 ((int)(be32_to_cpu(agf->agf_freeblks) +
1904 args->agbp = NULL; 1911 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1905 return 0; 1912 (int)args->minleft)) {
1913 xfs_trans_brelse(tp, agbp);
1914 args->agbp = NULL;
1915 return 0;
1916 }
1906 } 1917 }
1907 /* 1918 /*
1908 * Make the freelist shorter if it's too long. 1919 * Make the freelist shorter if it's too long.
@@ -1950,12 +1961,11 @@ xfs_alloc_fix_freelist(
1950 * on a completely full ag. 1961 * on a completely full ag.
1951 */ 1962 */
1952 if (targs.agbno == NULLAGBLOCK) { 1963 if (targs.agbno == NULLAGBLOCK) {
1953 if (!(flags & XFS_ALLOC_FLAG_FREEING)) { 1964 if (flags & XFS_ALLOC_FLAG_FREEING)
1954 xfs_trans_brelse(tp, agflbp); 1965 break;
1955 args->agbp = NULL; 1966 xfs_trans_brelse(tp, agflbp);
1956 return 0; 1967 args->agbp = NULL;
1957 } 1968 return 0;
1958 break;
1959 } 1969 }
1960 /* 1970 /*
1961 * Put each allocated block on the list. 1971 * Put each allocated block on the list.
@@ -2442,31 +2452,26 @@ xfs_free_extent(
2442 xfs_fsblock_t bno, /* starting block number of extent */ 2452 xfs_fsblock_t bno, /* starting block number of extent */
2443 xfs_extlen_t len) /* length of extent */ 2453 xfs_extlen_t len) /* length of extent */
2444{ 2454{
2445#ifdef DEBUG 2455 xfs_alloc_arg_t args;
2446 xfs_agf_t *agf; /* a.g. freespace header */
2447#endif
2448 xfs_alloc_arg_t args; /* allocation argument structure */
2449 int error; 2456 int error;
2450 2457
2451 ASSERT(len != 0); 2458 ASSERT(len != 0);
2459 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2452 args.tp = tp; 2460 args.tp = tp;
2453 args.mp = tp->t_mountp; 2461 args.mp = tp->t_mountp;
2454 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2462 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2455 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2463 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2456 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2464 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2457 args.alignment = 1;
2458 args.minlen = args.minleft = args.minalignslop = 0;
2459 down_read(&args.mp->m_peraglock); 2465 down_read(&args.mp->m_peraglock);
2460 args.pag = &args.mp->m_perag[args.agno]; 2466 args.pag = &args.mp->m_perag[args.agno];
2461 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2467 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2462 goto error0; 2468 goto error0;
2463#ifdef DEBUG 2469#ifdef DEBUG
2464 ASSERT(args.agbp != NULL); 2470 ASSERT(args.agbp != NULL);
2465 agf = XFS_BUF_TO_AGF(args.agbp); 2471 ASSERT((args.agbno + len) <=
2466 ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length)); 2472 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
2467#endif 2473#endif
2468 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, 2474 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2469 len, 0);
2470error0: 2475error0:
2471 up_read(&args.mp->m_peraglock); 2476 up_read(&args.mp->m_peraglock);
2472 return error; 2477 return error;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 86c1bf0bba9e..1f8ecff8553a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -334,10 +334,9 @@ xfs_itobp(
334#if !defined(__KERNEL__) 334#if !defined(__KERNEL__)
335 ni = 0; 335 ni = 0;
336#elif defined(DEBUG) 336#elif defined(DEBUG)
337 ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 337 ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
338 (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog);
339#else /* usual case */ 338#else /* usual case */
340 ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1; 339 ni = 1;
341#endif 340#endif
342 341
343 for (i = 0; i < ni; i++) { 342 for (i = 0; i < ni; i++) {
@@ -348,11 +347,15 @@ xfs_itobp(
348 (i << mp->m_sb.sb_inodelog)); 347 (i << mp->m_sb.sb_inodelog));
349 di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && 348 di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC &&
350 XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); 349 XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT));
351 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, 350 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
352 XFS_RANDOM_ITOBP_INOTOBP))) { 351 XFS_ERRTAG_ITOBP_INOTOBP,
352 XFS_RANDOM_ITOBP_INOTOBP))) {
353 if (imap_flags & XFS_IMAP_BULKSTAT) {
354 xfs_trans_brelse(tp, bp);
355 return XFS_ERROR(EINVAL);
356 }
353#ifdef DEBUG 357#ifdef DEBUG
354 if (!(imap_flags & XFS_IMAP_BULKSTAT)) 358 cmn_err(CE_ALERT,
355 cmn_err(CE_ALERT,
356 "Device %s - bad inode magic/vsn " 359 "Device %s - bad inode magic/vsn "
357 "daddr %lld #%d (magic=%x)", 360 "daddr %lld #%d (magic=%x)",
358 XFS_BUFTARG_NAME(mp->m_ddev_targp), 361 XFS_BUFTARG_NAME(mp->m_ddev_targp),
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e730328636c3..21ac1a67e3e0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1413,7 +1413,7 @@ xlog_sync(xlog_t *log,
1413 ops = iclog->ic_header.h_num_logops; 1413 ops = iclog->ic_header.h_num_logops;
1414 INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops); 1414 INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops);
1415 1415
1416 bp = iclog->ic_bp; 1416 bp = iclog->ic_bp;
1417 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); 1417 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
1418 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); 1418 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
1419 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT))); 1419 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT)));
@@ -1430,15 +1430,14 @@ xlog_sync(xlog_t *log,
1430 } 1430 }
1431 XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count); 1431 XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count);
1432 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ 1432 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */
1433 XFS_BUF_ZEROFLAGS(bp);
1433 XFS_BUF_BUSY(bp); 1434 XFS_BUF_BUSY(bp);
1434 XFS_BUF_ASYNC(bp); 1435 XFS_BUF_ASYNC(bp);
1435 /* 1436 /*
1436 * Do an ordered write for the log block. 1437 * Do an ordered write for the log block.
1437 * 1438 * Its unnecessary to flush the first split block in the log wrap case.
1438 * It may not be needed to flush the first split block in the log wrap
1439 * case, but do it anyways to be safe -AK
1440 */ 1439 */
1441 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1440 if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER))
1442 XFS_BUF_ORDERED(bp); 1441 XFS_BUF_ORDERED(bp);
1443 1442
1444 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1443 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -1460,7 +1459,7 @@ xlog_sync(xlog_t *log,
1460 return error; 1459 return error;
1461 } 1460 }
1462 if (split) { 1461 if (split) {
1463 bp = iclog->ic_log->l_xbuf; 1462 bp = iclog->ic_log->l_xbuf;
1464 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == 1463 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
1465 (unsigned long)1); 1464 (unsigned long)1);
1466 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); 1465 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
@@ -1468,6 +1467,7 @@ xlog_sync(xlog_t *log,
1468 XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ 1467 XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
1469 (__psint_t)count), split); 1468 (__psint_t)count), split);
1470 XFS_BUF_SET_FSPRIVATE(bp, iclog); 1469 XFS_BUF_SET_FSPRIVATE(bp, iclog);
1470 XFS_BUF_ZEROFLAGS(bp);
1471 XFS_BUF_BUSY(bp); 1471 XFS_BUF_BUSY(bp);
1472 XFS_BUF_ASYNC(bp); 1472 XFS_BUF_ASYNC(bp);
1473 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1473 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 6c96391f3f1a..b427d220a169 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -515,7 +515,7 @@ xfs_mount(
515 if (error) 515 if (error)
516 goto error2; 516 goto error2;
517 517
518 if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY)) 518 if (mp->m_flags & XFS_MOUNT_BARRIER)
519 xfs_mountfs_check_barriers(mp); 519 xfs_mountfs_check_barriers(mp);
520 520
521 error = XFS_IOINIT(vfsp, args, flags); 521 error = XFS_IOINIT(vfsp, args, flags);