diff options
Diffstat (limited to 'fs')
135 files changed, 3413 insertions, 1449 deletions
diff --git a/fs/9p/conv.c b/fs/9p/conv.c index 1e898144eb7c..56d88c1a09c5 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c | |||
@@ -673,8 +673,10 @@ struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, | |||
673 | struct cbuf *bufp = &buffer; | 673 | struct cbuf *bufp = &buffer; |
674 | 674 | ||
675 | size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ | 675 | size = 4 + 2 + strlen(name) + 4 + 1; /* fid[4] name[s] perm[4] mode[1] */ |
676 | if (extended && extension!=NULL) | 676 | if (extended) { |
677 | size += 2 + strlen(extension); /* extension[s] */ | 677 | size += 2 + /* extension[s] */ |
678 | (extension == NULL ? 0 : strlen(extension)); | ||
679 | } | ||
678 | 680 | ||
679 | fc = v9fs_create_common(bufp, size, TCREATE); | 681 | fc = v9fs_create_common(bufp, size, TCREATE); |
680 | if (IS_ERR(fc)) | 682 | if (IS_ERR(fc)) |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 2f580a197b8d..eae50c9d6dc4 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -434,11 +434,11 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) | |||
434 | result = v9fs_t_remove(v9ses, fid, &fcall); | 434 | result = v9fs_t_remove(v9ses, fid, &fcall); |
435 | if (result < 0) { | 435 | if (result < 0) { |
436 | PRINT_FCALL_ERROR("remove fails", fcall); | 436 | PRINT_FCALL_ERROR("remove fails", fcall); |
437 | } else { | ||
438 | v9fs_put_idpool(fid, &v9ses->fidpool); | ||
439 | v9fs_fid_destroy(v9fid); | ||
440 | } | 437 | } |
441 | 438 | ||
439 | v9fs_put_idpool(fid, &v9ses->fidpool); | ||
440 | v9fs_fid_destroy(v9fid); | ||
441 | |||
442 | kfree(fcall); | 442 | kfree(fcall); |
443 | return result; | 443 | return result; |
444 | } | 444 | } |
diff --git a/fs/Kconfig b/fs/Kconfig index 53f5c6d61121..3f00a9faabcb 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -1801,6 +1801,7 @@ config CIFS_POSIX | |||
1801 | 1801 | ||
1802 | config CIFS_DEBUG2 | 1802 | config CIFS_DEBUG2 |
1803 | bool "Enable additional CIFS debugging routines" | 1803 | bool "Enable additional CIFS debugging routines" |
1804 | depends on CIFS | ||
1804 | help | 1805 | help |
1805 | Enabling this option adds a few more debugging routines | 1806 | Enabling this option adds a few more debugging routines |
1806 | to the cifs code which slightly increases the size of | 1807 | to the cifs code which slightly increases the size of |
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index ba1c88af49fe..82011019494c 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -308,7 +308,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di | |||
308 | if (adfs_checkmap(sb, dm)) | 308 | if (adfs_checkmap(sb, dm)) |
309 | return dm; | 309 | return dm; |
310 | 310 | ||
311 | adfs_error(sb, NULL, "map corrupted"); | 311 | adfs_error(sb, "map corrupted"); |
312 | 312 | ||
313 | error_free: | 313 | error_free: |
314 | while (--zone >= 0) | 314 | while (--zone >= 0) |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index fcaeead9696b..50cfca5c7efd 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -512,7 +512,11 @@ befs_utf2nls(struct super_block *sb, const char *in, | |||
512 | wchar_t uni; | 512 | wchar_t uni; |
513 | int unilen, utflen; | 513 | int unilen, utflen; |
514 | char *result; | 514 | char *result; |
515 | int maxlen = in_len; /* The utf8->nls conversion can't make more chars */ | 515 | /* The utf8->nls conversion won't make the final nls string bigger |
516 | * than the utf one, but if the string is pure ascii they'll have the | ||
517 | * same width and an extra char is needed to save the additional \0 | ||
518 | */ | ||
519 | int maxlen = in_len + 1; | ||
516 | 520 | ||
517 | befs_debug(sb, "---> utf2nls()"); | 521 | befs_debug(sb, "---> utf2nls()"); |
518 | 522 | ||
@@ -588,7 +592,10 @@ befs_nls2utf(struct super_block *sb, const char *in, | |||
588 | wchar_t uni; | 592 | wchar_t uni; |
589 | int unilen, utflen; | 593 | int unilen, utflen; |
590 | char *result; | 594 | char *result; |
591 | int maxlen = 3 * in_len; | 595 | /* There're nls characters that will translate to 3-chars-wide UTF-8 |
596 | * characters, a additional byte is needed to save the final \0 | ||
597 | * in special cases */ | ||
598 | int maxlen = (3 * in_len) + 1; | ||
592 | 599 | ||
593 | befs_debug(sb, "---> nls2utf()\n"); | 600 | befs_debug(sb, "---> nls2utf()\n"); |
594 | 601 | ||
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d0434406eaeb..672a3b90bc55 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = { | |||
84 | .min_coredump = ELF_EXEC_PAGESIZE | 84 | .min_coredump = ELF_EXEC_PAGESIZE |
85 | }; | 85 | }; |
86 | 86 | ||
87 | #define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) | 87 | #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) |
88 | 88 | ||
89 | static int set_brk(unsigned long start, unsigned long end) | 89 | static int set_brk(unsigned long start, unsigned long end) |
90 | { | 90 | { |
@@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
394 | * <= p_memsize so it's only necessary to check p_memsz. | 394 | * <= p_memsize so it's only necessary to check p_memsz. |
395 | */ | 395 | */ |
396 | k = load_addr + eppnt->p_vaddr; | 396 | k = load_addr + eppnt->p_vaddr; |
397 | if (k > TASK_SIZE || | 397 | if (BAD_ADDR(k) || |
398 | eppnt->p_filesz > eppnt->p_memsz || | 398 | eppnt->p_filesz > eppnt->p_memsz || |
399 | eppnt->p_memsz > TASK_SIZE || | 399 | eppnt->p_memsz > TASK_SIZE || |
400 | TASK_SIZE - eppnt->p_memsz < k) { | 400 | TASK_SIZE - eppnt->p_memsz < k) { |
@@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
887 | * allowed task size. Note that p_filesz must always be | 887 | * allowed task size. Note that p_filesz must always be |
888 | * <= p_memsz so it is only necessary to check p_memsz. | 888 | * <= p_memsz so it is only necessary to check p_memsz. |
889 | */ | 889 | */ |
890 | if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz || | 890 | if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || |
891 | elf_ppnt->p_memsz > TASK_SIZE || | 891 | elf_ppnt->p_memsz > TASK_SIZE || |
892 | TASK_SIZE - elf_ppnt->p_memsz < k) { | 892 | TASK_SIZE - elf_ppnt->p_memsz < k) { |
893 | /* set_brk can never work. Avoid overflows. */ | 893 | /* set_brk can never work. Avoid overflows. */ |
@@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
941 | interpreter, | 941 | interpreter, |
942 | &interp_load_addr); | 942 | &interp_load_addr); |
943 | if (BAD_ADDR(elf_entry)) { | 943 | if (BAD_ADDR(elf_entry)) { |
944 | printk(KERN_ERR "Unable to load interpreter %.128s\n", | ||
945 | elf_interpreter); | ||
946 | force_sig(SIGSEGV, current); | 944 | force_sig(SIGSEGV, current); |
947 | retval = -ENOEXEC; /* Nobody gets to see this, but.. */ | 945 | retval = IS_ERR((void *)elf_entry) ? |
946 | (int)elf_entry : -EINVAL; | ||
948 | goto out_free_dentry; | 947 | goto out_free_dentry; |
949 | } | 948 | } |
950 | reloc_func_desc = interp_load_addr; | 949 | reloc_func_desc = interp_load_addr; |
@@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
955 | } else { | 954 | } else { |
956 | elf_entry = loc->elf_ex.e_entry; | 955 | elf_entry = loc->elf_ex.e_entry; |
957 | if (BAD_ADDR(elf_entry)) { | 956 | if (BAD_ADDR(elf_entry)) { |
958 | send_sig(SIGSEGV, current, 0); | 957 | force_sig(SIGSEGV, current); |
959 | retval = -ENOEXEC; /* Nobody gets to see this, but.. */ | 958 | retval = -EINVAL; |
960 | goto out_free_dentry; | 959 | goto out_free_dentry; |
961 | } | 960 | } |
962 | } | 961 | } |
@@ -1186,8 +1185,6 @@ static int maydump(struct vm_area_struct *vma) | |||
1186 | return 1; | 1185 | return 1; |
1187 | } | 1186 | } |
1188 | 1187 | ||
1189 | #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) | ||
1190 | |||
1191 | /* An ELF note in memory */ | 1188 | /* An ELF note in memory */ |
1192 | struct memelfnote | 1189 | struct memelfnote |
1193 | { | 1190 | { |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index eba4e23b9ca0..2f3365829229 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* binfmt_elf_fdpic.c: FDPIC ELF binary format | 1 | /* binfmt_elf_fdpic.c: FDPIC ELF binary format |
2 | * | 2 | * |
3 | * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2003, 2004, 2006 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * Derived from binfmt_elf.c | 5 | * Derived from binfmt_elf.c |
6 | * | 6 | * |
@@ -24,7 +24,9 @@ | |||
24 | #include <linux/file.h> | 24 | #include <linux/file.h> |
25 | #include <linux/fcntl.h> | 25 | #include <linux/fcntl.h> |
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/pagemap.h> | ||
27 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/highuid.h> | ||
28 | #include <linux/personality.h> | 30 | #include <linux/personality.h> |
29 | #include <linux/ptrace.h> | 31 | #include <linux/ptrace.h> |
30 | #include <linux/init.h> | 32 | #include <linux/init.h> |
@@ -48,45 +50,59 @@ typedef char *elf_caddr_t; | |||
48 | #define kdebug(fmt, ...) do {} while(0) | 50 | #define kdebug(fmt, ...) do {} while(0) |
49 | #endif | 51 | #endif |
50 | 52 | ||
53 | #if 0 | ||
54 | #define kdcore(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ ) | ||
55 | #else | ||
56 | #define kdcore(fmt, ...) do {} while(0) | ||
57 | #endif | ||
58 | |||
51 | MODULE_LICENSE("GPL"); | 59 | MODULE_LICENSE("GPL"); |
52 | 60 | ||
53 | static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs); | 61 | static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *); |
54 | //static int load_elf_fdpic_library(struct file *); | 62 | static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *); |
55 | static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file); | 63 | static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *, |
56 | static int elf_fdpic_map_file(struct elf_fdpic_params *params, | 64 | struct mm_struct *, const char *); |
57 | struct file *file, | ||
58 | struct mm_struct *mm, | ||
59 | const char *what); | ||
60 | 65 | ||
61 | static int create_elf_fdpic_tables(struct linux_binprm *bprm, | 66 | static int create_elf_fdpic_tables(struct linux_binprm *, struct mm_struct *, |
62 | struct mm_struct *mm, | 67 | struct elf_fdpic_params *, |
63 | struct elf_fdpic_params *exec_params, | 68 | struct elf_fdpic_params *); |
64 | struct elf_fdpic_params *interp_params); | ||
65 | 69 | ||
66 | #ifndef CONFIG_MMU | 70 | #ifndef CONFIG_MMU |
67 | static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp); | 71 | static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *, |
68 | static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params, | 72 | unsigned long *); |
69 | struct file *file, | 73 | static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *, |
70 | struct mm_struct *mm); | 74 | struct file *, |
75 | struct mm_struct *); | ||
71 | #endif | 76 | #endif |
72 | 77 | ||
73 | static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | 78 | static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *, |
74 | struct file *file, | 79 | struct file *, struct mm_struct *); |
75 | struct mm_struct *mm); | 80 | |
81 | #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) | ||
82 | static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *); | ||
83 | #endif | ||
76 | 84 | ||
77 | static struct linux_binfmt elf_fdpic_format = { | 85 | static struct linux_binfmt elf_fdpic_format = { |
78 | .module = THIS_MODULE, | 86 | .module = THIS_MODULE, |
79 | .load_binary = load_elf_fdpic_binary, | 87 | .load_binary = load_elf_fdpic_binary, |
80 | // .load_shlib = load_elf_fdpic_library, | 88 | #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) |
81 | // .core_dump = elf_fdpic_core_dump, | 89 | .core_dump = elf_fdpic_core_dump, |
90 | #endif | ||
82 | .min_coredump = ELF_EXEC_PAGESIZE, | 91 | .min_coredump = ELF_EXEC_PAGESIZE, |
83 | }; | 92 | }; |
84 | 93 | ||
85 | static int __init init_elf_fdpic_binfmt(void) { return register_binfmt(&elf_fdpic_format); } | 94 | static int __init init_elf_fdpic_binfmt(void) |
86 | static void __exit exit_elf_fdpic_binfmt(void) { unregister_binfmt(&elf_fdpic_format); } | 95 | { |
96 | return register_binfmt(&elf_fdpic_format); | ||
97 | } | ||
98 | |||
99 | static void __exit exit_elf_fdpic_binfmt(void) | ||
100 | { | ||
101 | unregister_binfmt(&elf_fdpic_format); | ||
102 | } | ||
87 | 103 | ||
88 | module_init(init_elf_fdpic_binfmt) | 104 | core_initcall(init_elf_fdpic_binfmt); |
89 | module_exit(exit_elf_fdpic_binfmt) | 105 | module_exit(exit_elf_fdpic_binfmt); |
90 | 106 | ||
91 | static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) | 107 | static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) |
92 | { | 108 | { |
@@ -105,7 +121,8 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) | |||
105 | /* | 121 | /* |
106 | * read the program headers table into memory | 122 | * read the program headers table into memory |
107 | */ | 123 | */ |
108 | static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *file) | 124 | static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, |
125 | struct file *file) | ||
109 | { | 126 | { |
110 | struct elf32_phdr *phdr; | 127 | struct elf32_phdr *phdr; |
111 | unsigned long size; | 128 | unsigned long size; |
@@ -121,7 +138,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f | |||
121 | if (!params->phdrs) | 138 | if (!params->phdrs) |
122 | return -ENOMEM; | 139 | return -ENOMEM; |
123 | 140 | ||
124 | retval = kernel_read(file, params->hdr.e_phoff, (char *) params->phdrs, size); | 141 | retval = kernel_read(file, params->hdr.e_phoff, |
142 | (char *) params->phdrs, size); | ||
125 | if (retval < 0) | 143 | if (retval < 0) |
126 | return retval; | 144 | return retval; |
127 | 145 | ||
@@ -141,17 +159,24 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, struct file *f | |||
141 | } | 159 | } |
142 | 160 | ||
143 | return 0; | 161 | return 0; |
144 | } /* end elf_fdpic_fetch_phdrs() */ | 162 | } |
145 | 163 | ||
146 | /*****************************************************************************/ | 164 | /*****************************************************************************/ |
147 | /* | 165 | /* |
148 | * load an fdpic binary into various bits of memory | 166 | * load an fdpic binary into various bits of memory |
149 | */ | 167 | */ |
150 | static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs) | 168 | static int load_elf_fdpic_binary(struct linux_binprm *bprm, |
169 | struct pt_regs *regs) | ||
151 | { | 170 | { |
152 | struct elf_fdpic_params exec_params, interp_params; | 171 | struct elf_fdpic_params exec_params, interp_params; |
153 | struct elf_phdr *phdr; | 172 | struct elf_phdr *phdr; |
154 | unsigned long stack_size; | 173 | unsigned long stack_size, entryaddr; |
174 | #ifndef CONFIG_MMU | ||
175 | unsigned long fullsize; | ||
176 | #endif | ||
177 | #ifdef ELF_FDPIC_PLAT_INIT | ||
178 | unsigned long dynaddr; | ||
179 | #endif | ||
155 | struct file *interpreter = NULL; /* to shut gcc up */ | 180 | struct file *interpreter = NULL; /* to shut gcc up */ |
156 | char *interpreter_name = NULL; | 181 | char *interpreter_name = NULL; |
157 | int executable_stack; | 182 | int executable_stack; |
@@ -212,7 +237,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs | |||
212 | goto error; | 237 | goto error; |
213 | } | 238 | } |
214 | 239 | ||
215 | retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); | 240 | retval = kernel_read(interpreter, 0, bprm->buf, |
241 | BINPRM_BUF_SIZE); | ||
216 | if (retval < 0) | 242 | if (retval < 0) |
217 | goto error; | 243 | goto error; |
218 | 244 | ||
@@ -295,7 +321,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs | |||
295 | &current->mm->start_stack, | 321 | &current->mm->start_stack, |
296 | &current->mm->start_brk); | 322 | &current->mm->start_brk); |
297 | 323 | ||
298 | retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack); | 324 | retval = setup_arg_pages(bprm, current->mm->start_stack, |
325 | executable_stack); | ||
299 | if (retval < 0) { | 326 | if (retval < 0) { |
300 | send_sig(SIGKILL, current, 0); | 327 | send_sig(SIGKILL, current, 0); |
301 | goto error_kill; | 328 | goto error_kill; |
@@ -303,7 +330,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs | |||
303 | #endif | 330 | #endif |
304 | 331 | ||
305 | /* load the executable and interpreter into memory */ | 332 | /* load the executable and interpreter into memory */ |
306 | retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, "executable"); | 333 | retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, |
334 | "executable"); | ||
307 | if (retval < 0) | 335 | if (retval < 0) |
308 | goto error_kill; | 336 | goto error_kill; |
309 | 337 | ||
@@ -324,7 +352,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs | |||
324 | if (!current->mm->start_brk) | 352 | if (!current->mm->start_brk) |
325 | current->mm->start_brk = current->mm->end_data; | 353 | current->mm->start_brk = current->mm->end_data; |
326 | 354 | ||
327 | current->mm->brk = current->mm->start_brk = PAGE_ALIGN(current->mm->start_brk); | 355 | current->mm->brk = current->mm->start_brk = |
356 | PAGE_ALIGN(current->mm->start_brk); | ||
328 | 357 | ||
329 | #else | 358 | #else |
330 | /* create a stack and brk area big enough for everyone | 359 | /* create a stack and brk area big enough for everyone |
@@ -336,47 +365,45 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs | |||
336 | stack_size = PAGE_SIZE * 2; | 365 | stack_size = PAGE_SIZE * 2; |
337 | 366 | ||
338 | down_write(&current->mm->mmap_sem); | 367 | down_write(&current->mm->mmap_sem); |
339 | current->mm->start_brk = do_mmap(NULL, | 368 | current->mm->start_brk = do_mmap(NULL, 0, stack_size, |
340 | 0, | ||
341 | stack_size, | ||
342 | PROT_READ | PROT_WRITE | PROT_EXEC, | 369 | PROT_READ | PROT_WRITE | PROT_EXEC, |
343 | MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, | 370 | MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, |
344 | 0); | 371 | 0); |
345 | 372 | ||
346 | if (IS_ERR((void *) current->mm->start_brk)) { | 373 | if (IS_ERR_VALUE(current->mm->start_brk)) { |
347 | up_write(&current->mm->mmap_sem); | 374 | up_write(&current->mm->mmap_sem); |
348 | retval = current->mm->start_brk; | 375 | retval = current->mm->start_brk; |
349 | current->mm->start_brk = 0; | 376 | current->mm->start_brk = 0; |
350 | goto error_kill; | 377 | goto error_kill; |
351 | } | 378 | } |
352 | 379 | ||
353 | if (do_mremap(current->mm->start_brk, | 380 | /* expand the stack mapping to use up the entire allocation granule */ |
354 | stack_size, | 381 | fullsize = ksize((char *) current->mm->start_brk); |
355 | ksize((char *) current->mm->start_brk), | 382 | if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, |
356 | 0, 0 | 383 | fullsize, 0, 0))) |
357 | ) == current->mm->start_brk | 384 | stack_size = fullsize; |
358 | ) | ||
359 | stack_size = ksize((char *) current->mm->start_brk); | ||
360 | up_write(&current->mm->mmap_sem); | 385 | up_write(&current->mm->mmap_sem); |
361 | 386 | ||
362 | current->mm->brk = current->mm->start_brk; | 387 | current->mm->brk = current->mm->start_brk; |
363 | current->mm->context.end_brk = current->mm->start_brk; | 388 | current->mm->context.end_brk = current->mm->start_brk; |
364 | current->mm->context.end_brk += (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0; | 389 | current->mm->context.end_brk += |
390 | (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0; | ||
365 | current->mm->start_stack = current->mm->start_brk + stack_size; | 391 | current->mm->start_stack = current->mm->start_brk + stack_size; |
366 | #endif | 392 | #endif |
367 | 393 | ||
368 | compute_creds(bprm); | 394 | compute_creds(bprm); |
369 | current->flags &= ~PF_FORKNOEXEC; | 395 | current->flags &= ~PF_FORKNOEXEC; |
370 | if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) | 396 | if (create_elf_fdpic_tables(bprm, current->mm, |
397 | &exec_params, &interp_params) < 0) | ||
371 | goto error_kill; | 398 | goto error_kill; |
372 | 399 | ||
373 | kdebug("- start_code %lx", (long) current->mm->start_code); | 400 | kdebug("- start_code %lx", current->mm->start_code); |
374 | kdebug("- end_code %lx", (long) current->mm->end_code); | 401 | kdebug("- end_code %lx", current->mm->end_code); |
375 | kdebug("- start_data %lx", (long) current->mm->start_data); | 402 | kdebug("- start_data %lx", current->mm->start_data); |
376 | kdebug("- end_data %lx", (long) current->mm->end_data); | 403 | kdebug("- end_data %lx", current->mm->end_data); |
377 | kdebug("- start_brk %lx", (long) current->mm->start_brk); | 404 | kdebug("- start_brk %lx", current->mm->start_brk); |
378 | kdebug("- brk %lx", (long) current->mm->brk); | 405 | kdebug("- brk %lx", current->mm->brk); |
379 | kdebug("- start_stack %lx", (long) current->mm->start_stack); | 406 | kdebug("- start_stack %lx", current->mm->start_stack); |
380 | 407 | ||
381 | #ifdef ELF_FDPIC_PLAT_INIT | 408 | #ifdef ELF_FDPIC_PLAT_INIT |
382 | /* | 409 | /* |
@@ -385,21 +412,18 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct pt_regs *regs | |||
385 | * example. This macro performs whatever initialization to | 412 | * example. This macro performs whatever initialization to |
386 | * the regs structure is required. | 413 | * the regs structure is required. |
387 | */ | 414 | */ |
388 | ELF_FDPIC_PLAT_INIT(regs, | 415 | dynaddr = interp_params.dynamic_addr ?: exec_params.dynamic_addr; |
389 | exec_params.map_addr, | 416 | ELF_FDPIC_PLAT_INIT(regs, exec_params.map_addr, interp_params.map_addr, |
390 | interp_params.map_addr, | 417 | dynaddr); |
391 | interp_params.dynamic_addr ?: exec_params.dynamic_addr | ||
392 | ); | ||
393 | #endif | 418 | #endif |
394 | 419 | ||
395 | /* everything is now ready... get the userspace context ready to roll */ | 420 | /* everything is now ready... get the userspace context ready to roll */ |
396 | start_thread(regs, | 421 | entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; |
397 | interp_params.entry_addr ?: exec_params.entry_addr, | 422 | start_thread(regs, entryaddr, current->mm->start_stack); |
398 | current->mm->start_stack); | ||
399 | 423 | ||
400 | if (unlikely(current->ptrace & PT_PTRACED)) { | 424 | if (unlikely(current->ptrace & PT_PTRACED)) { |
401 | if (current->ptrace & PT_TRACE_EXEC) | 425 | if (current->ptrace & PT_TRACE_EXEC) |
402 | ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); | 426 | ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); |
403 | else | 427 | else |
404 | send_sig(SIGTRAP, current, 0); | 428 | send_sig(SIGTRAP, current, 0); |
405 | } | 429 | } |
@@ -419,11 +443,11 @@ error: | |||
419 | return retval; | 443 | return retval; |
420 | 444 | ||
421 | /* unrecoverable error - kill the process */ | 445 | /* unrecoverable error - kill the process */ |
422 | error_kill: | 446 | error_kill: |
423 | send_sig(SIGSEGV, current, 0); | 447 | send_sig(SIGSEGV, current, 0); |
424 | goto error; | 448 | goto error; |
425 | 449 | ||
426 | } /* end load_elf_fdpic_binary() */ | 450 | } |
427 | 451 | ||
428 | /*****************************************************************************/ | 452 | /*****************************************************************************/ |
429 | /* | 453 | /* |
@@ -459,6 +483,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
459 | */ | 483 | */ |
460 | hwcap = ELF_HWCAP; | 484 | hwcap = ELF_HWCAP; |
461 | k_platform = ELF_PLATFORM; | 485 | k_platform = ELF_PLATFORM; |
486 | u_platform = NULL; | ||
462 | 487 | ||
463 | if (k_platform) { | 488 | if (k_platform) { |
464 | platform_len = strlen(k_platform) + 1; | 489 | platform_len = strlen(k_platform) + 1; |
@@ -470,11 +495,11 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
470 | 495 | ||
471 | #if defined(__i386__) && defined(CONFIG_SMP) | 496 | #if defined(__i386__) && defined(CONFIG_SMP) |
472 | /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions | 497 | /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions |
473 | * by the processes running on the same package. One thing we can do | 498 | * by the processes running on the same package. One thing we can do is |
474 | * is to shuffle the initial stack for them. | 499 | * to shuffle the initial stack for them. |
475 | * | 500 | * |
476 | * the conditionals here are unneeded, but kept in to make the | 501 | * the conditionals here are unneeded, but kept in to make the code |
477 | * code behaviour the same as pre change unless we have hyperthreaded | 502 | * behaviour the same as pre change unless we have hyperthreaded |
478 | * processors. This keeps Mr Marcelo Person happier but should be | 503 | * processors. This keeps Mr Marcelo Person happier but should be |
479 | * removed for 2.5 | 504 | * removed for 2.5 |
480 | */ | 505 | */ |
@@ -497,11 +522,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
497 | 522 | ||
498 | if (interp_params->loadmap) { | 523 | if (interp_params->loadmap) { |
499 | len = sizeof(struct elf32_fdpic_loadmap); | 524 | len = sizeof(struct elf32_fdpic_loadmap); |
500 | len += sizeof(struct elf32_fdpic_loadseg) * interp_params->loadmap->nsegs; | 525 | len += sizeof(struct elf32_fdpic_loadseg) * |
526 | interp_params->loadmap->nsegs; | ||
501 | sp = (sp - len) & ~7UL; | 527 | sp = (sp - len) & ~7UL; |
502 | interp_params->map_addr = sp; | 528 | interp_params->map_addr = sp; |
503 | 529 | ||
504 | if (copy_to_user((void __user *) sp, interp_params->loadmap, len) != 0) | 530 | if (copy_to_user((void __user *) sp, interp_params->loadmap, |
531 | len) != 0) | ||
505 | return -EFAULT; | 532 | return -EFAULT; |
506 | 533 | ||
507 | current->mm->context.interp_fdpic_loadmap = (unsigned long) sp; | 534 | current->mm->context.interp_fdpic_loadmap = (unsigned long) sp; |
@@ -525,34 +552,37 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
525 | sp -= sp & 15UL; | 552 | sp -= sp & 15UL; |
526 | 553 | ||
527 | /* put the ELF interpreter info on the stack */ | 554 | /* put the ELF interpreter info on the stack */ |
528 | #define NEW_AUX_ENT(nr, id, val) \ | 555 | #define NEW_AUX_ENT(nr, id, val) \ |
529 | do { \ | 556 | do { \ |
530 | struct { unsigned long _id, _val; } __user *ent = (void __user *) csp; \ | 557 | struct { unsigned long _id, _val; } __user *ent; \ |
531 | __put_user((id), &ent[nr]._id); \ | 558 | \ |
532 | __put_user((val), &ent[nr]._val); \ | 559 | ent = (void __user *) csp; \ |
560 | __put_user((id), &ent[nr]._id); \ | ||
561 | __put_user((val), &ent[nr]._val); \ | ||
533 | } while (0) | 562 | } while (0) |
534 | 563 | ||
535 | csp -= 2 * sizeof(unsigned long); | 564 | csp -= 2 * sizeof(unsigned long); |
536 | NEW_AUX_ENT(0, AT_NULL, 0); | 565 | NEW_AUX_ENT(0, AT_NULL, 0); |
537 | if (k_platform) { | 566 | if (k_platform) { |
538 | csp -= 2 * sizeof(unsigned long); | 567 | csp -= 2 * sizeof(unsigned long); |
539 | NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform); | 568 | NEW_AUX_ENT(0, AT_PLATFORM, |
569 | (elf_addr_t) (unsigned long) u_platform); | ||
540 | } | 570 | } |
541 | 571 | ||
542 | csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); | 572 | csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); |
543 | NEW_AUX_ENT( 0, AT_HWCAP, hwcap); | 573 | NEW_AUX_ENT( 0, AT_HWCAP, hwcap); |
544 | NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); | 574 | NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); |
545 | NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); | 575 | NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); |
546 | NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); | 576 | NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); |
547 | NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); | 577 | NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); |
548 | NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); | 578 | NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); |
549 | NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); | 579 | NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); |
550 | NEW_AUX_ENT( 7, AT_FLAGS, 0); | 580 | NEW_AUX_ENT( 7, AT_FLAGS, 0); |
551 | NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); | 581 | NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); |
552 | NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); | 582 | NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); |
553 | NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); | 583 | NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); |
554 | NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); | 584 | NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); |
555 | NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); | 585 | NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); |
556 | 586 | ||
557 | #ifdef ARCH_DLINFO | 587 | #ifdef ARCH_DLINFO |
558 | /* ARCH_DLINFO must come last so platform specific code can enforce | 588 | /* ARCH_DLINFO must come last so platform specific code can enforce |
@@ -578,7 +608,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
578 | #ifdef CONFIG_MMU | 608 | #ifdef CONFIG_MMU |
579 | current->mm->arg_start = bprm->p; | 609 | current->mm->arg_start = bprm->p; |
580 | #else | 610 | #else |
581 | current->mm->arg_start = current->mm->start_stack - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p); | 611 | current->mm->arg_start = current->mm->start_stack - |
612 | (MAX_ARG_PAGES * PAGE_SIZE - bprm->p); | ||
582 | #endif | 613 | #endif |
583 | 614 | ||
584 | p = (char __user *) current->mm->arg_start; | 615 | p = (char __user *) current->mm->arg_start; |
@@ -606,7 +637,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
606 | 637 | ||
607 | mm->start_stack = (unsigned long) sp; | 638 | mm->start_stack = (unsigned long) sp; |
608 | return 0; | 639 | return 0; |
609 | } /* end create_elf_fdpic_tables() */ | 640 | } |
610 | 641 | ||
611 | /*****************************************************************************/ | 642 | /*****************************************************************************/ |
612 | /* | 643 | /* |
@@ -614,7 +645,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
614 | * the stack | 645 | * the stack |
615 | */ | 646 | */ |
616 | #ifndef CONFIG_MMU | 647 | #ifndef CONFIG_MMU |
617 | static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *_sp) | 648 | static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, |
649 | unsigned long *_sp) | ||
618 | { | 650 | { |
619 | unsigned long index, stop, sp; | 651 | unsigned long index, stop, sp; |
620 | char *src; | 652 | char *src; |
@@ -635,9 +667,9 @@ static int elf_fdpic_transfer_args_to_stack(struct linux_binprm *bprm, unsigned | |||
635 | 667 | ||
636 | *_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15; | 668 | *_sp = (*_sp - (MAX_ARG_PAGES * PAGE_SIZE - bprm->p)) & ~15; |
637 | 669 | ||
638 | out: | 670 | out: |
639 | return ret; | 671 | return ret; |
640 | } /* end elf_fdpic_transfer_args_to_stack() */ | 672 | } |
641 | #endif | 673 | #endif |
642 | 674 | ||
643 | /*****************************************************************************/ | 675 | /*****************************************************************************/ |
@@ -712,17 +744,18 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, | |||
712 | seg = loadmap->segs; | 744 | seg = loadmap->segs; |
713 | for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { | 745 | for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { |
714 | if (params->hdr.e_entry >= seg->p_vaddr && | 746 | if (params->hdr.e_entry >= seg->p_vaddr && |
715 | params->hdr.e_entry < seg->p_vaddr + seg->p_memsz | 747 | params->hdr.e_entry < seg->p_vaddr + seg->p_memsz) { |
716 | ) { | ||
717 | params->entry_addr = | 748 | params->entry_addr = |
718 | (params->hdr.e_entry - seg->p_vaddr) + seg->addr; | 749 | (params->hdr.e_entry - seg->p_vaddr) + |
750 | seg->addr; | ||
719 | break; | 751 | break; |
720 | } | 752 | } |
721 | } | 753 | } |
722 | } | 754 | } |
723 | 755 | ||
724 | /* determine where the program header table has wound up if mapped */ | 756 | /* determine where the program header table has wound up if mapped */ |
725 | stop = params->hdr.e_phoff + params->hdr.e_phnum * sizeof (struct elf_phdr); | 757 | stop = params->hdr.e_phoff; |
758 | stop += params->hdr.e_phnum * sizeof (struct elf_phdr); | ||
726 | phdr = params->phdrs; | 759 | phdr = params->phdrs; |
727 | 760 | ||
728 | for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) { | 761 | for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) { |
@@ -736,9 +769,11 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, | |||
736 | seg = loadmap->segs; | 769 | seg = loadmap->segs; |
737 | for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { | 770 | for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { |
738 | if (phdr->p_vaddr >= seg->p_vaddr && | 771 | if (phdr->p_vaddr >= seg->p_vaddr && |
739 | phdr->p_vaddr + phdr->p_filesz <= seg->p_vaddr + seg->p_memsz | 772 | phdr->p_vaddr + phdr->p_filesz <= |
740 | ) { | 773 | seg->p_vaddr + seg->p_memsz) { |
741 | params->ph_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr + | 774 | params->ph_addr = |
775 | (phdr->p_vaddr - seg->p_vaddr) + | ||
776 | seg->addr + | ||
742 | params->hdr.e_phoff - phdr->p_offset; | 777 | params->hdr.e_phoff - phdr->p_offset; |
743 | break; | 778 | break; |
744 | } | 779 | } |
@@ -755,18 +790,22 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, | |||
755 | seg = loadmap->segs; | 790 | seg = loadmap->segs; |
756 | for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { | 791 | for (loop = loadmap->nsegs; loop > 0; loop--, seg++) { |
757 | if (phdr->p_vaddr >= seg->p_vaddr && | 792 | if (phdr->p_vaddr >= seg->p_vaddr && |
758 | phdr->p_vaddr + phdr->p_memsz <= seg->p_vaddr + seg->p_memsz | 793 | phdr->p_vaddr + phdr->p_memsz <= |
759 | ) { | 794 | seg->p_vaddr + seg->p_memsz) { |
760 | params->dynamic_addr = (phdr->p_vaddr - seg->p_vaddr) + seg->addr; | 795 | params->dynamic_addr = |
761 | 796 | (phdr->p_vaddr - seg->p_vaddr) + | |
762 | /* check the dynamic section contains at least one item, and that | 797 | seg->addr; |
763 | * the last item is a NULL entry */ | 798 | |
799 | /* check the dynamic section contains at least | ||
800 | * one item, and that the last item is a NULL | ||
801 | * entry */ | ||
764 | if (phdr->p_memsz == 0 || | 802 | if (phdr->p_memsz == 0 || |
765 | phdr->p_memsz % sizeof(Elf32_Dyn) != 0) | 803 | phdr->p_memsz % sizeof(Elf32_Dyn) != 0) |
766 | goto dynamic_error; | 804 | goto dynamic_error; |
767 | 805 | ||
768 | tmp = phdr->p_memsz / sizeof(Elf32_Dyn); | 806 | tmp = phdr->p_memsz / sizeof(Elf32_Dyn); |
769 | if (((Elf32_Dyn *) params->dynamic_addr)[tmp - 1].d_tag != 0) | 807 | if (((Elf32_Dyn *) |
808 | params->dynamic_addr)[tmp - 1].d_tag != 0) | ||
770 | goto dynamic_error; | 809 | goto dynamic_error; |
771 | break; | 810 | break; |
772 | } | 811 | } |
@@ -775,8 +814,8 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, | |||
775 | } | 814 | } |
776 | 815 | ||
777 | /* now elide adjacent segments in the load map on MMU linux | 816 | /* now elide adjacent segments in the load map on MMU linux |
778 | * - on uClinux the holes between may actually be filled with system stuff or stuff from | 817 | * - on uClinux the holes between may actually be filled with system |
779 | * other processes | 818 | * stuff or stuff from other processes |
780 | */ | 819 | */ |
781 | #ifdef CONFIG_MMU | 820 | #ifdef CONFIG_MMU |
782 | nloads = loadmap->nsegs; | 821 | nloads = loadmap->nsegs; |
@@ -787,7 +826,9 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, | |||
787 | if (seg->p_vaddr - mseg->p_vaddr == seg->addr - mseg->addr) { | 826 | if (seg->p_vaddr - mseg->p_vaddr == seg->addr - mseg->addr) { |
788 | load_addr = PAGE_ALIGN(mseg->addr + mseg->p_memsz); | 827 | load_addr = PAGE_ALIGN(mseg->addr + mseg->p_memsz); |
789 | if (load_addr == (seg->addr & PAGE_MASK)) { | 828 | if (load_addr == (seg->addr & PAGE_MASK)) { |
790 | mseg->p_memsz += load_addr - (mseg->addr + mseg->p_memsz); | 829 | mseg->p_memsz += |
830 | load_addr - | ||
831 | (mseg->addr + mseg->p_memsz); | ||
791 | mseg->p_memsz += seg->addr & ~PAGE_MASK; | 832 | mseg->p_memsz += seg->addr & ~PAGE_MASK; |
792 | mseg->p_memsz += seg->p_memsz; | 833 | mseg->p_memsz += seg->p_memsz; |
793 | loadmap->nsegs--; | 834 | loadmap->nsegs--; |
@@ -815,20 +856,21 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params, | |||
815 | 856 | ||
816 | return 0; | 857 | return 0; |
817 | 858 | ||
818 | dynamic_error: | 859 | dynamic_error: |
819 | printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n", | 860 | printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n", |
820 | what, file->f_dentry->d_inode->i_ino); | 861 | what, file->f_dentry->d_inode->i_ino); |
821 | return -ELIBBAD; | 862 | return -ELIBBAD; |
822 | } /* end elf_fdpic_map_file() */ | 863 | } |
823 | 864 | ||
824 | /*****************************************************************************/ | 865 | /*****************************************************************************/ |
825 | /* | 866 | /* |
826 | * map a file with constant displacement under uClinux | 867 | * map a file with constant displacement under uClinux |
827 | */ | 868 | */ |
828 | #ifndef CONFIG_MMU | 869 | #ifndef CONFIG_MMU |
829 | static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *params, | 870 | static int elf_fdpic_map_file_constdisp_on_uclinux( |
830 | struct file *file, | 871 | struct elf_fdpic_params *params, |
831 | struct mm_struct *mm) | 872 | struct file *file, |
873 | struct mm_struct *mm) | ||
832 | { | 874 | { |
833 | struct elf32_fdpic_loadseg *seg; | 875 | struct elf32_fdpic_loadseg *seg; |
834 | struct elf32_phdr *phdr; | 876 | struct elf32_phdr *phdr; |
@@ -839,7 +881,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para | |||
839 | load_addr = params->load_addr; | 881 | load_addr = params->load_addr; |
840 | seg = params->loadmap->segs; | 882 | seg = params->loadmap->segs; |
841 | 883 | ||
842 | /* determine the bounds of the contiguous overall allocation we must make */ | 884 | /* determine the bounds of the contiguous overall allocation we must |
885 | * make */ | ||
843 | phdr = params->phdrs; | 886 | phdr = params->phdrs; |
844 | for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) { | 887 | for (loop = 0; loop < params->hdr.e_phnum; loop++, phdr++) { |
845 | if (params->phdrs[loop].p_type != PT_LOAD) | 888 | if (params->phdrs[loop].p_type != PT_LOAD) |
@@ -860,7 +903,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para | |||
860 | maddr = do_mmap(NULL, load_addr, top - base, | 903 | maddr = do_mmap(NULL, load_addr, top - base, |
861 | PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0); | 904 | PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0); |
862 | up_write(&mm->mmap_sem); | 905 | up_write(&mm->mmap_sem); |
863 | if (IS_ERR((void *) maddr)) | 906 | if (IS_ERR_VALUE(maddr)) |
864 | return (int) maddr; | 907 | return (int) maddr; |
865 | 908 | ||
866 | if (load_addr != 0) | 909 | if (load_addr != 0) |
@@ -878,7 +921,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para | |||
878 | seg->p_vaddr = phdr->p_vaddr; | 921 | seg->p_vaddr = phdr->p_vaddr; |
879 | seg->p_memsz = phdr->p_memsz; | 922 | seg->p_memsz = phdr->p_memsz; |
880 | 923 | ||
881 | ret = file->f_op->read(file, (void *) seg->addr, phdr->p_filesz, &fpos); | 924 | ret = file->f_op->read(file, (void *) seg->addr, |
925 | phdr->p_filesz, &fpos); | ||
882 | if (ret < 0) | 926 | if (ret < 0) |
883 | return ret; | 927 | return ret; |
884 | 928 | ||
@@ -895,8 +939,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para | |||
895 | if (phdr->p_flags & PF_X) { | 939 | if (phdr->p_flags & PF_X) { |
896 | mm->start_code = seg->addr; | 940 | mm->start_code = seg->addr; |
897 | mm->end_code = seg->addr + phdr->p_memsz; | 941 | mm->end_code = seg->addr + phdr->p_memsz; |
898 | } | 942 | } else if (!mm->start_data) { |
899 | else if (!mm->start_data) { | ||
900 | mm->start_data = seg->addr; | 943 | mm->start_data = seg->addr; |
901 | #ifndef CONFIG_MMU | 944 | #ifndef CONFIG_MMU |
902 | mm->end_data = seg->addr + phdr->p_memsz; | 945 | mm->end_data = seg->addr + phdr->p_memsz; |
@@ -913,7 +956,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(struct elf_fdpic_params *para | |||
913 | } | 956 | } |
914 | 957 | ||
915 | return 0; | 958 | return 0; |
916 | } /* end elf_fdpic_map_file_constdisp_on_uclinux() */ | 959 | } |
917 | #endif | 960 | #endif |
918 | 961 | ||
919 | /*****************************************************************************/ | 962 | /*****************************************************************************/ |
@@ -974,14 +1017,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
974 | 1017 | ||
975 | case ELF_FDPIC_FLAG_CONSTDISP: | 1018 | case ELF_FDPIC_FLAG_CONSTDISP: |
976 | /* constant displacement | 1019 | /* constant displacement |
977 | * - can be mapped anywhere, but must be mapped as a unit | 1020 | * - can be mapped anywhere, but must be mapped as a |
1021 | * unit | ||
978 | */ | 1022 | */ |
979 | if (!dvset) { | 1023 | if (!dvset) { |
980 | maddr = load_addr; | 1024 | maddr = load_addr; |
981 | delta_vaddr = phdr->p_vaddr; | 1025 | delta_vaddr = phdr->p_vaddr; |
982 | dvset = 1; | 1026 | dvset = 1; |
983 | } | 1027 | } else { |
984 | else { | ||
985 | maddr = load_addr + phdr->p_vaddr - delta_vaddr; | 1028 | maddr = load_addr + phdr->p_vaddr - delta_vaddr; |
986 | flags |= MAP_FIXED; | 1029 | flags |= MAP_FIXED; |
987 | } | 1030 | } |
@@ -1005,13 +1048,14 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1005 | up_write(&mm->mmap_sem); | 1048 | up_write(&mm->mmap_sem); |
1006 | 1049 | ||
1007 | kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx", | 1050 | kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx", |
1008 | loop, phdr->p_memsz + disp, prot, flags, phdr->p_offset - disp, | 1051 | loop, phdr->p_memsz + disp, prot, flags, |
1009 | maddr); | 1052 | phdr->p_offset - disp, maddr); |
1010 | 1053 | ||
1011 | if (IS_ERR((void *) maddr)) | 1054 | if (IS_ERR_VALUE(maddr)) |
1012 | return (int) maddr; | 1055 | return (int) maddr; |
1013 | 1056 | ||
1014 | if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == ELF_FDPIC_FLAG_CONTIGUOUS) | 1057 | if ((params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) == |
1058 | ELF_FDPIC_FLAG_CONTIGUOUS) | ||
1015 | load_addr += PAGE_ALIGN(phdr->p_memsz + disp); | 1059 | load_addr += PAGE_ALIGN(phdr->p_memsz + disp); |
1016 | 1060 | ||
1017 | seg->addr = maddr + disp; | 1061 | seg->addr = maddr + disp; |
@@ -1022,7 +1066,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1022 | if (phdr->p_offset == 0) | 1066 | if (phdr->p_offset == 0) |
1023 | params->elfhdr_addr = seg->addr; | 1067 | params->elfhdr_addr = seg->addr; |
1024 | 1068 | ||
1025 | /* clear the bit between beginning of mapping and beginning of PT_LOAD */ | 1069 | /* clear the bit between beginning of mapping and beginning of |
1070 | * PT_LOAD */ | ||
1026 | if (prot & PROT_WRITE && disp > 0) { | 1071 | if (prot & PROT_WRITE && disp > 0) { |
1027 | kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); | 1072 | kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp); |
1028 | clear_user((void __user *) maddr, disp); | 1073 | clear_user((void __user *) maddr, disp); |
@@ -1038,19 +1083,20 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1038 | excess1 = PAGE_SIZE - ((maddr + phdr->p_filesz) & ~PAGE_MASK); | 1083 | excess1 = PAGE_SIZE - ((maddr + phdr->p_filesz) & ~PAGE_MASK); |
1039 | 1084 | ||
1040 | #ifdef CONFIG_MMU | 1085 | #ifdef CONFIG_MMU |
1041 | |||
1042 | if (excess > excess1) { | 1086 | if (excess > excess1) { |
1043 | unsigned long xaddr = maddr + phdr->p_filesz + excess1; | 1087 | unsigned long xaddr = maddr + phdr->p_filesz + excess1; |
1044 | unsigned long xmaddr; | 1088 | unsigned long xmaddr; |
1045 | 1089 | ||
1046 | flags |= MAP_FIXED | MAP_ANONYMOUS; | 1090 | flags |= MAP_FIXED | MAP_ANONYMOUS; |
1047 | down_write(&mm->mmap_sem); | 1091 | down_write(&mm->mmap_sem); |
1048 | xmaddr = do_mmap(NULL, xaddr, excess - excess1, prot, flags, 0); | 1092 | xmaddr = do_mmap(NULL, xaddr, excess - excess1, |
1093 | prot, flags, 0); | ||
1049 | up_write(&mm->mmap_sem); | 1094 | up_write(&mm->mmap_sem); |
1050 | 1095 | ||
1051 | kdebug("mmap[%d] <anon>" | 1096 | kdebug("mmap[%d] <anon>" |
1052 | " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx", | 1097 | " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx", |
1053 | loop, xaddr, excess - excess1, prot, flags, xmaddr); | 1098 | loop, xaddr, excess - excess1, prot, flags, |
1099 | xmaddr); | ||
1054 | 1100 | ||
1055 | if (xmaddr != xaddr) | 1101 | if (xmaddr != xaddr) |
1056 | return -ENOMEM; | 1102 | return -ENOMEM; |
@@ -1059,7 +1105,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1059 | if (prot & PROT_WRITE && excess1 > 0) { | 1105 | if (prot & PROT_WRITE && excess1 > 0) { |
1060 | kdebug("clear[%d] ad=%lx sz=%lx", | 1106 | kdebug("clear[%d] ad=%lx sz=%lx", |
1061 | loop, maddr + phdr->p_filesz, excess1); | 1107 | loop, maddr + phdr->p_filesz, excess1); |
1062 | clear_user((void __user *) maddr + phdr->p_filesz, excess1); | 1108 | clear_user((void __user *) maddr + phdr->p_filesz, |
1109 | excess1); | ||
1063 | } | 1110 | } |
1064 | 1111 | ||
1065 | #else | 1112 | #else |
@@ -1074,8 +1121,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1074 | if (phdr->p_flags & PF_X) { | 1121 | if (phdr->p_flags & PF_X) { |
1075 | mm->start_code = maddr; | 1122 | mm->start_code = maddr; |
1076 | mm->end_code = maddr + phdr->p_memsz; | 1123 | mm->end_code = maddr + phdr->p_memsz; |
1077 | } | 1124 | } else if (!mm->start_data) { |
1078 | else if (!mm->start_data) { | ||
1079 | mm->start_data = maddr; | 1125 | mm->start_data = maddr; |
1080 | mm->end_data = maddr + phdr->p_memsz; | 1126 | mm->end_data = maddr + phdr->p_memsz; |
1081 | } | 1127 | } |
@@ -1085,4 +1131,662 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, | |||
1085 | } | 1131 | } |
1086 | 1132 | ||
1087 | return 0; | 1133 | return 0; |
1088 | } /* end elf_fdpic_map_file_by_direct_mmap() */ | 1134 | } |
1135 | |||
1136 | /*****************************************************************************/ | ||
1137 | /* | ||
1138 | * ELF-FDPIC core dumper | ||
1139 | * | ||
1140 | * Modelled on fs/exec.c:aout_core_dump() | ||
1141 | * Jeremy Fitzhardinge <jeremy@sw.oz.au> | ||
1142 | * | ||
1143 | * Modelled on fs/binfmt_elf.c core dumper | ||
1144 | */ | ||
1145 | #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) | ||
1146 | |||
1147 | /* | ||
1148 | * These are the only things you should do on a core-file: use only these | ||
1149 | * functions to write out all the necessary info. | ||
1150 | */ | ||
1151 | static int dump_write(struct file *file, const void *addr, int nr) | ||
1152 | { | ||
1153 | return file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
1154 | } | ||
1155 | |||
1156 | static int dump_seek(struct file *file, loff_t off) | ||
1157 | { | ||
1158 | if (file->f_op->llseek) { | ||
1159 | if (file->f_op->llseek(file, off, SEEK_SET) != off) | ||
1160 | return 0; | ||
1161 | } else { | ||
1162 | file->f_pos = off; | ||
1163 | } | ||
1164 | return 1; | ||
1165 | } | ||
1166 | |||
1167 | /* | ||
1168 | * Decide whether a segment is worth dumping; default is yes to be | ||
1169 | * sure (missing info is worse than too much; etc). | ||
1170 | * Personally I'd include everything, and use the coredump limit... | ||
1171 | * | ||
1172 | * I think we should skip something. But I am not sure how. H.J. | ||
1173 | */ | ||
1174 | static int maydump(struct vm_area_struct *vma) | ||
1175 | { | ||
1176 | /* Do not dump I/O mapped devices or special mappings */ | ||
1177 | if (vma->vm_flags & (VM_IO | VM_RESERVED)) { | ||
1178 | kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); | ||
1179 | return 0; | ||
1180 | } | ||
1181 | |||
1182 | /* If we may not read the contents, don't allow us to dump | ||
1183 | * them either. "dump_write()" can't handle it anyway. | ||
1184 | */ | ||
1185 | if (!(vma->vm_flags & VM_READ)) { | ||
1186 | kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags); | ||
1187 | return 0; | ||
1188 | } | ||
1189 | |||
1190 | /* Dump shared memory only if mapped from an anonymous file. */ | ||
1191 | if (vma->vm_flags & VM_SHARED) { | ||
1192 | if (vma->vm_file->f_dentry->d_inode->i_nlink == 0) { | ||
1193 | kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags); | ||
1194 | return 1; | ||
1195 | } | ||
1196 | |||
1197 | kdcore("%08lx: %08lx: no (share)", vma->vm_start, vma->vm_flags); | ||
1198 | return 0; | ||
1199 | } | ||
1200 | |||
1201 | #ifdef CONFIG_MMU | ||
1202 | /* If it hasn't been written to, don't write it out */ | ||
1203 | if (!vma->anon_vma) { | ||
1204 | kdcore("%08lx: %08lx: no (!anon)", vma->vm_start, vma->vm_flags); | ||
1205 | return 0; | ||
1206 | } | ||
1207 | #endif | ||
1208 | |||
1209 | kdcore("%08lx: %08lx: yes", vma->vm_start, vma->vm_flags); | ||
1210 | return 1; | ||
1211 | } | ||
1212 | |||
1213 | /* An ELF note in memory */ | ||
1214 | struct memelfnote | ||
1215 | { | ||
1216 | const char *name; | ||
1217 | int type; | ||
1218 | unsigned int datasz; | ||
1219 | void *data; | ||
1220 | }; | ||
1221 | |||
1222 | static int notesize(struct memelfnote *en) | ||
1223 | { | ||
1224 | int sz; | ||
1225 | |||
1226 | sz = sizeof(struct elf_note); | ||
1227 | sz += roundup(strlen(en->name) + 1, 4); | ||
1228 | sz += roundup(en->datasz, 4); | ||
1229 | |||
1230 | return sz; | ||
1231 | } | ||
1232 | |||
1233 | /* #define DEBUG */ | ||
1234 | |||
1235 | #define DUMP_WRITE(addr, nr) \ | ||
1236 | do { if (!dump_write(file, (addr), (nr))) return 0; } while(0) | ||
1237 | #define DUMP_SEEK(off) \ | ||
1238 | do { if (!dump_seek(file, (off))) return 0; } while(0) | ||
1239 | |||
1240 | static int writenote(struct memelfnote *men, struct file *file) | ||
1241 | { | ||
1242 | struct elf_note en; | ||
1243 | |||
1244 | en.n_namesz = strlen(men->name) + 1; | ||
1245 | en.n_descsz = men->datasz; | ||
1246 | en.n_type = men->type; | ||
1247 | |||
1248 | DUMP_WRITE(&en, sizeof(en)); | ||
1249 | DUMP_WRITE(men->name, en.n_namesz); | ||
1250 | /* XXX - cast from long long to long to avoid need for libgcc.a */ | ||
1251 | DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ | ||
1252 | DUMP_WRITE(men->data, men->datasz); | ||
1253 | DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ | ||
1254 | |||
1255 | return 1; | ||
1256 | } | ||
1257 | #undef DUMP_WRITE | ||
1258 | #undef DUMP_SEEK | ||
1259 | |||
1260 | #define DUMP_WRITE(addr, nr) \ | ||
1261 | if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ | ||
1262 | goto end_coredump; | ||
1263 | #define DUMP_SEEK(off) \ | ||
1264 | if (!dump_seek(file, (off))) \ | ||
1265 | goto end_coredump; | ||
1266 | |||
1267 | static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) | ||
1268 | { | ||
1269 | memcpy(elf->e_ident, ELFMAG, SELFMAG); | ||
1270 | elf->e_ident[EI_CLASS] = ELF_CLASS; | ||
1271 | elf->e_ident[EI_DATA] = ELF_DATA; | ||
1272 | elf->e_ident[EI_VERSION] = EV_CURRENT; | ||
1273 | elf->e_ident[EI_OSABI] = ELF_OSABI; | ||
1274 | memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); | ||
1275 | |||
1276 | elf->e_type = ET_CORE; | ||
1277 | elf->e_machine = ELF_ARCH; | ||
1278 | elf->e_version = EV_CURRENT; | ||
1279 | elf->e_entry = 0; | ||
1280 | elf->e_phoff = sizeof(struct elfhdr); | ||
1281 | elf->e_shoff = 0; | ||
1282 | elf->e_flags = ELF_FDPIC_CORE_EFLAGS; | ||
1283 | elf->e_ehsize = sizeof(struct elfhdr); | ||
1284 | elf->e_phentsize = sizeof(struct elf_phdr); | ||
1285 | elf->e_phnum = segs; | ||
1286 | elf->e_shentsize = 0; | ||
1287 | elf->e_shnum = 0; | ||
1288 | elf->e_shstrndx = 0; | ||
1289 | return; | ||
1290 | } | ||
1291 | |||
1292 | static inline void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) | ||
1293 | { | ||
1294 | phdr->p_type = PT_NOTE; | ||
1295 | phdr->p_offset = offset; | ||
1296 | phdr->p_vaddr = 0; | ||
1297 | phdr->p_paddr = 0; | ||
1298 | phdr->p_filesz = sz; | ||
1299 | phdr->p_memsz = 0; | ||
1300 | phdr->p_flags = 0; | ||
1301 | phdr->p_align = 0; | ||
1302 | return; | ||
1303 | } | ||
1304 | |||
1305 | static inline void fill_note(struct memelfnote *note, const char *name, int type, | ||
1306 | unsigned int sz, void *data) | ||
1307 | { | ||
1308 | note->name = name; | ||
1309 | note->type = type; | ||
1310 | note->datasz = sz; | ||
1311 | note->data = data; | ||
1312 | return; | ||
1313 | } | ||
1314 | |||
1315 | /* | ||
1316 | * fill up all the fields in prstatus from the given task struct, except | ||
1317 | * registers which need to be filled up seperately. | ||
1318 | */ | ||
1319 | static void fill_prstatus(struct elf_prstatus *prstatus, | ||
1320 | struct task_struct *p, long signr) | ||
1321 | { | ||
1322 | prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; | ||
1323 | prstatus->pr_sigpend = p->pending.signal.sig[0]; | ||
1324 | prstatus->pr_sighold = p->blocked.sig[0]; | ||
1325 | prstatus->pr_pid = p->pid; | ||
1326 | prstatus->pr_ppid = p->parent->pid; | ||
1327 | prstatus->pr_pgrp = process_group(p); | ||
1328 | prstatus->pr_sid = p->signal->session; | ||
1329 | if (thread_group_leader(p)) { | ||
1330 | /* | ||
1331 | * This is the record for the group leader. Add in the | ||
1332 | * cumulative times of previous dead threads. This total | ||
1333 | * won't include the time of each live thread whose state | ||
1334 | * is included in the core dump. The final total reported | ||
1335 | * to our parent process when it calls wait4 will include | ||
1336 | * those sums as well as the little bit more time it takes | ||
1337 | * this and each other thread to finish dying after the | ||
1338 | * core dump synchronization phase. | ||
1339 | */ | ||
1340 | cputime_to_timeval(cputime_add(p->utime, p->signal->utime), | ||
1341 | &prstatus->pr_utime); | ||
1342 | cputime_to_timeval(cputime_add(p->stime, p->signal->stime), | ||
1343 | &prstatus->pr_stime); | ||
1344 | } else { | ||
1345 | cputime_to_timeval(p->utime, &prstatus->pr_utime); | ||
1346 | cputime_to_timeval(p->stime, &prstatus->pr_stime); | ||
1347 | } | ||
1348 | cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); | ||
1349 | cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); | ||
1350 | |||
1351 | prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap; | ||
1352 | prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap; | ||
1353 | } | ||
1354 | |||
1355 | static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, | ||
1356 | struct mm_struct *mm) | ||
1357 | { | ||
1358 | unsigned int i, len; | ||
1359 | |||
1360 | /* first copy the parameters from user space */ | ||
1361 | memset(psinfo, 0, sizeof(struct elf_prpsinfo)); | ||
1362 | |||
1363 | len = mm->arg_end - mm->arg_start; | ||
1364 | if (len >= ELF_PRARGSZ) | ||
1365 | len = ELF_PRARGSZ - 1; | ||
1366 | if (copy_from_user(&psinfo->pr_psargs, | ||
1367 | (const char __user *) mm->arg_start, len)) | ||
1368 | return -EFAULT; | ||
1369 | for (i = 0; i < len; i++) | ||
1370 | if (psinfo->pr_psargs[i] == 0) | ||
1371 | psinfo->pr_psargs[i] = ' '; | ||
1372 | psinfo->pr_psargs[len] = 0; | ||
1373 | |||
1374 | psinfo->pr_pid = p->pid; | ||
1375 | psinfo->pr_ppid = p->parent->pid; | ||
1376 | psinfo->pr_pgrp = process_group(p); | ||
1377 | psinfo->pr_sid = p->signal->session; | ||
1378 | |||
1379 | i = p->state ? ffz(~p->state) + 1 : 0; | ||
1380 | psinfo->pr_state = i; | ||
1381 | psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i]; | ||
1382 | psinfo->pr_zomb = psinfo->pr_sname == 'Z'; | ||
1383 | psinfo->pr_nice = task_nice(p); | ||
1384 | psinfo->pr_flag = p->flags; | ||
1385 | SET_UID(psinfo->pr_uid, p->uid); | ||
1386 | SET_GID(psinfo->pr_gid, p->gid); | ||
1387 | strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); | ||
1388 | |||
1389 | return 0; | ||
1390 | } | ||
1391 | |||
1392 | /* Here is the structure in which status of each thread is captured. */ | ||
1393 | struct elf_thread_status | ||
1394 | { | ||
1395 | struct list_head list; | ||
1396 | struct elf_prstatus prstatus; /* NT_PRSTATUS */ | ||
1397 | elf_fpregset_t fpu; /* NT_PRFPREG */ | ||
1398 | struct task_struct *thread; | ||
1399 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1400 | elf_fpxregset_t xfpu; /* NT_PRXFPREG */ | ||
1401 | #endif | ||
1402 | struct memelfnote notes[3]; | ||
1403 | int num_notes; | ||
1404 | }; | ||
1405 | |||
1406 | /* | ||
1407 | * In order to add the specific thread information for the elf file format, | ||
1408 | * we need to keep a linked list of every thread's pr_status and then create | ||
1409 | * a single section for them in the final core file. | ||
1410 | */ | ||
1411 | static int elf_dump_thread_status(long signr, struct elf_thread_status *t) | ||
1412 | { | ||
1413 | struct task_struct *p = t->thread; | ||
1414 | int sz = 0; | ||
1415 | |||
1416 | t->num_notes = 0; | ||
1417 | |||
1418 | fill_prstatus(&t->prstatus, p, signr); | ||
1419 | elf_core_copy_task_regs(p, &t->prstatus.pr_reg); | ||
1420 | |||
1421 | fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), | ||
1422 | &t->prstatus); | ||
1423 | t->num_notes++; | ||
1424 | sz += notesize(&t->notes[0]); | ||
1425 | |||
1426 | t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL, &t->fpu); | ||
1427 | if (t->prstatus.pr_fpvalid) { | ||
1428 | fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu), | ||
1429 | &t->fpu); | ||
1430 | t->num_notes++; | ||
1431 | sz += notesize(&t->notes[1]); | ||
1432 | } | ||
1433 | |||
1434 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1435 | if (elf_core_copy_task_xfpregs(p, &t->xfpu)) { | ||
1436 | fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu), | ||
1437 | &t->xfpu); | ||
1438 | t->num_notes++; | ||
1439 | sz += notesize(&t->notes[2]); | ||
1440 | } | ||
1441 | #endif | ||
1442 | return sz; | ||
1443 | } | ||
1444 | |||
1445 | /* | ||
1446 | * dump the segments for an MMU process | ||
1447 | */ | ||
1448 | #ifdef CONFIG_MMU | ||
1449 | static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, | ||
1450 | size_t *size, unsigned long *limit) | ||
1451 | { | ||
1452 | struct vm_area_struct *vma; | ||
1453 | |||
1454 | for (vma = current->mm->mmap; vma; vma = vma->vm_next) { | ||
1455 | unsigned long addr; | ||
1456 | |||
1457 | if (!maydump(vma)) | ||
1458 | continue; | ||
1459 | |||
1460 | for (addr = vma->vm_start; | ||
1461 | addr < vma->vm_end; | ||
1462 | addr += PAGE_SIZE | ||
1463 | ) { | ||
1464 | struct vm_area_struct *vma; | ||
1465 | struct page *page; | ||
1466 | |||
1467 | if (get_user_pages(current, current->mm, addr, 1, 0, 1, | ||
1468 | &page, &vma) <= 0) { | ||
1469 | DUMP_SEEK(file->f_pos + PAGE_SIZE); | ||
1470 | } | ||
1471 | else if (page == ZERO_PAGE(addr)) { | ||
1472 | DUMP_SEEK(file->f_pos + PAGE_SIZE); | ||
1473 | page_cache_release(page); | ||
1474 | } | ||
1475 | else { | ||
1476 | void *kaddr; | ||
1477 | |||
1478 | flush_cache_page(vma, addr, page_to_pfn(page)); | ||
1479 | kaddr = kmap(page); | ||
1480 | if ((*size += PAGE_SIZE) > *limit || | ||
1481 | !dump_write(file, kaddr, PAGE_SIZE) | ||
1482 | ) { | ||
1483 | kunmap(page); | ||
1484 | page_cache_release(page); | ||
1485 | return -EIO; | ||
1486 | } | ||
1487 | kunmap(page); | ||
1488 | page_cache_release(page); | ||
1489 | } | ||
1490 | } | ||
1491 | } | ||
1492 | |||
1493 | return 0; | ||
1494 | |||
1495 | end_coredump: | ||
1496 | return -EFBIG; | ||
1497 | } | ||
1498 | #endif | ||
1499 | |||
1500 | /* | ||
1501 | * dump the segments for a NOMMU process | ||
1502 | */ | ||
1503 | #ifndef CONFIG_MMU | ||
1504 | static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm, | ||
1505 | size_t *size, unsigned long *limit) | ||
1506 | { | ||
1507 | struct vm_list_struct *vml; | ||
1508 | |||
1509 | for (vml = current->mm->context.vmlist; vml; vml = vml->next) { | ||
1510 | struct vm_area_struct *vma = vml->vma; | ||
1511 | |||
1512 | if (!maydump(vma)) | ||
1513 | continue; | ||
1514 | |||
1515 | if ((*size += PAGE_SIZE) > *limit) | ||
1516 | return -EFBIG; | ||
1517 | |||
1518 | if (!dump_write(file, (void *) vma->vm_start, | ||
1519 | vma->vm_end - vma->vm_start)) | ||
1520 | return -EIO; | ||
1521 | } | ||
1522 | |||
1523 | return 0; | ||
1524 | } | ||
1525 | #endif | ||
1526 | |||
1527 | /* | ||
1528 | * Actual dumper | ||
1529 | * | ||
1530 | * This is a two-pass process; first we find the offsets of the bits, | ||
1531 | * and then they are actually written out. If we run out of core limit | ||
1532 | * we just truncate. | ||
1533 | */ | ||
1534 | static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, | ||
1535 | struct file *file) | ||
1536 | { | ||
1537 | #define NUM_NOTES 6 | ||
1538 | int has_dumped = 0; | ||
1539 | mm_segment_t fs; | ||
1540 | int segs; | ||
1541 | size_t size = 0; | ||
1542 | int i; | ||
1543 | struct vm_area_struct *vma; | ||
1544 | struct elfhdr *elf = NULL; | ||
1545 | loff_t offset = 0, dataoff; | ||
1546 | unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; | ||
1547 | int numnote; | ||
1548 | struct memelfnote *notes = NULL; | ||
1549 | struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ | ||
1550 | struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ | ||
1551 | struct task_struct *g, *p; | ||
1552 | LIST_HEAD(thread_list); | ||
1553 | struct list_head *t; | ||
1554 | elf_fpregset_t *fpu = NULL; | ||
1555 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1556 | elf_fpxregset_t *xfpu = NULL; | ||
1557 | #endif | ||
1558 | int thread_status_size = 0; | ||
1559 | #ifndef CONFIG_MMU | ||
1560 | struct vm_list_struct *vml; | ||
1561 | #endif | ||
1562 | elf_addr_t *auxv; | ||
1563 | |||
1564 | /* | ||
1565 | * We no longer stop all VM operations. | ||
1566 | * | ||
1567 | * This is because those proceses that could possibly change map_count | ||
1568 | * or the mmap / vma pages are now blocked in do_exit on current | ||
1569 | * finishing this core dump. | ||
1570 | * | ||
1571 | * Only ptrace can touch these memory addresses, but it doesn't change | ||
1572 | * the map_count or the pages allocated. So no possibility of crashing | ||
1573 | * exists while dumping the mm->vm_next areas to the core file. | ||
1574 | */ | ||
1575 | |||
1576 | /* alloc memory for large data structures: too large to be on stack */ | ||
1577 | elf = kmalloc(sizeof(*elf), GFP_KERNEL); | ||
1578 | if (!elf) | ||
1579 | goto cleanup; | ||
1580 | prstatus = kzalloc(sizeof(*prstatus), GFP_KERNEL); | ||
1581 | if (!prstatus) | ||
1582 | goto cleanup; | ||
1583 | psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); | ||
1584 | if (!psinfo) | ||
1585 | goto cleanup; | ||
1586 | notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL); | ||
1587 | if (!notes) | ||
1588 | goto cleanup; | ||
1589 | fpu = kmalloc(sizeof(*fpu), GFP_KERNEL); | ||
1590 | if (!fpu) | ||
1591 | goto cleanup; | ||
1592 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1593 | xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL); | ||
1594 | if (!xfpu) | ||
1595 | goto cleanup; | ||
1596 | #endif | ||
1597 | |||
1598 | if (signr) { | ||
1599 | struct elf_thread_status *tmp; | ||
1600 | read_lock(&tasklist_lock); | ||
1601 | do_each_thread(g,p) | ||
1602 | if (current->mm == p->mm && current != p) { | ||
1603 | tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); | ||
1604 | if (!tmp) { | ||
1605 | read_unlock(&tasklist_lock); | ||
1606 | goto cleanup; | ||
1607 | } | ||
1608 | INIT_LIST_HEAD(&tmp->list); | ||
1609 | tmp->thread = p; | ||
1610 | list_add(&tmp->list, &thread_list); | ||
1611 | } | ||
1612 | while_each_thread(g,p); | ||
1613 | read_unlock(&tasklist_lock); | ||
1614 | list_for_each(t, &thread_list) { | ||
1615 | struct elf_thread_status *tmp; | ||
1616 | int sz; | ||
1617 | |||
1618 | tmp = list_entry(t, struct elf_thread_status, list); | ||
1619 | sz = elf_dump_thread_status(signr, tmp); | ||
1620 | thread_status_size += sz; | ||
1621 | } | ||
1622 | } | ||
1623 | |||
1624 | /* now collect the dump for the current */ | ||
1625 | fill_prstatus(prstatus, current, signr); | ||
1626 | elf_core_copy_regs(&prstatus->pr_reg, regs); | ||
1627 | |||
1628 | #ifdef CONFIG_MMU | ||
1629 | segs = current->mm->map_count; | ||
1630 | #else | ||
1631 | segs = 0; | ||
1632 | for (vml = current->mm->context.vmlist; vml; vml = vml->next) | ||
1633 | segs++; | ||
1634 | #endif | ||
1635 | #ifdef ELF_CORE_EXTRA_PHDRS | ||
1636 | segs += ELF_CORE_EXTRA_PHDRS; | ||
1637 | #endif | ||
1638 | |||
1639 | /* Set up header */ | ||
1640 | fill_elf_fdpic_header(elf, segs + 1); /* including notes section */ | ||
1641 | |||
1642 | has_dumped = 1; | ||
1643 | current->flags |= PF_DUMPCORE; | ||
1644 | |||
1645 | /* | ||
1646 | * Set up the notes in similar form to SVR4 core dumps made | ||
1647 | * with info from their /proc. | ||
1648 | */ | ||
1649 | |||
1650 | fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus); | ||
1651 | fill_psinfo(psinfo, current->group_leader, current->mm); | ||
1652 | fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); | ||
1653 | |||
1654 | numnote = 2; | ||
1655 | |||
1656 | auxv = (elf_addr_t *) current->mm->saved_auxv; | ||
1657 | |||
1658 | i = 0; | ||
1659 | do | ||
1660 | i += 2; | ||
1661 | while (auxv[i - 2] != AT_NULL); | ||
1662 | fill_note(&notes[numnote++], "CORE", NT_AUXV, | ||
1663 | i * sizeof(elf_addr_t), auxv); | ||
1664 | |||
1665 | /* Try to dump the FPU. */ | ||
1666 | if ((prstatus->pr_fpvalid = | ||
1667 | elf_core_copy_task_fpregs(current, regs, fpu))) | ||
1668 | fill_note(notes + numnote++, | ||
1669 | "CORE", NT_PRFPREG, sizeof(*fpu), fpu); | ||
1670 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1671 | if (elf_core_copy_task_xfpregs(current, xfpu)) | ||
1672 | fill_note(notes + numnote++, | ||
1673 | "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu); | ||
1674 | #endif | ||
1675 | |||
1676 | fs = get_fs(); | ||
1677 | set_fs(KERNEL_DS); | ||
1678 | |||
1679 | DUMP_WRITE(elf, sizeof(*elf)); | ||
1680 | offset += sizeof(*elf); /* Elf header */ | ||
1681 | offset += (segs+1) * sizeof(struct elf_phdr); /* Program headers */ | ||
1682 | |||
1683 | /* Write notes phdr entry */ | ||
1684 | { | ||
1685 | struct elf_phdr phdr; | ||
1686 | int sz = 0; | ||
1687 | |||
1688 | for (i = 0; i < numnote; i++) | ||
1689 | sz += notesize(notes + i); | ||
1690 | |||
1691 | sz += thread_status_size; | ||
1692 | |||
1693 | fill_elf_note_phdr(&phdr, sz, offset); | ||
1694 | offset += sz; | ||
1695 | DUMP_WRITE(&phdr, sizeof(phdr)); | ||
1696 | } | ||
1697 | |||
1698 | /* Page-align dumped data */ | ||
1699 | dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); | ||
1700 | |||
1701 | /* write program headers for segments dump */ | ||
1702 | for ( | ||
1703 | #ifdef CONFIG_MMU | ||
1704 | vma = current->mm->mmap; vma; vma = vma->vm_next | ||
1705 | #else | ||
1706 | vml = current->mm->context.vmlist; vml; vml = vml->next | ||
1707 | #endif | ||
1708 | ) { | ||
1709 | struct elf_phdr phdr; | ||
1710 | size_t sz; | ||
1711 | |||
1712 | #ifndef CONFIG_MMU | ||
1713 | vma = vml->vma; | ||
1714 | #endif | ||
1715 | |||
1716 | sz = vma->vm_end - vma->vm_start; | ||
1717 | |||
1718 | phdr.p_type = PT_LOAD; | ||
1719 | phdr.p_offset = offset; | ||
1720 | phdr.p_vaddr = vma->vm_start; | ||
1721 | phdr.p_paddr = 0; | ||
1722 | phdr.p_filesz = maydump(vma) ? sz : 0; | ||
1723 | phdr.p_memsz = sz; | ||
1724 | offset += phdr.p_filesz; | ||
1725 | phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; | ||
1726 | if (vma->vm_flags & VM_WRITE) | ||
1727 | phdr.p_flags |= PF_W; | ||
1728 | if (vma->vm_flags & VM_EXEC) | ||
1729 | phdr.p_flags |= PF_X; | ||
1730 | phdr.p_align = ELF_EXEC_PAGESIZE; | ||
1731 | |||
1732 | DUMP_WRITE(&phdr, sizeof(phdr)); | ||
1733 | } | ||
1734 | |||
1735 | #ifdef ELF_CORE_WRITE_EXTRA_PHDRS | ||
1736 | ELF_CORE_WRITE_EXTRA_PHDRS; | ||
1737 | #endif | ||
1738 | |||
1739 | /* write out the notes section */ | ||
1740 | for (i = 0; i < numnote; i++) | ||
1741 | if (!writenote(notes + i, file)) | ||
1742 | goto end_coredump; | ||
1743 | |||
1744 | /* write out the thread status notes section */ | ||
1745 | list_for_each(t, &thread_list) { | ||
1746 | struct elf_thread_status *tmp = | ||
1747 | list_entry(t, struct elf_thread_status, list); | ||
1748 | |||
1749 | for (i = 0; i < tmp->num_notes; i++) | ||
1750 | if (!writenote(&tmp->notes[i], file)) | ||
1751 | goto end_coredump; | ||
1752 | } | ||
1753 | |||
1754 | DUMP_SEEK(dataoff); | ||
1755 | |||
1756 | if (elf_fdpic_dump_segments(file, current->mm, &size, &limit) < 0) | ||
1757 | goto end_coredump; | ||
1758 | |||
1759 | #ifdef ELF_CORE_WRITE_EXTRA_DATA | ||
1760 | ELF_CORE_WRITE_EXTRA_DATA; | ||
1761 | #endif | ||
1762 | |||
1763 | if (file->f_pos != offset) { | ||
1764 | /* Sanity check */ | ||
1765 | printk(KERN_WARNING | ||
1766 | "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n", | ||
1767 | file->f_pos, offset); | ||
1768 | } | ||
1769 | |||
1770 | end_coredump: | ||
1771 | set_fs(fs); | ||
1772 | |||
1773 | cleanup: | ||
1774 | while (!list_empty(&thread_list)) { | ||
1775 | struct list_head *tmp = thread_list.next; | ||
1776 | list_del(tmp); | ||
1777 | kfree(list_entry(tmp, struct elf_thread_status, list)); | ||
1778 | } | ||
1779 | |||
1780 | kfree(elf); | ||
1781 | kfree(prstatus); | ||
1782 | kfree(psinfo); | ||
1783 | kfree(notes); | ||
1784 | kfree(fpu); | ||
1785 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1786 | kfree(xfpu); | ||
1787 | #endif | ||
1788 | return has_dumped; | ||
1789 | #undef NUM_NOTES | ||
1790 | } | ||
1791 | |||
1792 | #endif /* USE_ELF_CORE_DUMP */ | ||
diff --git a/fs/block_dev.c b/fs/block_dev.c index 9633a490dab0..045f98854f14 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, | |||
739 | if (!bo) | 739 | if (!bo) |
740 | return -ENOMEM; | 740 | return -ENOMEM; |
741 | 741 | ||
742 | mutex_lock(&bdev->bd_mutex); | 742 | mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); |
743 | res = bd_claim(bdev, holder); | 743 | res = bd_claim(bdev, holder); |
744 | if (res || !add_bd_holder(bdev, bo)) | 744 | if (res || !add_bd_holder(bdev, bo)) |
745 | free_bd_holder(bo); | 745 | free_bd_holder(bo); |
@@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev, | |||
764 | if (!kobj) | 764 | if (!kobj) |
765 | return; | 765 | return; |
766 | 766 | ||
767 | mutex_lock(&bdev->bd_mutex); | 767 | mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); |
768 | bd_release(bdev); | 768 | bd_release(bdev); |
769 | if ((bo = del_bd_holder(bdev, kobj))) | 769 | if ((bo = del_bd_holder(bdev, kobj))) |
770 | free_bd_holder(bo); | 770 | free_bd_holder(bo); |
@@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) | |||
822 | 822 | ||
823 | EXPORT_SYMBOL(open_by_devnum); | 823 | EXPORT_SYMBOL(open_by_devnum); |
824 | 824 | ||
825 | static int | ||
826 | blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags); | ||
827 | |||
828 | struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode) | ||
829 | { | ||
830 | struct block_device *bdev = bdget(dev); | ||
831 | int err = -ENOMEM; | ||
832 | int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; | ||
833 | if (bdev) | ||
834 | err = blkdev_get_partition(bdev, mode, flags); | ||
835 | return err ? ERR_PTR(err) : bdev; | ||
836 | } | ||
837 | |||
838 | EXPORT_SYMBOL(open_partition_by_devnum); | ||
839 | |||
840 | |||
825 | /* | 841 | /* |
826 | * This routine checks whether a removable media has been changed, | 842 | * This routine checks whether a removable media has been changed, |
827 | * and invalidates all buffer-cache-entries in that case. This | 843 | * and invalidates all buffer-cache-entries in that case. This |
@@ -868,7 +884,66 @@ void bd_set_size(struct block_device *bdev, loff_t size) | |||
868 | } | 884 | } |
869 | EXPORT_SYMBOL(bd_set_size); | 885 | EXPORT_SYMBOL(bd_set_size); |
870 | 886 | ||
871 | static int do_open(struct block_device *bdev, struct file *file) | 887 | static int __blkdev_put(struct block_device *bdev, unsigned int subclass) |
888 | { | ||
889 | int ret = 0; | ||
890 | struct inode *bd_inode = bdev->bd_inode; | ||
891 | struct gendisk *disk = bdev->bd_disk; | ||
892 | |||
893 | mutex_lock_nested(&bdev->bd_mutex, subclass); | ||
894 | lock_kernel(); | ||
895 | if (!--bdev->bd_openers) { | ||
896 | sync_blockdev(bdev); | ||
897 | kill_bdev(bdev); | ||
898 | } | ||
899 | if (bdev->bd_contains == bdev) { | ||
900 | if (disk->fops->release) | ||
901 | ret = disk->fops->release(bd_inode, NULL); | ||
902 | } else { | ||
903 | mutex_lock_nested(&bdev->bd_contains->bd_mutex, | ||
904 | subclass + 1); | ||
905 | bdev->bd_contains->bd_part_count--; | ||
906 | mutex_unlock(&bdev->bd_contains->bd_mutex); | ||
907 | } | ||
908 | if (!bdev->bd_openers) { | ||
909 | struct module *owner = disk->fops->owner; | ||
910 | |||
911 | put_disk(disk); | ||
912 | module_put(owner); | ||
913 | |||
914 | if (bdev->bd_contains != bdev) { | ||
915 | kobject_put(&bdev->bd_part->kobj); | ||
916 | bdev->bd_part = NULL; | ||
917 | } | ||
918 | bdev->bd_disk = NULL; | ||
919 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | ||
920 | if (bdev != bdev->bd_contains) | ||
921 | __blkdev_put(bdev->bd_contains, subclass + 1); | ||
922 | bdev->bd_contains = NULL; | ||
923 | } | ||
924 | unlock_kernel(); | ||
925 | mutex_unlock(&bdev->bd_mutex); | ||
926 | bdput(bdev); | ||
927 | return ret; | ||
928 | } | ||
929 | |||
930 | int blkdev_put(struct block_device *bdev) | ||
931 | { | ||
932 | return __blkdev_put(bdev, BD_MUTEX_NORMAL); | ||
933 | } | ||
934 | EXPORT_SYMBOL(blkdev_put); | ||
935 | |||
936 | int blkdev_put_partition(struct block_device *bdev) | ||
937 | { | ||
938 | return __blkdev_put(bdev, BD_MUTEX_PARTITION); | ||
939 | } | ||
940 | EXPORT_SYMBOL(blkdev_put_partition); | ||
941 | |||
942 | static int | ||
943 | blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); | ||
944 | |||
945 | static int | ||
946 | do_open(struct block_device *bdev, struct file *file, unsigned int subclass) | ||
872 | { | 947 | { |
873 | struct module *owner = NULL; | 948 | struct module *owner = NULL; |
874 | struct gendisk *disk; | 949 | struct gendisk *disk; |
@@ -885,7 +960,8 @@ static int do_open(struct block_device *bdev, struct file *file) | |||
885 | } | 960 | } |
886 | owner = disk->fops->owner; | 961 | owner = disk->fops->owner; |
887 | 962 | ||
888 | mutex_lock(&bdev->bd_mutex); | 963 | mutex_lock_nested(&bdev->bd_mutex, subclass); |
964 | |||
889 | if (!bdev->bd_openers) { | 965 | if (!bdev->bd_openers) { |
890 | bdev->bd_disk = disk; | 966 | bdev->bd_disk = disk; |
891 | bdev->bd_contains = bdev; | 967 | bdev->bd_contains = bdev; |
@@ -912,11 +988,11 @@ static int do_open(struct block_device *bdev, struct file *file) | |||
912 | ret = -ENOMEM; | 988 | ret = -ENOMEM; |
913 | if (!whole) | 989 | if (!whole) |
914 | goto out_first; | 990 | goto out_first; |
915 | ret = blkdev_get(whole, file->f_mode, file->f_flags); | 991 | ret = blkdev_get_whole(whole, file->f_mode, file->f_flags); |
916 | if (ret) | 992 | if (ret) |
917 | goto out_first; | 993 | goto out_first; |
918 | bdev->bd_contains = whole; | 994 | bdev->bd_contains = whole; |
919 | mutex_lock(&whole->bd_mutex); | 995 | mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE); |
920 | whole->bd_part_count++; | 996 | whole->bd_part_count++; |
921 | p = disk->part[part - 1]; | 997 | p = disk->part[part - 1]; |
922 | bdev->bd_inode->i_data.backing_dev_info = | 998 | bdev->bd_inode->i_data.backing_dev_info = |
@@ -944,7 +1020,8 @@ static int do_open(struct block_device *bdev, struct file *file) | |||
944 | if (bdev->bd_invalidated) | 1020 | if (bdev->bd_invalidated) |
945 | rescan_partitions(bdev->bd_disk, bdev); | 1021 | rescan_partitions(bdev->bd_disk, bdev); |
946 | } else { | 1022 | } else { |
947 | mutex_lock(&bdev->bd_contains->bd_mutex); | 1023 | mutex_lock_nested(&bdev->bd_contains->bd_mutex, |
1024 | BD_MUTEX_PARTITION); | ||
948 | bdev->bd_contains->bd_part_count++; | 1025 | bdev->bd_contains->bd_part_count++; |
949 | mutex_unlock(&bdev->bd_contains->bd_mutex); | 1026 | mutex_unlock(&bdev->bd_contains->bd_mutex); |
950 | } | 1027 | } |
@@ -958,7 +1035,7 @@ out_first: | |||
958 | bdev->bd_disk = NULL; | 1035 | bdev->bd_disk = NULL; |
959 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1036 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
960 | if (bdev != bdev->bd_contains) | 1037 | if (bdev != bdev->bd_contains) |
961 | blkdev_put(bdev->bd_contains); | 1038 | __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE); |
962 | bdev->bd_contains = NULL; | 1039 | bdev->bd_contains = NULL; |
963 | put_disk(disk); | 1040 | put_disk(disk); |
964 | module_put(owner); | 1041 | module_put(owner); |
@@ -985,11 +1062,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) | |||
985 | fake_file.f_dentry = &fake_dentry; | 1062 | fake_file.f_dentry = &fake_dentry; |
986 | fake_dentry.d_inode = bdev->bd_inode; | 1063 | fake_dentry.d_inode = bdev->bd_inode; |
987 | 1064 | ||
988 | return do_open(bdev, &fake_file); | 1065 | return do_open(bdev, &fake_file, BD_MUTEX_NORMAL); |
989 | } | 1066 | } |
990 | 1067 | ||
991 | EXPORT_SYMBOL(blkdev_get); | 1068 | EXPORT_SYMBOL(blkdev_get); |
992 | 1069 | ||
1070 | static int | ||
1071 | blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) | ||
1072 | { | ||
1073 | /* | ||
1074 | * This crockload is due to bad choice of ->open() type. | ||
1075 | * It will go away. | ||
1076 | * For now, block device ->open() routine must _not_ | ||
1077 | * examine anything in 'inode' argument except ->i_rdev. | ||
1078 | */ | ||
1079 | struct file fake_file = {}; | ||
1080 | struct dentry fake_dentry = {}; | ||
1081 | fake_file.f_mode = mode; | ||
1082 | fake_file.f_flags = flags; | ||
1083 | fake_file.f_dentry = &fake_dentry; | ||
1084 | fake_dentry.d_inode = bdev->bd_inode; | ||
1085 | |||
1086 | return do_open(bdev, &fake_file, BD_MUTEX_WHOLE); | ||
1087 | } | ||
1088 | |||
1089 | static int | ||
1090 | blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags) | ||
1091 | { | ||
1092 | /* | ||
1093 | * This crockload is due to bad choice of ->open() type. | ||
1094 | * It will go away. | ||
1095 | * For now, block device ->open() routine must _not_ | ||
1096 | * examine anything in 'inode' argument except ->i_rdev. | ||
1097 | */ | ||
1098 | struct file fake_file = {}; | ||
1099 | struct dentry fake_dentry = {}; | ||
1100 | fake_file.f_mode = mode; | ||
1101 | fake_file.f_flags = flags; | ||
1102 | fake_file.f_dentry = &fake_dentry; | ||
1103 | fake_dentry.d_inode = bdev->bd_inode; | ||
1104 | |||
1105 | return do_open(bdev, &fake_file, BD_MUTEX_PARTITION); | ||
1106 | } | ||
1107 | |||
993 | static int blkdev_open(struct inode * inode, struct file * filp) | 1108 | static int blkdev_open(struct inode * inode, struct file * filp) |
994 | { | 1109 | { |
995 | struct block_device *bdev; | 1110 | struct block_device *bdev; |
@@ -1005,7 +1120,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) | |||
1005 | 1120 | ||
1006 | bdev = bd_acquire(inode); | 1121 | bdev = bd_acquire(inode); |
1007 | 1122 | ||
1008 | res = do_open(bdev, filp); | 1123 | res = do_open(bdev, filp, BD_MUTEX_NORMAL); |
1009 | if (res) | 1124 | if (res) |
1010 | return res; | 1125 | return res; |
1011 | 1126 | ||
@@ -1019,51 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp) | |||
1019 | return res; | 1134 | return res; |
1020 | } | 1135 | } |
1021 | 1136 | ||
1022 | int blkdev_put(struct block_device *bdev) | ||
1023 | { | ||
1024 | int ret = 0; | ||
1025 | struct inode *bd_inode = bdev->bd_inode; | ||
1026 | struct gendisk *disk = bdev->bd_disk; | ||
1027 | |||
1028 | mutex_lock(&bdev->bd_mutex); | ||
1029 | lock_kernel(); | ||
1030 | if (!--bdev->bd_openers) { | ||
1031 | sync_blockdev(bdev); | ||
1032 | kill_bdev(bdev); | ||
1033 | } | ||
1034 | if (bdev->bd_contains == bdev) { | ||
1035 | if (disk->fops->release) | ||
1036 | ret = disk->fops->release(bd_inode, NULL); | ||
1037 | } else { | ||
1038 | mutex_lock(&bdev->bd_contains->bd_mutex); | ||
1039 | bdev->bd_contains->bd_part_count--; | ||
1040 | mutex_unlock(&bdev->bd_contains->bd_mutex); | ||
1041 | } | ||
1042 | if (!bdev->bd_openers) { | ||
1043 | struct module *owner = disk->fops->owner; | ||
1044 | |||
1045 | put_disk(disk); | ||
1046 | module_put(owner); | ||
1047 | |||
1048 | if (bdev->bd_contains != bdev) { | ||
1049 | kobject_put(&bdev->bd_part->kobj); | ||
1050 | bdev->bd_part = NULL; | ||
1051 | } | ||
1052 | bdev->bd_disk = NULL; | ||
1053 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | ||
1054 | if (bdev != bdev->bd_contains) { | ||
1055 | blkdev_put(bdev->bd_contains); | ||
1056 | } | ||
1057 | bdev->bd_contains = NULL; | ||
1058 | } | ||
1059 | unlock_kernel(); | ||
1060 | mutex_unlock(&bdev->bd_mutex); | ||
1061 | bdput(bdev); | ||
1062 | return ret; | ||
1063 | } | ||
1064 | |||
1065 | EXPORT_SYMBOL(blkdev_put); | ||
1066 | |||
1067 | static int blkdev_close(struct inode * inode, struct file * filp) | 1137 | static int blkdev_close(struct inode * inode, struct file * filp) |
1068 | { | 1138 | { |
1069 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 1139 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); |
diff --git a/fs/buffer.c b/fs/buffer.c index 3660dcb97591..71649ef9b658 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -470,13 +470,18 @@ out: | |||
470 | pass does the actual I/O. */ | 470 | pass does the actual I/O. */ |
471 | void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) | 471 | void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) |
472 | { | 472 | { |
473 | struct address_space *mapping = bdev->bd_inode->i_mapping; | ||
474 | |||
475 | if (mapping->nrpages == 0) | ||
476 | return; | ||
477 | |||
473 | invalidate_bh_lrus(); | 478 | invalidate_bh_lrus(); |
474 | /* | 479 | /* |
475 | * FIXME: what about destroy_dirty_buffers? | 480 | * FIXME: what about destroy_dirty_buffers? |
476 | * We really want to use invalidate_inode_pages2() for | 481 | * We really want to use invalidate_inode_pages2() for |
477 | * that, but not until that's cleaned up. | 482 | * that, but not until that's cleaned up. |
478 | */ | 483 | */ |
479 | invalidate_inode_pages(bdev->bd_inode->i_mapping); | 484 | invalidate_inode_pages(mapping); |
480 | } | 485 | } |
481 | 486 | ||
482 | /* | 487 | /* |
diff --git a/fs/char_dev.c b/fs/char_dev.c index a4cbc6706ef0..3483d3cf8087 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -182,6 +182,28 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, | |||
182 | return 0; | 182 | return 0; |
183 | } | 183 | } |
184 | 184 | ||
185 | /** | ||
186 | * register_chrdev() - Register a major number for character devices. | ||
187 | * @major: major device number or 0 for dynamic allocation | ||
188 | * @name: name of this range of devices | ||
189 | * @fops: file operations associated with these devices | ||
190 | * | ||
191 | * If @major == 0 this function will dynamically allocate a major and return | ||
192 | * its number. | ||
193 | * | ||
194 | * If @major > 0 this function will attempt to reserve a device with the given | ||
195 | * major number and will return zero on success. | ||
196 | * | ||
197 | * Returns a -ve errno on failure. | ||
198 | * | ||
199 | * The name of this device has nothing to do with the name of the device in | ||
200 | * /dev. It only helps to keep track of the different owners of devices. If | ||
201 | * your module name has only one type of devices it's ok to use e.g. the name | ||
202 | * of the module here. | ||
203 | * | ||
204 | * This function registers a range of 256 minor numbers. The first minor number | ||
205 | * is 0. | ||
206 | */ | ||
185 | int register_chrdev(unsigned int major, const char *name, | 207 | int register_chrdev(unsigned int major, const char *name, |
186 | const struct file_operations *fops) | 208 | const struct file_operations *fops) |
187 | { | 209 | { |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index a61d17ed1827..0feb3bd49cb8 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -1,3 +1,13 @@ | |||
1 | Version 1.45 | ||
2 | ------------ | ||
3 | Do not time out lockw calls when using posix extensions. Do not | ||
4 | time out requests if server still responding reasonably fast | ||
5 | on requests on other threads. Improve POSIX locking emulation, | ||
6 | (lock cancel now works, and unlock of merged range works even | ||
7 | to Windows servers now). Fix oops on mount to lanman servers | ||
8 | (win9x, os/2 etc.) when null password. Do not send listxattr | ||
9 | (SMB to query all EAs) if nouser_xattr specified. | ||
10 | |||
1 | Version 1.44 | 11 | Version 1.44 |
2 | ------------ | 12 | ------------ |
3 | Rewritten sessionsetup support, including support for legacy SMB | 13 | Rewritten sessionsetup support, including support for legacy SMB |
diff --git a/fs/cifs/README b/fs/cifs/README index 7986d0d97ace..5f0e1bd64fee 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -408,7 +408,7 @@ A partial list of the supported mount options follows: | |||
408 | user_xattr Allow getting and setting user xattrs as OS/2 EAs (extended | 408 | user_xattr Allow getting and setting user xattrs as OS/2 EAs (extended |
409 | attributes) to the server (default) e.g. via setfattr | 409 | attributes) to the server (default) e.g. via setfattr |
410 | and getfattr utilities. | 410 | and getfattr utilities. |
411 | nouser_xattr Do not allow getfattr/setfattr to get/set xattrs | 411 | nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs |
412 | mapchars Translate six of the seven reserved characters (not backslash) | 412 | mapchars Translate six of the seven reserved characters (not backslash) |
413 | *?<>|: | 413 | *?<>|: |
414 | to the remap range (above 0xF000), which also | 414 | to the remap range (above 0xF000), which also |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index a89efaf78a26..4bc250b2d9fc 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -277,7 +277,8 @@ void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key) | |||
277 | return; | 277 | return; |
278 | 278 | ||
279 | memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); | 279 | memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); |
280 | strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); | 280 | if(ses->password) |
281 | strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); | ||
281 | 282 | ||
282 | if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) | 283 | if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) |
283 | if(extended_security & CIFSSEC_MAY_PLNTXT) { | 284 | if(extended_security & CIFSSEC_MAY_PLNTXT) { |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c28ede599946..3cd750029be2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -402,7 +402,6 @@ static struct quotactl_ops cifs_quotactl_ops = { | |||
402 | }; | 402 | }; |
403 | #endif | 403 | #endif |
404 | 404 | ||
405 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
406 | static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) | 405 | static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) |
407 | { | 406 | { |
408 | struct cifs_sb_info *cifs_sb; | 407 | struct cifs_sb_info *cifs_sb; |
@@ -422,7 +421,7 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) | |||
422 | tcon->tidStatus = CifsExiting; | 421 | tcon->tidStatus = CifsExiting; |
423 | up(&tcon->tconSem); | 422 | up(&tcon->tconSem); |
424 | 423 | ||
425 | /* cancel_brl_requests(tcon); */ | 424 | /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ |
426 | /* cancel_notify_requests(tcon); */ | 425 | /* cancel_notify_requests(tcon); */ |
427 | if(tcon->ses && tcon->ses->server) | 426 | if(tcon->ses && tcon->ses->server) |
428 | { | 427 | { |
@@ -438,7 +437,6 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) | |||
438 | 437 | ||
439 | return; | 438 | return; |
440 | } | 439 | } |
441 | #endif | ||
442 | 440 | ||
443 | static int cifs_remount(struct super_block *sb, int *flags, char *data) | 441 | static int cifs_remount(struct super_block *sb, int *flags, char *data) |
444 | { | 442 | { |
@@ -457,9 +455,7 @@ struct super_operations cifs_super_ops = { | |||
457 | unless later we add lazy close of inodes or unless the kernel forgets to call | 455 | unless later we add lazy close of inodes or unless the kernel forgets to call |
458 | us with the same number of releases (closes) as opens */ | 456 | us with the same number of releases (closes) as opens */ |
459 | .show_options = cifs_show_options, | 457 | .show_options = cifs_show_options, |
460 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
461 | .umount_begin = cifs_umount_begin, | 458 | .umount_begin = cifs_umount_begin, |
462 | #endif | ||
463 | .remount_fs = cifs_remount, | 459 | .remount_fs = cifs_remount, |
464 | }; | 460 | }; |
465 | 461 | ||
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 8f75c6f24701..39ee8ef3bdeb 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); | |||
100 | extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); | 100 | extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); |
101 | extern int cifs_ioctl (struct inode * inode, struct file * filep, | 101 | extern int cifs_ioctl (struct inode * inode, struct file * filep, |
102 | unsigned int command, unsigned long arg); | 102 | unsigned int command, unsigned long arg); |
103 | #define CIFS_VERSION "1.44" | 103 | #define CIFS_VERSION "1.45" |
104 | #endif /* _CIFSFS_H */ | 104 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6d7cf5f3bc0b..b24006c47df1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2002,2006 | 4 | * Copyright (C) International Business Machines Corp., 2002,2006 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * Jeremy Allison (jra@samba.org) | ||
6 | * | 7 | * |
7 | * This library is free software; you can redistribute it and/or modify | 8 | * This library is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU Lesser General Public License as published | 9 | * it under the terms of the GNU Lesser General Public License as published |
@@ -158,7 +159,8 @@ struct TCP_Server_Info { | |||
158 | /* 16th byte of RFC1001 workstation name is always null */ | 159 | /* 16th byte of RFC1001 workstation name is always null */ |
159 | char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; | 160 | char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; |
160 | __u32 sequence_number; /* needed for CIFS PDU signature */ | 161 | __u32 sequence_number; /* needed for CIFS PDU signature */ |
161 | char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; | 162 | char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; |
163 | unsigned long lstrp; /* when we got last response from this server */ | ||
162 | }; | 164 | }; |
163 | 165 | ||
164 | /* | 166 | /* |
@@ -266,14 +268,14 @@ struct cifsTconInfo { | |||
266 | }; | 268 | }; |
267 | 269 | ||
268 | /* | 270 | /* |
269 | * This info hangs off the cifsFileInfo structure. This is used to track | 271 | * This info hangs off the cifsFileInfo structure, pointed to by llist. |
270 | * byte stream locks on the file | 272 | * This is used to track byte stream locks on the file |
271 | */ | 273 | */ |
272 | struct cifsLockInfo { | 274 | struct cifsLockInfo { |
273 | struct cifsLockInfo *next; | 275 | struct list_head llist; /* pointer to next cifsLockInfo */ |
274 | int start; | 276 | __u64 offset; |
275 | int length; | 277 | __u64 length; |
276 | int type; | 278 | __u8 type; |
277 | }; | 279 | }; |
278 | 280 | ||
279 | /* | 281 | /* |
@@ -304,6 +306,8 @@ struct cifsFileInfo { | |||
304 | /* lock scope id (0 if none) */ | 306 | /* lock scope id (0 if none) */ |
305 | struct file * pfile; /* needed for writepage */ | 307 | struct file * pfile; /* needed for writepage */ |
306 | struct inode * pInode; /* needed for oplock break */ | 308 | struct inode * pInode; /* needed for oplock break */ |
309 | struct semaphore lock_sem; | ||
310 | struct list_head llist; /* list of byte range locks we have. */ | ||
307 | unsigned closePend:1; /* file is marked to close */ | 311 | unsigned closePend:1; /* file is marked to close */ |
308 | unsigned invalidHandle:1; /* file closed via session abend */ | 312 | unsigned invalidHandle:1; /* file closed via session abend */ |
309 | atomic_t wrtPending; /* handle in use - defer close */ | 313 | atomic_t wrtPending; /* handle in use - defer close */ |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a5ddc62d6fe6..b35c55c3c8bb 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -50,6 +50,10 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, | |||
50 | extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, | 50 | extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, |
51 | struct kvec *, int /* nvec to send */, | 51 | struct kvec *, int /* nvec to send */, |
52 | int * /* type of buf returned */ , const int long_op); | 52 | int * /* type of buf returned */ , const int long_op); |
53 | extern int SendReceiveBlockingLock(const unsigned int /* xid */ , struct cifsTconInfo *, | ||
54 | struct smb_hdr * /* input */ , | ||
55 | struct smb_hdr * /* out */ , | ||
56 | int * /* bytes returned */); | ||
53 | extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid); | 57 | extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid); |
54 | extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length); | 58 | extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length); |
55 | extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); | 59 | extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 19678c575dfc..075d8fb3d376 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -477,7 +477,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
477 | /* BB get server time for time conversions and add | 477 | /* BB get server time for time conversions and add |
478 | code to use it and timezone since this is not UTC */ | 478 | code to use it and timezone since this is not UTC */ |
479 | 479 | ||
480 | if (rsp->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { | 480 | if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { |
481 | memcpy(server->cryptKey, rsp->EncryptionKey, | 481 | memcpy(server->cryptKey, rsp->EncryptionKey, |
482 | CIFS_CRYPTO_KEY_SIZE); | 482 | CIFS_CRYPTO_KEY_SIZE); |
483 | } else if (server->secMode & SECMODE_PW_ENCRYPT) { | 483 | } else if (server->secMode & SECMODE_PW_ENCRYPT) { |
@@ -1460,8 +1460,13 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, | |||
1460 | pSMB->hdr.smb_buf_length += count; | 1460 | pSMB->hdr.smb_buf_length += count; |
1461 | pSMB->ByteCount = cpu_to_le16(count); | 1461 | pSMB->ByteCount = cpu_to_le16(count); |
1462 | 1462 | ||
1463 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 1463 | if (waitFlag) { |
1464 | rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, | ||
1465 | (struct smb_hdr *) pSMBr, &bytes_returned); | ||
1466 | } else { | ||
1467 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | ||
1464 | (struct smb_hdr *) pSMBr, &bytes_returned, timeout); | 1468 | (struct smb_hdr *) pSMBr, &bytes_returned, timeout); |
1469 | } | ||
1465 | cifs_stats_inc(&tcon->num_locks); | 1470 | cifs_stats_inc(&tcon->num_locks); |
1466 | if (rc) { | 1471 | if (rc) { |
1467 | cFYI(1, ("Send error in Lock = %d", rc)); | 1472 | cFYI(1, ("Send error in Lock = %d", rc)); |
@@ -1484,6 +1489,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, | |||
1484 | char *data_offset; | 1489 | char *data_offset; |
1485 | struct cifs_posix_lock *parm_data; | 1490 | struct cifs_posix_lock *parm_data; |
1486 | int rc = 0; | 1491 | int rc = 0; |
1492 | int timeout = 0; | ||
1487 | int bytes_returned = 0; | 1493 | int bytes_returned = 0; |
1488 | __u16 params, param_offset, offset, byte_count, count; | 1494 | __u16 params, param_offset, offset, byte_count, count; |
1489 | 1495 | ||
@@ -1503,7 +1509,6 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, | |||
1503 | pSMB->MaxSetupCount = 0; | 1509 | pSMB->MaxSetupCount = 0; |
1504 | pSMB->Reserved = 0; | 1510 | pSMB->Reserved = 0; |
1505 | pSMB->Flags = 0; | 1511 | pSMB->Flags = 0; |
1506 | pSMB->Timeout = 0; | ||
1507 | pSMB->Reserved2 = 0; | 1512 | pSMB->Reserved2 = 0; |
1508 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; | 1513 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; |
1509 | offset = param_offset + params; | 1514 | offset = param_offset + params; |
@@ -1529,8 +1534,13 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, | |||
1529 | (((char *) &pSMB->hdr.Protocol) + offset); | 1534 | (((char *) &pSMB->hdr.Protocol) + offset); |
1530 | 1535 | ||
1531 | parm_data->lock_type = cpu_to_le16(lock_type); | 1536 | parm_data->lock_type = cpu_to_le16(lock_type); |
1532 | if(waitFlag) | 1537 | if(waitFlag) { |
1538 | timeout = 3; /* blocking operation, no timeout */ | ||
1533 | parm_data->lock_flags = cpu_to_le16(1); | 1539 | parm_data->lock_flags = cpu_to_le16(1); |
1540 | pSMB->Timeout = cpu_to_le32(-1); | ||
1541 | } else | ||
1542 | pSMB->Timeout = 0; | ||
1543 | |||
1534 | parm_data->pid = cpu_to_le32(current->tgid); | 1544 | parm_data->pid = cpu_to_le32(current->tgid); |
1535 | parm_data->start = cpu_to_le64(pLockData->fl_start); | 1545 | parm_data->start = cpu_to_le64(pLockData->fl_start); |
1536 | parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ | 1546 | parm_data->length = cpu_to_le64(len); /* normalize negative numbers */ |
@@ -1541,8 +1551,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, | |||
1541 | pSMB->Reserved4 = 0; | 1551 | pSMB->Reserved4 = 0; |
1542 | pSMB->hdr.smb_buf_length += byte_count; | 1552 | pSMB->hdr.smb_buf_length += byte_count; |
1543 | pSMB->ByteCount = cpu_to_le16(byte_count); | 1553 | pSMB->ByteCount = cpu_to_le16(byte_count); |
1544 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | 1554 | if (waitFlag) { |
1545 | (struct smb_hdr *) pSMBr, &bytes_returned, 0); | 1555 | rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, |
1556 | (struct smb_hdr *) pSMBr, &bytes_returned); | ||
1557 | } else { | ||
1558 | rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, | ||
1559 | (struct smb_hdr *) pSMBr, &bytes_returned, timeout); | ||
1560 | } | ||
1561 | |||
1546 | if (rc) { | 1562 | if (rc) { |
1547 | cFYI(1, ("Send error in Posix Lock = %d", rc)); | 1563 | cFYI(1, ("Send error in Posix Lock = %d", rc)); |
1548 | } else if (get_flag) { | 1564 | } else if (get_flag) { |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 876eb9ef85fe..5d394c726860 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -182,6 +182,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
182 | 182 | ||
183 | while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) | 183 | while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) |
184 | { | 184 | { |
185 | try_to_freeze(); | ||
185 | if(server->protocolType == IPV6) { | 186 | if(server->protocolType == IPV6) { |
186 | rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket); | 187 | rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket); |
187 | } else { | 188 | } else { |
@@ -612,6 +613,10 @@ multi_t2_fnd: | |||
612 | #ifdef CONFIG_CIFS_STATS2 | 613 | #ifdef CONFIG_CIFS_STATS2 |
613 | mid_entry->when_received = jiffies; | 614 | mid_entry->when_received = jiffies; |
614 | #endif | 615 | #endif |
616 | /* so we do not time out requests to server | ||
617 | which is still responding (since server could | ||
618 | be busy but not dead) */ | ||
619 | server->lstrp = jiffies; | ||
615 | break; | 620 | break; |
616 | } | 621 | } |
617 | } | 622 | } |
@@ -1266,33 +1271,35 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) | |||
1266 | 1271 | ||
1267 | read_lock(&GlobalSMBSeslock); | 1272 | read_lock(&GlobalSMBSeslock); |
1268 | list_for_each(tmp, &GlobalTreeConnectionList) { | 1273 | list_for_each(tmp, &GlobalTreeConnectionList) { |
1269 | cFYI(1, ("Next tcon - ")); | 1274 | cFYI(1, ("Next tcon")); |
1270 | tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); | 1275 | tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); |
1271 | if (tcon->ses) { | 1276 | if (tcon->ses) { |
1272 | if (tcon->ses->server) { | 1277 | if (tcon->ses->server) { |
1273 | cFYI(1, | 1278 | cFYI(1, |
1274 | (" old ip addr: %x == new ip %x ?", | 1279 | ("old ip addr: %x == new ip %x ?", |
1275 | tcon->ses->server->addr.sockAddr.sin_addr. | 1280 | tcon->ses->server->addr.sockAddr.sin_addr. |
1276 | s_addr, new_target_ip_addr)); | 1281 | s_addr, new_target_ip_addr)); |
1277 | if (tcon->ses->server->addr.sockAddr.sin_addr. | 1282 | if (tcon->ses->server->addr.sockAddr.sin_addr. |
1278 | s_addr == new_target_ip_addr) { | 1283 | s_addr == new_target_ip_addr) { |
1279 | /* BB lock tcon and server and tcp session and increment use count here? */ | 1284 | /* BB lock tcon, server and tcp session and increment use count here? */ |
1280 | /* found a match on the TCP session */ | 1285 | /* found a match on the TCP session */ |
1281 | /* BB check if reconnection needed */ | 1286 | /* BB check if reconnection needed */ |
1282 | cFYI(1,("Matched ip, old UNC: %s == new: %s ?", | 1287 | cFYI(1,("IP match, old UNC: %s new: %s", |
1283 | tcon->treeName, uncName)); | 1288 | tcon->treeName, uncName)); |
1284 | if (strncmp | 1289 | if (strncmp |
1285 | (tcon->treeName, uncName, | 1290 | (tcon->treeName, uncName, |
1286 | MAX_TREE_SIZE) == 0) { | 1291 | MAX_TREE_SIZE) == 0) { |
1287 | cFYI(1, | 1292 | cFYI(1, |
1288 | ("Matched UNC, old user: %s == new: %s ?", | 1293 | ("and old usr: %s new: %s", |
1289 | tcon->treeName, uncName)); | 1294 | tcon->treeName, uncName)); |
1290 | if (strncmp | 1295 | if (strncmp |
1291 | (tcon->ses->userName, | 1296 | (tcon->ses->userName, |
1292 | userName, | 1297 | userName, |
1293 | MAX_USERNAME_SIZE) == 0) { | 1298 | MAX_USERNAME_SIZE) == 0) { |
1294 | read_unlock(&GlobalSMBSeslock); | 1299 | read_unlock(&GlobalSMBSeslock); |
1295 | return tcon;/* also matched user (smb session)*/ | 1300 | /* matched smb session |
1301 | (user name */ | ||
1302 | return tcon; | ||
1296 | } | 1303 | } |
1297 | } | 1304 | } |
1298 | } | 1305 | } |
@@ -1969,7 +1976,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
1969 | } | 1976 | } |
1970 | 1977 | ||
1971 | cFYI(1,("Negotiate caps 0x%x",(int)cap)); | 1978 | cFYI(1,("Negotiate caps 0x%x",(int)cap)); |
1972 | 1979 | #ifdef CONFIG_CIFS_DEBUG2 | |
1980 | if(cap & CIFS_UNIX_FCNTL_CAP) | ||
1981 | cFYI(1,("FCNTL cap")); | ||
1982 | if(cap & CIFS_UNIX_EXTATTR_CAP) | ||
1983 | cFYI(1,("EXTATTR cap")); | ||
1984 | if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) | ||
1985 | cFYI(1,("POSIX path cap")); | ||
1986 | if(cap & CIFS_UNIX_XATTR_CAP) | ||
1987 | cFYI(1,("XATTR cap")); | ||
1988 | if(cap & CIFS_UNIX_POSIX_ACL_CAP) | ||
1989 | cFYI(1,("POSIX ACL cap")); | ||
1990 | #endif /* CIFS_DEBUG2 */ | ||
1973 | if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { | 1991 | if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { |
1974 | cFYI(1,("setting capabilities failed")); | 1992 | cFYI(1,("setting capabilities failed")); |
1975 | } | 1993 | } |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ba4cbe9b0684..914239d53634 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -267,6 +267,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
267 | pCifsFile->invalidHandle = FALSE; | 267 | pCifsFile->invalidHandle = FALSE; |
268 | pCifsFile->closePend = FALSE; | 268 | pCifsFile->closePend = FALSE; |
269 | init_MUTEX(&pCifsFile->fh_sem); | 269 | init_MUTEX(&pCifsFile->fh_sem); |
270 | init_MUTEX(&pCifsFile->lock_sem); | ||
271 | INIT_LIST_HEAD(&pCifsFile->llist); | ||
272 | atomic_set(&pCifsFile->wrtPending,0); | ||
273 | |||
270 | /* set the following in open now | 274 | /* set the following in open now |
271 | pCifsFile->pfile = file; */ | 275 | pCifsFile->pfile = file; */ |
272 | write_lock(&GlobalSMBSeslock); | 276 | write_lock(&GlobalSMBSeslock); |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 944d2b9e092d..e9c5ba9084fc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * | 5 | * |
6 | * Copyright (C) International Business Machines Corp., 2002,2003 | 6 | * Copyright (C) International Business Machines Corp., 2002,2003 |
7 | * Author(s): Steve French (sfrench@us.ibm.com) | 7 | * Author(s): Steve French (sfrench@us.ibm.com) |
8 | * Jeremy Allison (jra@samba.org) | ||
8 | * | 9 | * |
9 | * This library is free software; you can redistribute it and/or modify | 10 | * This library is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU Lesser General Public License as published | 11 | * it under the terms of the GNU Lesser General Public License as published |
@@ -47,6 +48,8 @@ static inline struct cifsFileInfo *cifs_init_private( | |||
47 | private_data->netfid = netfid; | 48 | private_data->netfid = netfid; |
48 | private_data->pid = current->tgid; | 49 | private_data->pid = current->tgid; |
49 | init_MUTEX(&private_data->fh_sem); | 50 | init_MUTEX(&private_data->fh_sem); |
51 | init_MUTEX(&private_data->lock_sem); | ||
52 | INIT_LIST_HEAD(&private_data->llist); | ||
50 | private_data->pfile = file; /* needed for writepage */ | 53 | private_data->pfile = file; /* needed for writepage */ |
51 | private_data->pInode = inode; | 54 | private_data->pInode = inode; |
52 | private_data->invalidHandle = FALSE; | 55 | private_data->invalidHandle = FALSE; |
@@ -473,6 +476,8 @@ int cifs_close(struct inode *inode, struct file *file) | |||
473 | cifs_sb = CIFS_SB(inode->i_sb); | 476 | cifs_sb = CIFS_SB(inode->i_sb); |
474 | pTcon = cifs_sb->tcon; | 477 | pTcon = cifs_sb->tcon; |
475 | if (pSMBFile) { | 478 | if (pSMBFile) { |
479 | struct cifsLockInfo *li, *tmp; | ||
480 | |||
476 | pSMBFile->closePend = TRUE; | 481 | pSMBFile->closePend = TRUE; |
477 | if (pTcon) { | 482 | if (pTcon) { |
478 | /* no sense reconnecting to close a file that is | 483 | /* no sense reconnecting to close a file that is |
@@ -496,6 +501,16 @@ int cifs_close(struct inode *inode, struct file *file) | |||
496 | pSMBFile->netfid); | 501 | pSMBFile->netfid); |
497 | } | 502 | } |
498 | } | 503 | } |
504 | |||
505 | /* Delete any outstanding lock records. | ||
506 | We'll lose them when the file is closed anyway. */ | ||
507 | down(&pSMBFile->lock_sem); | ||
508 | list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) { | ||
509 | list_del(&li->llist); | ||
510 | kfree(li); | ||
511 | } | ||
512 | up(&pSMBFile->lock_sem); | ||
513 | |||
499 | write_lock(&GlobalSMBSeslock); | 514 | write_lock(&GlobalSMBSeslock); |
500 | list_del(&pSMBFile->flist); | 515 | list_del(&pSMBFile->flist); |
501 | list_del(&pSMBFile->tlist); | 516 | list_del(&pSMBFile->tlist); |
@@ -570,6 +585,21 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
570 | return rc; | 585 | return rc; |
571 | } | 586 | } |
572 | 587 | ||
588 | static int store_file_lock(struct cifsFileInfo *fid, __u64 len, | ||
589 | __u64 offset, __u8 lockType) | ||
590 | { | ||
591 | struct cifsLockInfo *li = kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); | ||
592 | if (li == NULL) | ||
593 | return -ENOMEM; | ||
594 | li->offset = offset; | ||
595 | li->length = len; | ||
596 | li->type = lockType; | ||
597 | down(&fid->lock_sem); | ||
598 | list_add(&li->llist, &fid->llist); | ||
599 | up(&fid->lock_sem); | ||
600 | return 0; | ||
601 | } | ||
602 | |||
573 | int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | 603 | int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) |
574 | { | 604 | { |
575 | int rc, xid; | 605 | int rc, xid; |
@@ -581,6 +611,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
581 | struct cifsTconInfo *pTcon; | 611 | struct cifsTconInfo *pTcon; |
582 | __u16 netfid; | 612 | __u16 netfid; |
583 | __u8 lockType = LOCKING_ANDX_LARGE_FILES; | 613 | __u8 lockType = LOCKING_ANDX_LARGE_FILES; |
614 | int posix_locking; | ||
584 | 615 | ||
585 | length = 1 + pfLock->fl_end - pfLock->fl_start; | 616 | length = 1 + pfLock->fl_end - pfLock->fl_start; |
586 | rc = -EACCES; | 617 | rc = -EACCES; |
@@ -639,15 +670,14 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
639 | } | 670 | } |
640 | netfid = ((struct cifsFileInfo *)file->private_data)->netfid; | 671 | netfid = ((struct cifsFileInfo *)file->private_data)->netfid; |
641 | 672 | ||
673 | posix_locking = (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && | ||
674 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability)); | ||
642 | 675 | ||
643 | /* BB add code here to normalize offset and length to | 676 | /* BB add code here to normalize offset and length to |
644 | account for negative length which we can not accept over the | 677 | account for negative length which we can not accept over the |
645 | wire */ | 678 | wire */ |
646 | if (IS_GETLK(cmd)) { | 679 | if (IS_GETLK(cmd)) { |
647 | if(experimEnabled && | 680 | if(posix_locking) { |
648 | (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && | ||
649 | (CIFS_UNIX_FCNTL_CAP & | ||
650 | le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { | ||
651 | int posix_lock_type; | 681 | int posix_lock_type; |
652 | if(lockType & LOCKING_ANDX_SHARED_LOCK) | 682 | if(lockType & LOCKING_ANDX_SHARED_LOCK) |
653 | posix_lock_type = CIFS_RDLCK; | 683 | posix_lock_type = CIFS_RDLCK; |
@@ -683,10 +713,15 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
683 | FreeXid(xid); | 713 | FreeXid(xid); |
684 | return rc; | 714 | return rc; |
685 | } | 715 | } |
686 | if (experimEnabled && | 716 | |
687 | (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && | 717 | if (!numLock && !numUnlock) { |
688 | (CIFS_UNIX_FCNTL_CAP & | 718 | /* if no lock or unlock then nothing |
689 | le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { | 719 | to do since we do not know what it is */ |
720 | FreeXid(xid); | ||
721 | return -EOPNOTSUPP; | ||
722 | } | ||
723 | |||
724 | if (posix_locking) { | ||
690 | int posix_lock_type; | 725 | int posix_lock_type; |
691 | if(lockType & LOCKING_ANDX_SHARED_LOCK) | 726 | if(lockType & LOCKING_ANDX_SHARED_LOCK) |
692 | posix_lock_type = CIFS_RDLCK; | 727 | posix_lock_type = CIFS_RDLCK; |
@@ -695,18 +730,46 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
695 | 730 | ||
696 | if(numUnlock == 1) | 731 | if(numUnlock == 1) |
697 | posix_lock_type = CIFS_UNLCK; | 732 | posix_lock_type = CIFS_UNLCK; |
698 | else if(numLock == 0) { | 733 | |
699 | /* if no lock or unlock then nothing | ||
700 | to do since we do not know what it is */ | ||
701 | FreeXid(xid); | ||
702 | return -EOPNOTSUPP; | ||
703 | } | ||
704 | rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, | 734 | rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, |
705 | length, pfLock, | 735 | length, pfLock, |
706 | posix_lock_type, wait_flag); | 736 | posix_lock_type, wait_flag); |
707 | } else | 737 | } else { |
708 | rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, | 738 | struct cifsFileInfo *fid = (struct cifsFileInfo *)file->private_data; |
709 | numUnlock, numLock, lockType, wait_flag); | 739 | |
740 | if (numLock) { | ||
741 | rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, | ||
742 | 0, numLock, lockType, wait_flag); | ||
743 | |||
744 | if (rc == 0) { | ||
745 | /* For Windows locks we must store them. */ | ||
746 | rc = store_file_lock(fid, length, | ||
747 | pfLock->fl_start, lockType); | ||
748 | } | ||
749 | } else if (numUnlock) { | ||
750 | /* For each stored lock that this unlock overlaps | ||
751 | completely, unlock it. */ | ||
752 | int stored_rc = 0; | ||
753 | struct cifsLockInfo *li, *tmp; | ||
754 | |||
755 | down(&fid->lock_sem); | ||
756 | list_for_each_entry_safe(li, tmp, &fid->llist, llist) { | ||
757 | if (pfLock->fl_start <= li->offset && | ||
758 | length >= li->length) { | ||
759 | stored_rc = CIFSSMBLock(xid, pTcon, netfid, | ||
760 | li->length, li->offset, | ||
761 | 1, 0, li->type, FALSE); | ||
762 | if (stored_rc) | ||
763 | rc = stored_rc; | ||
764 | |||
765 | list_del(&li->llist); | ||
766 | kfree(li); | ||
767 | } | ||
768 | } | ||
769 | up(&fid->lock_sem); | ||
770 | } | ||
771 | } | ||
772 | |||
710 | if (pfLock->fl_flags & FL_POSIX) | 773 | if (pfLock->fl_flags & FL_POSIX) |
711 | posix_lock_file_wait(file, pfLock); | 774 | posix_lock_file_wait(file, pfLock); |
712 | FreeXid(xid); | 775 | FreeXid(xid); |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index b66eff5dc624..ce87550e918f 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -72,6 +72,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { | |||
72 | {ERRinvlevel,-EOPNOTSUPP}, | 72 | {ERRinvlevel,-EOPNOTSUPP}, |
73 | {ERRdirnotempty, -ENOTEMPTY}, | 73 | {ERRdirnotempty, -ENOTEMPTY}, |
74 | {ERRnotlocked, -ENOLCK}, | 74 | {ERRnotlocked, -ENOLCK}, |
75 | {ERRcancelviolation, -ENOLCK}, | ||
75 | {ERRalreadyexists, -EEXIST}, | 76 | {ERRalreadyexists, -EEXIST}, |
76 | {ERRmoredata, -EOVERFLOW}, | 77 | {ERRmoredata, -EOVERFLOW}, |
77 | {ERReasnotsupported,-EOPNOTSUPP}, | 78 | {ERReasnotsupported,-EOPNOTSUPP}, |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 03bbcb377913..9aeb58a7d369 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -82,7 +82,6 @@ static int construct_dentry(struct qstr *qstring, struct file *file, | |||
82 | if(*ptmp_inode == NULL) | 82 | if(*ptmp_inode == NULL) |
83 | return rc; | 83 | return rc; |
84 | rc = 1; | 84 | rc = 1; |
85 | d_instantiate(tmp_dentry, *ptmp_inode); | ||
86 | } | 85 | } |
87 | } else { | 86 | } else { |
88 | tmp_dentry = d_alloc(file->f_dentry, qstring); | 87 | tmp_dentry = d_alloc(file->f_dentry, qstring); |
@@ -99,9 +98,7 @@ static int construct_dentry(struct qstr *qstring, struct file *file, | |||
99 | tmp_dentry->d_op = &cifs_dentry_ops; | 98 | tmp_dentry->d_op = &cifs_dentry_ops; |
100 | if(*ptmp_inode == NULL) | 99 | if(*ptmp_inode == NULL) |
101 | return rc; | 100 | return rc; |
102 | rc = 1; | 101 | rc = 2; |
103 | d_instantiate(tmp_dentry, *ptmp_inode); | ||
104 | d_rehash(tmp_dentry); | ||
105 | } | 102 | } |
106 | 103 | ||
107 | tmp_dentry->d_time = jiffies; | 104 | tmp_dentry->d_time = jiffies; |
@@ -556,7 +553,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) | |||
556 | FIND_FILE_STANDARD_INFO * pFindData = | 553 | FIND_FILE_STANDARD_INFO * pFindData = |
557 | (FIND_FILE_STANDARD_INFO *)current_entry; | 554 | (FIND_FILE_STANDARD_INFO *)current_entry; |
558 | filename = &pFindData->FileName[0]; | 555 | filename = &pFindData->FileName[0]; |
559 | len = le32_to_cpu(pFindData->FileNameLength); | 556 | len = pFindData->FileNameLength; |
560 | } else { | 557 | } else { |
561 | cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); | 558 | cFYI(1,("Unknown findfirst level %d",cfile->srch_inf.info_level)); |
562 | } | 559 | } |
@@ -870,6 +867,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, | |||
870 | pfindEntry, &obj_type, rc); | 867 | pfindEntry, &obj_type, rc); |
871 | else | 868 | else |
872 | fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc); | 869 | fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc); |
870 | |||
871 | if(rc) /* new inode - needs to be tied to dentry */ { | ||
872 | d_instantiate(tmp_dentry, tmp_inode); | ||
873 | if(rc == 2) | ||
874 | d_rehash(tmp_dentry); | ||
875 | } | ||
873 | 876 | ||
874 | 877 | ||
875 | rc = filldir(direntry,qstring.name,qstring.len,file->f_pos, | 878 | rc = filldir(direntry,qstring.name,qstring.len,file->f_pos, |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7202d534ef0b..d1705ab8136e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -372,7 +372,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
372 | 372 | ||
373 | /* no capabilities flags in old lanman negotiation */ | 373 | /* no capabilities flags in old lanman negotiation */ |
374 | 374 | ||
375 | pSMB->old_req.PasswordLength = CIFS_SESS_KEY_SIZE; | 375 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); |
376 | /* BB calculate hash with password */ | 376 | /* BB calculate hash with password */ |
377 | /* and copy into bcc */ | 377 | /* and copy into bcc */ |
378 | 378 | ||
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index cd41c67ff8d3..212c3c296409 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h | |||
@@ -95,6 +95,7 @@ | |||
95 | #define ERRinvlevel 124 | 95 | #define ERRinvlevel 124 |
96 | #define ERRdirnotempty 145 | 96 | #define ERRdirnotempty 145 |
97 | #define ERRnotlocked 158 | 97 | #define ERRnotlocked 158 |
98 | #define ERRcancelviolation 173 | ||
98 | #define ERRalreadyexists 183 | 99 | #define ERRalreadyexists 183 |
99 | #define ERRbadpipe 230 | 100 | #define ERRbadpipe 230 |
100 | #define ERRpipebusy 231 | 101 | #define ERRpipebusy 231 |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 17ba329e2b3d..48d47b46b1fb 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -3,7 +3,8 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) International Business Machines Corp., 2002,2005 | 4 | * Copyright (C) International Business Machines Corp., 2002,2005 |
5 | * Author(s): Steve French (sfrench@us.ibm.com) | 5 | * Author(s): Steve French (sfrench@us.ibm.com) |
6 | * | 6 | * Jeremy Allison (jra@samba.org) 2006. |
7 | * | ||
7 | * This library is free software; you can redistribute it and/or modify | 8 | * This library is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU Lesser General Public License as published | 9 | * it under the terms of the GNU Lesser General Public License as published |
9 | * by the Free Software Foundation; either version 2.1 of the License, or | 10 | * by the Free Software Foundation; either version 2.1 of the License, or |
@@ -36,7 +37,7 @@ extern mempool_t *cifs_mid_poolp; | |||
36 | extern kmem_cache_t *cifs_oplock_cachep; | 37 | extern kmem_cache_t *cifs_oplock_cachep; |
37 | 38 | ||
38 | static struct mid_q_entry * | 39 | static struct mid_q_entry * |
39 | AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) | 40 | AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) |
40 | { | 41 | { |
41 | struct mid_q_entry *temp; | 42 | struct mid_q_entry *temp; |
42 | 43 | ||
@@ -203,6 +204,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, | |||
203 | rc = 0; | 204 | rc = 0; |
204 | } | 205 | } |
205 | 206 | ||
207 | /* Don't want to modify the buffer as a | ||
208 | side effect of this call. */ | ||
209 | smb_buffer->smb_buf_length = smb_buf_length; | ||
210 | |||
206 | return rc; | 211 | return rc; |
207 | } | 212 | } |
208 | 213 | ||
@@ -217,6 +222,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, | |||
217 | unsigned int len = iov[0].iov_len; | 222 | unsigned int len = iov[0].iov_len; |
218 | unsigned int total_len; | 223 | unsigned int total_len; |
219 | int first_vec = 0; | 224 | int first_vec = 0; |
225 | unsigned int smb_buf_length = smb_buffer->smb_buf_length; | ||
220 | 226 | ||
221 | if(ssocket == NULL) | 227 | if(ssocket == NULL) |
222 | return -ENOTSOCK; /* BB eventually add reconnect code here */ | 228 | return -ENOTSOCK; /* BB eventually add reconnect code here */ |
@@ -293,36 +299,15 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, | |||
293 | } else | 299 | } else |
294 | rc = 0; | 300 | rc = 0; |
295 | 301 | ||
302 | /* Don't want to modify the buffer as a | ||
303 | side effect of this call. */ | ||
304 | smb_buffer->smb_buf_length = smb_buf_length; | ||
305 | |||
296 | return rc; | 306 | return rc; |
297 | } | 307 | } |
298 | 308 | ||
299 | int | 309 | static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) |
300 | SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | ||
301 | struct kvec *iov, int n_vec, int * pRespBufType /* ret */, | ||
302 | const int long_op) | ||
303 | { | 310 | { |
304 | int rc = 0; | ||
305 | unsigned int receive_len; | ||
306 | unsigned long timeout; | ||
307 | struct mid_q_entry *midQ; | ||
308 | struct smb_hdr *in_buf = iov[0].iov_base; | ||
309 | |||
310 | *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ | ||
311 | |||
312 | if ((ses == NULL) || (ses->server == NULL)) { | ||
313 | cifs_small_buf_release(in_buf); | ||
314 | cERROR(1,("Null session")); | ||
315 | return -EIO; | ||
316 | } | ||
317 | |||
318 | if(ses->server->tcpStatus == CifsExiting) { | ||
319 | cifs_small_buf_release(in_buf); | ||
320 | return -ENOENT; | ||
321 | } | ||
322 | |||
323 | /* Ensure that we do not send more than 50 overlapping requests | ||
324 | to the same server. We may make this configurable later or | ||
325 | use ses->maxReq */ | ||
326 | if(long_op == -1) { | 311 | if(long_op == -1) { |
327 | /* oplock breaks must not be held up */ | 312 | /* oplock breaks must not be held up */ |
328 | atomic_inc(&ses->server->inFlight); | 313 | atomic_inc(&ses->server->inFlight); |
@@ -345,53 +330,140 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
345 | } else { | 330 | } else { |
346 | if(ses->server->tcpStatus == CifsExiting) { | 331 | if(ses->server->tcpStatus == CifsExiting) { |
347 | spin_unlock(&GlobalMid_Lock); | 332 | spin_unlock(&GlobalMid_Lock); |
348 | cifs_small_buf_release(in_buf); | ||
349 | return -ENOENT; | 333 | return -ENOENT; |
350 | } | 334 | } |
351 | 335 | ||
352 | /* can not count locking commands against total since | 336 | /* can not count locking commands against total since |
353 | they are allowed to block on server */ | 337 | they are allowed to block on server */ |
354 | 338 | ||
355 | if(long_op < 3) { | ||
356 | /* update # of requests on the wire to server */ | 339 | /* update # of requests on the wire to server */ |
340 | if (long_op < 3) | ||
357 | atomic_inc(&ses->server->inFlight); | 341 | atomic_inc(&ses->server->inFlight); |
358 | } | ||
359 | spin_unlock(&GlobalMid_Lock); | 342 | spin_unlock(&GlobalMid_Lock); |
360 | break; | 343 | break; |
361 | } | 344 | } |
362 | } | 345 | } |
363 | } | 346 | } |
364 | /* make sure that we sign in the same order that we send on this socket | 347 | return 0; |
365 | and avoid races inside tcp sendmsg code that could cause corruption | 348 | } |
366 | of smb data */ | ||
367 | |||
368 | down(&ses->server->tcpSem); | ||
369 | 349 | ||
350 | static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, | ||
351 | struct mid_q_entry **ppmidQ) | ||
352 | { | ||
370 | if (ses->server->tcpStatus == CifsExiting) { | 353 | if (ses->server->tcpStatus == CifsExiting) { |
371 | rc = -ENOENT; | 354 | return -ENOENT; |
372 | goto out_unlock2; | ||
373 | } else if (ses->server->tcpStatus == CifsNeedReconnect) { | 355 | } else if (ses->server->tcpStatus == CifsNeedReconnect) { |
374 | cFYI(1,("tcp session dead - return to caller to retry")); | 356 | cFYI(1,("tcp session dead - return to caller to retry")); |
375 | rc = -EAGAIN; | 357 | return -EAGAIN; |
376 | goto out_unlock2; | ||
377 | } else if (ses->status != CifsGood) { | 358 | } else if (ses->status != CifsGood) { |
378 | /* check if SMB session is bad because we are setting it up */ | 359 | /* check if SMB session is bad because we are setting it up */ |
379 | if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && | 360 | if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && |
380 | (in_buf->Command != SMB_COM_NEGOTIATE)) { | 361 | (in_buf->Command != SMB_COM_NEGOTIATE)) { |
381 | rc = -EAGAIN; | 362 | return -EAGAIN; |
382 | goto out_unlock2; | ||
383 | } /* else ok - we are setting up session */ | 363 | } /* else ok - we are setting up session */ |
384 | } | 364 | } |
385 | midQ = AllocMidQEntry(in_buf, ses); | 365 | *ppmidQ = AllocMidQEntry(in_buf, ses); |
386 | if (midQ == NULL) { | 366 | if (*ppmidQ == NULL) { |
367 | return -ENOMEM; | ||
368 | } | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | static int wait_for_response(struct cifsSesInfo *ses, | ||
373 | struct mid_q_entry *midQ, | ||
374 | unsigned long timeout, | ||
375 | unsigned long time_to_wait) | ||
376 | { | ||
377 | unsigned long curr_timeout; | ||
378 | |||
379 | for (;;) { | ||
380 | curr_timeout = timeout + jiffies; | ||
381 | wait_event(ses->server->response_q, | ||
382 | (!(midQ->midState == MID_REQUEST_SUBMITTED)) || | ||
383 | time_after(jiffies, curr_timeout) || | ||
384 | ((ses->server->tcpStatus != CifsGood) && | ||
385 | (ses->server->tcpStatus != CifsNew))); | ||
386 | |||
387 | if (time_after(jiffies, curr_timeout) && | ||
388 | (midQ->midState == MID_REQUEST_SUBMITTED) && | ||
389 | ((ses->server->tcpStatus == CifsGood) || | ||
390 | (ses->server->tcpStatus == CifsNew))) { | ||
391 | |||
392 | unsigned long lrt; | ||
393 | |||
394 | /* We timed out. Is the server still | ||
395 | sending replies ? */ | ||
396 | spin_lock(&GlobalMid_Lock); | ||
397 | lrt = ses->server->lstrp; | ||
398 | spin_unlock(&GlobalMid_Lock); | ||
399 | |||
400 | /* Calculate time_to_wait past last receive time. | ||
401 | Although we prefer not to time out if the | ||
402 | server is still responding - we will time | ||
403 | out if the server takes more than 15 (or 45 | ||
404 | or 180) seconds to respond to this request | ||
405 | and has not responded to any request from | ||
406 | other threads on the client within 10 seconds */ | ||
407 | lrt += time_to_wait; | ||
408 | if (time_after(jiffies, lrt)) { | ||
409 | /* No replies for time_to_wait. */ | ||
410 | cERROR(1,("server not responding")); | ||
411 | return -1; | ||
412 | } | ||
413 | } else { | ||
414 | return 0; | ||
415 | } | ||
416 | } | ||
417 | } | ||
418 | |||
419 | int | ||
420 | SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | ||
421 | struct kvec *iov, int n_vec, int * pRespBufType /* ret */, | ||
422 | const int long_op) | ||
423 | { | ||
424 | int rc = 0; | ||
425 | unsigned int receive_len; | ||
426 | unsigned long timeout; | ||
427 | struct mid_q_entry *midQ; | ||
428 | struct smb_hdr *in_buf = iov[0].iov_base; | ||
429 | |||
430 | *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ | ||
431 | |||
432 | if ((ses == NULL) || (ses->server == NULL)) { | ||
433 | cifs_small_buf_release(in_buf); | ||
434 | cERROR(1,("Null session")); | ||
435 | return -EIO; | ||
436 | } | ||
437 | |||
438 | if(ses->server->tcpStatus == CifsExiting) { | ||
439 | cifs_small_buf_release(in_buf); | ||
440 | return -ENOENT; | ||
441 | } | ||
442 | |||
443 | /* Ensure that we do not send more than 50 overlapping requests | ||
444 | to the same server. We may make this configurable later or | ||
445 | use ses->maxReq */ | ||
446 | |||
447 | rc = wait_for_free_request(ses, long_op); | ||
448 | if (rc) { | ||
449 | cifs_small_buf_release(in_buf); | ||
450 | return rc; | ||
451 | } | ||
452 | |||
453 | /* make sure that we sign in the same order that we send on this socket | ||
454 | and avoid races inside tcp sendmsg code that could cause corruption | ||
455 | of smb data */ | ||
456 | |||
457 | down(&ses->server->tcpSem); | ||
458 | |||
459 | rc = allocate_mid(ses, in_buf, &midQ); | ||
460 | if (rc) { | ||
387 | up(&ses->server->tcpSem); | 461 | up(&ses->server->tcpSem); |
388 | cifs_small_buf_release(in_buf); | 462 | cifs_small_buf_release(in_buf); |
389 | /* If not lock req, update # of requests on wire to server */ | 463 | /* Update # of requests on wire to server */ |
390 | if(long_op < 3) { | 464 | atomic_dec(&ses->server->inFlight); |
391 | atomic_dec(&ses->server->inFlight); | 465 | wake_up(&ses->server->request_q); |
392 | wake_up(&ses->server->request_q); | 466 | return rc; |
393 | } | ||
394 | return -ENOMEM; | ||
395 | } | 467 | } |
396 | 468 | ||
397 | rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); | 469 | rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); |
@@ -406,32 +478,23 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
406 | atomic_dec(&ses->server->inSend); | 478 | atomic_dec(&ses->server->inSend); |
407 | midQ->when_sent = jiffies; | 479 | midQ->when_sent = jiffies; |
408 | #endif | 480 | #endif |
409 | if(rc < 0) { | 481 | |
410 | DeleteMidQEntry(midQ); | 482 | up(&ses->server->tcpSem); |
411 | up(&ses->server->tcpSem); | 483 | cifs_small_buf_release(in_buf); |
412 | cifs_small_buf_release(in_buf); | 484 | |
413 | /* If not lock req, update # of requests on wire to server */ | 485 | if(rc < 0) |
414 | if(long_op < 3) { | 486 | goto out; |
415 | atomic_dec(&ses->server->inFlight); | ||
416 | wake_up(&ses->server->request_q); | ||
417 | } | ||
418 | return rc; | ||
419 | } else { | ||
420 | up(&ses->server->tcpSem); | ||
421 | cifs_small_buf_release(in_buf); | ||
422 | } | ||
423 | 487 | ||
424 | if (long_op == -1) | 488 | if (long_op == -1) |
425 | goto cifs_no_response_exit2; | 489 | goto out; |
426 | else if (long_op == 2) /* writes past end of file can take loong time */ | 490 | else if (long_op == 2) /* writes past end of file can take loong time */ |
427 | timeout = 180 * HZ; | 491 | timeout = 180 * HZ; |
428 | else if (long_op == 1) | 492 | else if (long_op == 1) |
429 | timeout = 45 * HZ; /* should be greater than | 493 | timeout = 45 * HZ; /* should be greater than |
430 | servers oplock break timeout (about 43 seconds) */ | 494 | servers oplock break timeout (about 43 seconds) */ |
431 | else if (long_op > 2) { | 495 | else |
432 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
433 | } else | ||
434 | timeout = 15 * HZ; | 496 | timeout = 15 * HZ; |
497 | |||
435 | /* wait for 15 seconds or until woken up due to response arriving or | 498 | /* wait for 15 seconds or until woken up due to response arriving or |
436 | due to last connection to this server being unmounted */ | 499 | due to last connection to this server being unmounted */ |
437 | if (signal_pending(current)) { | 500 | if (signal_pending(current)) { |
@@ -441,19 +504,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
441 | } | 504 | } |
442 | 505 | ||
443 | /* No user interrupts in wait - wreaks havoc with performance */ | 506 | /* No user interrupts in wait - wreaks havoc with performance */ |
444 | if(timeout != MAX_SCHEDULE_TIMEOUT) { | 507 | wait_for_response(ses, midQ, timeout, 10 * HZ); |
445 | timeout += jiffies; | ||
446 | wait_event(ses->server->response_q, | ||
447 | (!(midQ->midState & MID_REQUEST_SUBMITTED)) || | ||
448 | time_after(jiffies, timeout) || | ||
449 | ((ses->server->tcpStatus != CifsGood) && | ||
450 | (ses->server->tcpStatus != CifsNew))); | ||
451 | } else { | ||
452 | wait_event(ses->server->response_q, | ||
453 | (!(midQ->midState & MID_REQUEST_SUBMITTED)) || | ||
454 | ((ses->server->tcpStatus != CifsGood) && | ||
455 | (ses->server->tcpStatus != CifsNew))); | ||
456 | } | ||
457 | 508 | ||
458 | spin_lock(&GlobalMid_Lock); | 509 | spin_lock(&GlobalMid_Lock); |
459 | if (midQ->resp_buf) { | 510 | if (midQ->resp_buf) { |
@@ -481,11 +532,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
481 | } | 532 | } |
482 | spin_unlock(&GlobalMid_Lock); | 533 | spin_unlock(&GlobalMid_Lock); |
483 | DeleteMidQEntry(midQ); | 534 | DeleteMidQEntry(midQ); |
484 | /* If not lock req, update # of requests on wire to server */ | 535 | /* Update # of requests on wire to server */ |
485 | if(long_op < 3) { | 536 | atomic_dec(&ses->server->inFlight); |
486 | atomic_dec(&ses->server->inFlight); | 537 | wake_up(&ses->server->request_q); |
487 | wake_up(&ses->server->request_q); | ||
488 | } | ||
489 | return rc; | 538 | return rc; |
490 | } | 539 | } |
491 | 540 | ||
@@ -536,24 +585,12 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
536 | cFYI(1,("Bad MID state?")); | 585 | cFYI(1,("Bad MID state?")); |
537 | } | 586 | } |
538 | } | 587 | } |
539 | cifs_no_response_exit2: | ||
540 | DeleteMidQEntry(midQ); | ||
541 | |||
542 | if(long_op < 3) { | ||
543 | atomic_dec(&ses->server->inFlight); | ||
544 | wake_up(&ses->server->request_q); | ||
545 | } | ||
546 | 588 | ||
547 | return rc; | 589 | out: |
548 | 590 | ||
549 | out_unlock2: | 591 | DeleteMidQEntry(midQ); |
550 | up(&ses->server->tcpSem); | 592 | atomic_dec(&ses->server->inFlight); |
551 | cifs_small_buf_release(in_buf); | 593 | wake_up(&ses->server->request_q); |
552 | /* If not lock req, update # of requests on wire to server */ | ||
553 | if(long_op < 3) { | ||
554 | atomic_dec(&ses->server->inFlight); | ||
555 | wake_up(&ses->server->request_q); | ||
556 | } | ||
557 | 594 | ||
558 | return rc; | 595 | return rc; |
559 | } | 596 | } |
@@ -583,85 +620,34 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
583 | /* Ensure that we do not send more than 50 overlapping requests | 620 | /* Ensure that we do not send more than 50 overlapping requests |
584 | to the same server. We may make this configurable later or | 621 | to the same server. We may make this configurable later or |
585 | use ses->maxReq */ | 622 | use ses->maxReq */ |
586 | if(long_op == -1) { | ||
587 | /* oplock breaks must not be held up */ | ||
588 | atomic_inc(&ses->server->inFlight); | ||
589 | } else { | ||
590 | spin_lock(&GlobalMid_Lock); | ||
591 | while(1) { | ||
592 | if(atomic_read(&ses->server->inFlight) >= | ||
593 | cifs_max_pending){ | ||
594 | spin_unlock(&GlobalMid_Lock); | ||
595 | #ifdef CONFIG_CIFS_STATS2 | ||
596 | atomic_inc(&ses->server->num_waiters); | ||
597 | #endif | ||
598 | wait_event(ses->server->request_q, | ||
599 | atomic_read(&ses->server->inFlight) | ||
600 | < cifs_max_pending); | ||
601 | #ifdef CONFIG_CIFS_STATS2 | ||
602 | atomic_dec(&ses->server->num_waiters); | ||
603 | #endif | ||
604 | spin_lock(&GlobalMid_Lock); | ||
605 | } else { | ||
606 | if(ses->server->tcpStatus == CifsExiting) { | ||
607 | spin_unlock(&GlobalMid_Lock); | ||
608 | return -ENOENT; | ||
609 | } | ||
610 | 623 | ||
611 | /* can not count locking commands against total since | 624 | rc = wait_for_free_request(ses, long_op); |
612 | they are allowed to block on server */ | 625 | if (rc) |
613 | 626 | return rc; | |
614 | if(long_op < 3) { | 627 | |
615 | /* update # of requests on the wire to server */ | ||
616 | atomic_inc(&ses->server->inFlight); | ||
617 | } | ||
618 | spin_unlock(&GlobalMid_Lock); | ||
619 | break; | ||
620 | } | ||
621 | } | ||
622 | } | ||
623 | /* make sure that we sign in the same order that we send on this socket | 628 | /* make sure that we sign in the same order that we send on this socket |
624 | and avoid races inside tcp sendmsg code that could cause corruption | 629 | and avoid races inside tcp sendmsg code that could cause corruption |
625 | of smb data */ | 630 | of smb data */ |
626 | 631 | ||
627 | down(&ses->server->tcpSem); | 632 | down(&ses->server->tcpSem); |
628 | 633 | ||
629 | if (ses->server->tcpStatus == CifsExiting) { | 634 | rc = allocate_mid(ses, in_buf, &midQ); |
630 | rc = -ENOENT; | 635 | if (rc) { |
631 | goto out_unlock; | ||
632 | } else if (ses->server->tcpStatus == CifsNeedReconnect) { | ||
633 | cFYI(1,("tcp session dead - return to caller to retry")); | ||
634 | rc = -EAGAIN; | ||
635 | goto out_unlock; | ||
636 | } else if (ses->status != CifsGood) { | ||
637 | /* check if SMB session is bad because we are setting it up */ | ||
638 | if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && | ||
639 | (in_buf->Command != SMB_COM_NEGOTIATE)) { | ||
640 | rc = -EAGAIN; | ||
641 | goto out_unlock; | ||
642 | } /* else ok - we are setting up session */ | ||
643 | } | ||
644 | midQ = AllocMidQEntry(in_buf, ses); | ||
645 | if (midQ == NULL) { | ||
646 | up(&ses->server->tcpSem); | 636 | up(&ses->server->tcpSem); |
647 | /* If not lock req, update # of requests on wire to server */ | 637 | /* Update # of requests on wire to server */ |
648 | if(long_op < 3) { | 638 | atomic_dec(&ses->server->inFlight); |
649 | atomic_dec(&ses->server->inFlight); | 639 | wake_up(&ses->server->request_q); |
650 | wake_up(&ses->server->request_q); | 640 | return rc; |
651 | } | ||
652 | return -ENOMEM; | ||
653 | } | 641 | } |
654 | 642 | ||
655 | if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { | 643 | if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { |
656 | up(&ses->server->tcpSem); | ||
657 | cERROR(1, ("Illegal length, greater than maximum frame, %d", | 644 | cERROR(1, ("Illegal length, greater than maximum frame, %d", |
658 | in_buf->smb_buf_length)); | 645 | in_buf->smb_buf_length)); |
659 | DeleteMidQEntry(midQ); | 646 | DeleteMidQEntry(midQ); |
660 | /* If not lock req, update # of requests on wire to server */ | 647 | up(&ses->server->tcpSem); |
661 | if(long_op < 3) { | 648 | /* Update # of requests on wire to server */ |
662 | atomic_dec(&ses->server->inFlight); | 649 | atomic_dec(&ses->server->inFlight); |
663 | wake_up(&ses->server->request_q); | 650 | wake_up(&ses->server->request_q); |
664 | } | ||
665 | return -EIO; | 651 | return -EIO; |
666 | } | 652 | } |
667 | 653 | ||
@@ -677,27 +663,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
677 | atomic_dec(&ses->server->inSend); | 663 | atomic_dec(&ses->server->inSend); |
678 | midQ->when_sent = jiffies; | 664 | midQ->when_sent = jiffies; |
679 | #endif | 665 | #endif |
680 | if(rc < 0) { | 666 | up(&ses->server->tcpSem); |
681 | DeleteMidQEntry(midQ); | 667 | |
682 | up(&ses->server->tcpSem); | 668 | if(rc < 0) |
683 | /* If not lock req, update # of requests on wire to server */ | 669 | goto out; |
684 | if(long_op < 3) { | 670 | |
685 | atomic_dec(&ses->server->inFlight); | ||
686 | wake_up(&ses->server->request_q); | ||
687 | } | ||
688 | return rc; | ||
689 | } else | ||
690 | up(&ses->server->tcpSem); | ||
691 | if (long_op == -1) | 671 | if (long_op == -1) |
692 | goto cifs_no_response_exit; | 672 | goto out; |
693 | else if (long_op == 2) /* writes past end of file can take loong time */ | 673 | else if (long_op == 2) /* writes past end of file can take loong time */ |
694 | timeout = 180 * HZ; | 674 | timeout = 180 * HZ; |
695 | else if (long_op == 1) | 675 | else if (long_op == 1) |
696 | timeout = 45 * HZ; /* should be greater than | 676 | timeout = 45 * HZ; /* should be greater than |
697 | servers oplock break timeout (about 43 seconds) */ | 677 | servers oplock break timeout (about 43 seconds) */ |
698 | else if (long_op > 2) { | 678 | else |
699 | timeout = MAX_SCHEDULE_TIMEOUT; | ||
700 | } else | ||
701 | timeout = 15 * HZ; | 679 | timeout = 15 * HZ; |
702 | /* wait for 15 seconds or until woken up due to response arriving or | 680 | /* wait for 15 seconds or until woken up due to response arriving or |
703 | due to last connection to this server being unmounted */ | 681 | due to last connection to this server being unmounted */ |
@@ -708,19 +686,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
708 | } | 686 | } |
709 | 687 | ||
710 | /* No user interrupts in wait - wreaks havoc with performance */ | 688 | /* No user interrupts in wait - wreaks havoc with performance */ |
711 | if(timeout != MAX_SCHEDULE_TIMEOUT) { | 689 | wait_for_response(ses, midQ, timeout, 10 * HZ); |
712 | timeout += jiffies; | ||
713 | wait_event(ses->server->response_q, | ||
714 | (!(midQ->midState & MID_REQUEST_SUBMITTED)) || | ||
715 | time_after(jiffies, timeout) || | ||
716 | ((ses->server->tcpStatus != CifsGood) && | ||
717 | (ses->server->tcpStatus != CifsNew))); | ||
718 | } else { | ||
719 | wait_event(ses->server->response_q, | ||
720 | (!(midQ->midState & MID_REQUEST_SUBMITTED)) || | ||
721 | ((ses->server->tcpStatus != CifsGood) && | ||
722 | (ses->server->tcpStatus != CifsNew))); | ||
723 | } | ||
724 | 690 | ||
725 | spin_lock(&GlobalMid_Lock); | 691 | spin_lock(&GlobalMid_Lock); |
726 | if (midQ->resp_buf) { | 692 | if (midQ->resp_buf) { |
@@ -748,11 +714,9 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
748 | } | 714 | } |
749 | spin_unlock(&GlobalMid_Lock); | 715 | spin_unlock(&GlobalMid_Lock); |
750 | DeleteMidQEntry(midQ); | 716 | DeleteMidQEntry(midQ); |
751 | /* If not lock req, update # of requests on wire to server */ | 717 | /* Update # of requests on wire to server */ |
752 | if(long_op < 3) { | 718 | atomic_dec(&ses->server->inFlight); |
753 | atomic_dec(&ses->server->inFlight); | 719 | wake_up(&ses->server->request_q); |
754 | wake_up(&ses->server->request_q); | ||
755 | } | ||
756 | return rc; | 720 | return rc; |
757 | } | 721 | } |
758 | 722 | ||
@@ -799,23 +763,253 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
799 | cERROR(1,("Bad MID state?")); | 763 | cERROR(1,("Bad MID state?")); |
800 | } | 764 | } |
801 | } | 765 | } |
802 | cifs_no_response_exit: | 766 | |
767 | out: | ||
768 | |||
803 | DeleteMidQEntry(midQ); | 769 | DeleteMidQEntry(midQ); |
770 | atomic_dec(&ses->server->inFlight); | ||
771 | wake_up(&ses->server->request_q); | ||
804 | 772 | ||
805 | if(long_op < 3) { | 773 | return rc; |
806 | atomic_dec(&ses->server->inFlight); | 774 | } |
807 | wake_up(&ses->server->request_q); | 775 | |
808 | } | 776 | /* Send an NT_CANCEL SMB to cause the POSIX blocking lock to return. */ |
777 | |||
778 | static int | ||
779 | send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, | ||
780 | struct mid_q_entry *midQ) | ||
781 | { | ||
782 | int rc = 0; | ||
783 | struct cifsSesInfo *ses = tcon->ses; | ||
784 | __u16 mid = in_buf->Mid; | ||
809 | 785 | ||
786 | header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0); | ||
787 | in_buf->Mid = mid; | ||
788 | down(&ses->server->tcpSem); | ||
789 | rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); | ||
790 | if (rc) { | ||
791 | up(&ses->server->tcpSem); | ||
792 | return rc; | ||
793 | } | ||
794 | rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, | ||
795 | (struct sockaddr *) &(ses->server->addr.sockAddr)); | ||
796 | up(&ses->server->tcpSem); | ||
810 | return rc; | 797 | return rc; |
798 | } | ||
799 | |||
800 | /* We send a LOCKINGX_CANCEL_LOCK to cause the Windows | ||
801 | blocking lock to return. */ | ||
802 | |||
803 | static int | ||
804 | send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon, | ||
805 | struct smb_hdr *in_buf, | ||
806 | struct smb_hdr *out_buf) | ||
807 | { | ||
808 | int bytes_returned; | ||
809 | struct cifsSesInfo *ses = tcon->ses; | ||
810 | LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; | ||
811 | |||
812 | /* We just modify the current in_buf to change | ||
813 | the type of lock from LOCKING_ANDX_SHARED_LOCK | ||
814 | or LOCKING_ANDX_EXCLUSIVE_LOCK to | ||
815 | LOCKING_ANDX_CANCEL_LOCK. */ | ||
816 | |||
817 | pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; | ||
818 | pSMB->Timeout = 0; | ||
819 | pSMB->hdr.Mid = GetNextMid(ses->server); | ||
820 | |||
821 | return SendReceive(xid, ses, in_buf, out_buf, | ||
822 | &bytes_returned, 0); | ||
823 | } | ||
811 | 824 | ||
812 | out_unlock: | 825 | int |
826 | SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | ||
827 | struct smb_hdr *in_buf, struct smb_hdr *out_buf, | ||
828 | int *pbytes_returned) | ||
829 | { | ||
830 | int rc = 0; | ||
831 | int rstart = 0; | ||
832 | unsigned int receive_len; | ||
833 | struct mid_q_entry *midQ; | ||
834 | struct cifsSesInfo *ses; | ||
835 | |||
836 | if (tcon == NULL || tcon->ses == NULL) { | ||
837 | cERROR(1,("Null smb session")); | ||
838 | return -EIO; | ||
839 | } | ||
840 | ses = tcon->ses; | ||
841 | |||
842 | if(ses->server == NULL) { | ||
843 | cERROR(1,("Null tcp session")); | ||
844 | return -EIO; | ||
845 | } | ||
846 | |||
847 | if(ses->server->tcpStatus == CifsExiting) | ||
848 | return -ENOENT; | ||
849 | |||
850 | /* Ensure that we do not send more than 50 overlapping requests | ||
851 | to the same server. We may make this configurable later or | ||
852 | use ses->maxReq */ | ||
853 | |||
854 | rc = wait_for_free_request(ses, 3); | ||
855 | if (rc) | ||
856 | return rc; | ||
857 | |||
858 | /* make sure that we sign in the same order that we send on this socket | ||
859 | and avoid races inside tcp sendmsg code that could cause corruption | ||
860 | of smb data */ | ||
861 | |||
862 | down(&ses->server->tcpSem); | ||
863 | |||
864 | rc = allocate_mid(ses, in_buf, &midQ); | ||
865 | if (rc) { | ||
866 | up(&ses->server->tcpSem); | ||
867 | return rc; | ||
868 | } | ||
869 | |||
870 | if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { | ||
871 | up(&ses->server->tcpSem); | ||
872 | cERROR(1, ("Illegal length, greater than maximum frame, %d", | ||
873 | in_buf->smb_buf_length)); | ||
874 | DeleteMidQEntry(midQ); | ||
875 | return -EIO; | ||
876 | } | ||
877 | |||
878 | rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); | ||
879 | |||
880 | midQ->midState = MID_REQUEST_SUBMITTED; | ||
881 | #ifdef CONFIG_CIFS_STATS2 | ||
882 | atomic_inc(&ses->server->inSend); | ||
883 | #endif | ||
884 | rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, | ||
885 | (struct sockaddr *) &(ses->server->addr.sockAddr)); | ||
886 | #ifdef CONFIG_CIFS_STATS2 | ||
887 | atomic_dec(&ses->server->inSend); | ||
888 | midQ->when_sent = jiffies; | ||
889 | #endif | ||
813 | up(&ses->server->tcpSem); | 890 | up(&ses->server->tcpSem); |
814 | /* If not lock req, update # of requests on wire to server */ | 891 | |
815 | if(long_op < 3) { | 892 | if(rc < 0) { |
816 | atomic_dec(&ses->server->inFlight); | 893 | DeleteMidQEntry(midQ); |
817 | wake_up(&ses->server->request_q); | 894 | return rc; |
895 | } | ||
896 | |||
897 | /* Wait for a reply - allow signals to interrupt. */ | ||
898 | rc = wait_event_interruptible(ses->server->response_q, | ||
899 | (!(midQ->midState == MID_REQUEST_SUBMITTED)) || | ||
900 | ((ses->server->tcpStatus != CifsGood) && | ||
901 | (ses->server->tcpStatus != CifsNew))); | ||
902 | |||
903 | /* Were we interrupted by a signal ? */ | ||
904 | if ((rc == -ERESTARTSYS) && | ||
905 | (midQ->midState == MID_REQUEST_SUBMITTED) && | ||
906 | ((ses->server->tcpStatus == CifsGood) || | ||
907 | (ses->server->tcpStatus == CifsNew))) { | ||
908 | |||
909 | if (in_buf->Command == SMB_COM_TRANSACTION2) { | ||
910 | /* POSIX lock. We send a NT_CANCEL SMB to cause the | ||
911 | blocking lock to return. */ | ||
912 | |||
913 | rc = send_nt_cancel(tcon, in_buf, midQ); | ||
914 | if (rc) { | ||
915 | DeleteMidQEntry(midQ); | ||
916 | return rc; | ||
917 | } | ||
918 | } else { | ||
919 | /* Windows lock. We send a LOCKINGX_CANCEL_LOCK | ||
920 | to cause the blocking lock to return. */ | ||
921 | |||
922 | rc = send_lock_cancel(xid, tcon, in_buf, out_buf); | ||
923 | |||
924 | /* If we get -ENOLCK back the lock may have | ||
925 | already been removed. Don't exit in this case. */ | ||
926 | if (rc && rc != -ENOLCK) { | ||
927 | DeleteMidQEntry(midQ); | ||
928 | return rc; | ||
929 | } | ||
930 | } | ||
931 | |||
932 | /* Wait 5 seconds for the response. */ | ||
933 | if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ)==0) { | ||
934 | /* We got the response - restart system call. */ | ||
935 | rstart = 1; | ||
936 | } | ||
937 | } | ||
938 | |||
939 | spin_lock(&GlobalMid_Lock); | ||
940 | if (midQ->resp_buf) { | ||
941 | spin_unlock(&GlobalMid_Lock); | ||
942 | receive_len = midQ->resp_buf->smb_buf_length; | ||
943 | } else { | ||
944 | cERROR(1,("No response for cmd %d mid %d", | ||
945 | midQ->command, midQ->mid)); | ||
946 | if(midQ->midState == MID_REQUEST_SUBMITTED) { | ||
947 | if(ses->server->tcpStatus == CifsExiting) | ||
948 | rc = -EHOSTDOWN; | ||
949 | else { | ||
950 | ses->server->tcpStatus = CifsNeedReconnect; | ||
951 | midQ->midState = MID_RETRY_NEEDED; | ||
952 | } | ||
953 | } | ||
954 | |||
955 | if (rc != -EHOSTDOWN) { | ||
956 | if(midQ->midState == MID_RETRY_NEEDED) { | ||
957 | rc = -EAGAIN; | ||
958 | cFYI(1,("marking request for retry")); | ||
959 | } else { | ||
960 | rc = -EIO; | ||
961 | } | ||
962 | } | ||
963 | spin_unlock(&GlobalMid_Lock); | ||
964 | DeleteMidQEntry(midQ); | ||
965 | return rc; | ||
818 | } | 966 | } |
967 | |||
968 | if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { | ||
969 | cERROR(1, ("Frame too large received. Length: %d Xid: %d", | ||
970 | receive_len, xid)); | ||
971 | rc = -EIO; | ||
972 | } else { /* rcvd frame is ok */ | ||
973 | |||
974 | if (midQ->resp_buf && out_buf | ||
975 | && (midQ->midState == MID_RESPONSE_RECEIVED)) { | ||
976 | out_buf->smb_buf_length = receive_len; | ||
977 | memcpy((char *)out_buf + 4, | ||
978 | (char *)midQ->resp_buf + 4, | ||
979 | receive_len); | ||
980 | |||
981 | dump_smb(out_buf, 92); | ||
982 | /* convert the length into a more usable form */ | ||
983 | if((receive_len > 24) && | ||
984 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | | ||
985 | SECMODE_SIGN_ENABLED))) { | ||
986 | rc = cifs_verify_signature(out_buf, | ||
987 | ses->server->mac_signing_key, | ||
988 | midQ->sequence_number+1); | ||
989 | if(rc) { | ||
990 | cERROR(1,("Unexpected SMB signature")); | ||
991 | /* BB FIXME add code to kill session */ | ||
992 | } | ||
993 | } | ||
994 | |||
995 | *pbytes_returned = out_buf->smb_buf_length; | ||
996 | |||
997 | /* BB special case reconnect tid and uid here? */ | ||
998 | rc = map_smb_to_linux_error(out_buf); | ||
819 | 999 | ||
1000 | /* convert ByteCount if necessary */ | ||
1001 | if (receive_len >= | ||
1002 | sizeof (struct smb_hdr) - | ||
1003 | 4 /* do not count RFC1001 header */ + | ||
1004 | (2 * out_buf->WordCount) + 2 /* bcc */ ) | ||
1005 | BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); | ||
1006 | } else { | ||
1007 | rc = -EIO; | ||
1008 | cERROR(1,("Bad MID state?")); | ||
1009 | } | ||
1010 | } | ||
1011 | DeleteMidQEntry(midQ); | ||
1012 | if (rstart && rc == -EACCES) | ||
1013 | return -ERESTARTSYS; | ||
820 | return rc; | 1014 | return rc; |
821 | } | 1015 | } |
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 7754d641775e..067648b7179b 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c | |||
@@ -330,11 +330,15 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size) | |||
330 | sb = direntry->d_inode->i_sb; | 330 | sb = direntry->d_inode->i_sb; |
331 | if(sb == NULL) | 331 | if(sb == NULL) |
332 | return -EIO; | 332 | return -EIO; |
333 | xid = GetXid(); | ||
334 | 333 | ||
335 | cifs_sb = CIFS_SB(sb); | 334 | cifs_sb = CIFS_SB(sb); |
336 | pTcon = cifs_sb->tcon; | 335 | pTcon = cifs_sb->tcon; |
337 | 336 | ||
337 | if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) | ||
338 | return -EOPNOTSUPP; | ||
339 | |||
340 | xid = GetXid(); | ||
341 | |||
338 | full_path = build_path_from_dentry(direntry); | 342 | full_path = build_path_from_dentry(direntry); |
339 | if(full_path == NULL) { | 343 | if(full_path == NULL) { |
340 | FreeXid(xid); | 344 | FreeXid(xid); |
diff --git a/fs/coda/file.c b/fs/coda/file.c index cc66c681bd11..dbfbcfa5b3c0 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -136,10 +136,8 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) | |||
136 | coda_vfs_stat.open++; | 136 | coda_vfs_stat.open++; |
137 | 137 | ||
138 | cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL); | 138 | cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL); |
139 | if (!cfi) { | 139 | if (!cfi) |
140 | unlock_kernel(); | ||
141 | return -ENOMEM; | 140 | return -ENOMEM; |
142 | } | ||
143 | 141 | ||
144 | lock_kernel(); | 142 | lock_kernel(); |
145 | 143 | ||
diff --git a/fs/dcache.c b/fs/dcache.c index c6e3535be192..1b4a3a34ec57 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; | |||
38 | EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); | 38 | EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); |
39 | 39 | ||
40 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); | 40 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); |
41 | static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; | 41 | static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); |
42 | 42 | ||
43 | EXPORT_SYMBOL(dcache_lock); | 43 | EXPORT_SYMBOL(dcache_lock); |
44 | 44 | ||
@@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target) | |||
1339 | */ | 1339 | */ |
1340 | if (target < dentry) { | 1340 | if (target < dentry) { |
1341 | spin_lock(&target->d_lock); | 1341 | spin_lock(&target->d_lock); |
1342 | spin_lock(&dentry->d_lock); | 1342 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
1343 | } else { | 1343 | } else { |
1344 | spin_lock(&dentry->d_lock); | 1344 | spin_lock(&dentry->d_lock); |
1345 | spin_lock(&target->d_lock); | 1345 | spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); |
1346 | } | 1346 | } |
1347 | 1347 | ||
1348 | /* Move the dentry to the target hash queue, if on different bucket */ | 1348 | /* Move the dentry to the target hash queue, if on different bucket */ |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 538fb0418fba..5981e17f46f0 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) | |||
220 | if (dio->end_io && dio->result) | 220 | if (dio->end_io && dio->result) |
221 | dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private); | 221 | dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private); |
222 | if (dio->lock_type == DIO_LOCKING) | 222 | if (dio->lock_type == DIO_LOCKING) |
223 | up_read(&dio->inode->i_alloc_sem); | 223 | /* lockdep: non-owner release */ |
224 | up_read_non_owner(&dio->inode->i_alloc_sem); | ||
224 | } | 225 | } |
225 | 226 | ||
226 | /* | 227 | /* |
@@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1261 | } | 1262 | } |
1262 | 1263 | ||
1263 | if (dio_lock_type == DIO_LOCKING) | 1264 | if (dio_lock_type == DIO_LOCKING) |
1264 | down_read(&inode->i_alloc_sem); | 1265 | /* lockdep: not the owner will release it */ |
1266 | down_read_non_owner(&inode->i_alloc_sem); | ||
1265 | } | 1267 | } |
1266 | 1268 | ||
1267 | /* | 1269 | /* |
diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index e249cf733a6b..1d30d2ff440f 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c | |||
@@ -22,7 +22,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page) | |||
22 | 22 | ||
23 | err = -ENAMETOOLONG; | 23 | err = -ENAMETOOLONG; |
24 | if (size > 2 * EFS_BLOCKSIZE) | 24 | if (size > 2 * EFS_BLOCKSIZE) |
25 | goto fail; | 25 | goto fail_notlocked; |
26 | 26 | ||
27 | lock_kernel(); | 27 | lock_kernel(); |
28 | /* read first 512 bytes of link target */ | 28 | /* read first 512 bytes of link target */ |
@@ -47,6 +47,7 @@ static int efs_symlink_readpage(struct file *file, struct page *page) | |||
47 | return 0; | 47 | return 0; |
48 | fail: | 48 | fail: |
49 | unlock_kernel(); | 49 | unlock_kernel(); |
50 | fail_notlocked: | ||
50 | SetPageError(page); | 51 | SetPageError(page); |
51 | kunmap(page); | 52 | kunmap(page); |
52 | unlock_page(page); | 53 | unlock_page(page); |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9c677bbd0b08..3a3567433b92 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -120,7 +120,7 @@ struct epoll_filefd { | |||
120 | */ | 120 | */ |
121 | struct wake_task_node { | 121 | struct wake_task_node { |
122 | struct list_head llink; | 122 | struct list_head llink; |
123 | task_t *task; | 123 | struct task_struct *task; |
124 | wait_queue_head_t *wq; | 124 | wait_queue_head_t *wq; |
125 | }; | 125 | }; |
126 | 126 | ||
@@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) | |||
413 | { | 413 | { |
414 | int wake_nests = 0; | 414 | int wake_nests = 0; |
415 | unsigned long flags; | 415 | unsigned long flags; |
416 | task_t *this_task = current; | 416 | struct task_struct *this_task = current; |
417 | struct list_head *lsthead = &psw->wake_task_list, *lnk; | 417 | struct list_head *lsthead = &psw->wake_task_list, *lnk; |
418 | struct wake_task_node *tncur; | 418 | struct wake_task_node *tncur; |
419 | struct wake_task_node tnode; | 419 | struct wake_task_node tnode; |
@@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) | |||
1168 | eexit_1: | 1168 | eexit_1: |
1169 | 1169 | ||
1170 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", | 1170 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", |
1171 | current, ep, epi->file, error)); | 1171 | current, ep, epi->ffd.file, error)); |
1172 | 1172 | ||
1173 | return error; | 1173 | return error; |
1174 | } | 1174 | } |
@@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
1236 | struct eventpoll *ep = epi->ep; | 1236 | struct eventpoll *ep = epi->ep; |
1237 | 1237 | ||
1238 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", | 1238 | DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", |
1239 | current, epi->file, epi, ep)); | 1239 | current, epi->ffd.file, epi, ep)); |
1240 | 1240 | ||
1241 | write_lock_irqsave(&ep->lock, flags); | 1241 | write_lock_irqsave(&ep->lock, flags); |
1242 | 1242 | ||
@@ -486,8 +486,6 @@ struct file *open_exec(const char *name) | |||
486 | if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && | 486 | if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && |
487 | S_ISREG(inode->i_mode)) { | 487 | S_ISREG(inode->i_mode)) { |
488 | int err = vfs_permission(&nd, MAY_EXEC); | 488 | int err = vfs_permission(&nd, MAY_EXEC); |
489 | if (!err && !(inode->i_mode & 0111)) | ||
490 | err = -EACCES; | ||
491 | file = ERR_PTR(err); | 489 | file = ERR_PTR(err); |
492 | if (!err) { | 490 | if (!err) { |
493 | file = nameidata_to_filp(&nd, O_RDONLY); | 491 | file = nameidata_to_filp(&nd, O_RDONLY); |
@@ -753,7 +751,7 @@ no_thread_group: | |||
753 | 751 | ||
754 | write_lock_irq(&tasklist_lock); | 752 | write_lock_irq(&tasklist_lock); |
755 | spin_lock(&oldsighand->siglock); | 753 | spin_lock(&oldsighand->siglock); |
756 | spin_lock(&newsighand->siglock); | 754 | spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING); |
757 | 755 | ||
758 | rcu_assign_pointer(current->sighand, newsighand); | 756 | rcu_assign_pointer(current->sighand, newsighand); |
759 | recalc_sigpending(); | 757 | recalc_sigpending(); |
@@ -922,12 +920,6 @@ int prepare_binprm(struct linux_binprm *bprm) | |||
922 | int retval; | 920 | int retval; |
923 | 921 | ||
924 | mode = inode->i_mode; | 922 | mode = inode->i_mode; |
925 | /* | ||
926 | * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, | ||
927 | * generic_permission lets a non-executable through | ||
928 | */ | ||
929 | if (!(mode & 0111)) /* with at least _one_ execute bit set */ | ||
930 | return -EACCES; | ||
931 | if (bprm->file->f_op == NULL) | 923 | if (bprm->file->f_op == NULL) |
932 | return -EACCES; | 924 | return -EACCES; |
933 | 925 | ||
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9f43879d6d68..ca5bfb6914d2 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -251,6 +251,44 @@ static struct super_operations ext2_sops = { | |||
251 | #endif | 251 | #endif |
252 | }; | 252 | }; |
253 | 253 | ||
254 | static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) | ||
255 | { | ||
256 | __u32 *objp = vobjp; | ||
257 | unsigned long ino = objp[0]; | ||
258 | __u32 generation = objp[1]; | ||
259 | struct inode *inode; | ||
260 | struct dentry *result; | ||
261 | |||
262 | if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) | ||
263 | return ERR_PTR(-ESTALE); | ||
264 | if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) | ||
265 | return ERR_PTR(-ESTALE); | ||
266 | |||
267 | /* iget isn't really right if the inode is currently unallocated!! | ||
268 | * ext2_read_inode currently does appropriate checks, but | ||
269 | * it might be "neater" to call ext2_get_inode first and check | ||
270 | * if the inode is valid..... | ||
271 | */ | ||
272 | inode = iget(sb, ino); | ||
273 | if (inode == NULL) | ||
274 | return ERR_PTR(-ENOMEM); | ||
275 | if (is_bad_inode(inode) || | ||
276 | (generation && inode->i_generation != generation)) { | ||
277 | /* we didn't find the right inode.. */ | ||
278 | iput(inode); | ||
279 | return ERR_PTR(-ESTALE); | ||
280 | } | ||
281 | /* now to find a dentry. | ||
282 | * If possible, get a well-connected one | ||
283 | */ | ||
284 | result = d_alloc_anon(inode); | ||
285 | if (!result) { | ||
286 | iput(inode); | ||
287 | return ERR_PTR(-ENOMEM); | ||
288 | } | ||
289 | return result; | ||
290 | } | ||
291 | |||
254 | /* Yes, most of these are left as NULL!! | 292 | /* Yes, most of these are left as NULL!! |
255 | * A NULL value implies the default, which works with ext2-like file | 293 | * A NULL value implies the default, which works with ext2-like file |
256 | * systems, but can be improved upon. | 294 | * systems, but can be improved upon. |
@@ -258,6 +296,7 @@ static struct super_operations ext2_sops = { | |||
258 | */ | 296 | */ |
259 | static struct export_operations ext2_export_ops = { | 297 | static struct export_operations ext2_export_ops = { |
260 | .get_parent = ext2_get_parent, | 298 | .get_parent = ext2_get_parent, |
299 | .get_dentry = ext2_get_dentry, | ||
261 | }; | 300 | }; |
262 | 301 | ||
263 | static unsigned long get_sb_block(void **data) | 302 | static unsigned long get_sb_block(void **data) |
@@ -775,7 +814,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
775 | if (EXT2_INODE_SIZE(sb) == 0) | 814 | if (EXT2_INODE_SIZE(sb) == 0) |
776 | goto cantfind_ext2; | 815 | goto cantfind_ext2; |
777 | sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); | 816 | sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); |
778 | if (sbi->s_inodes_per_block == 0) | 817 | if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0) |
779 | goto cantfind_ext2; | 818 | goto cantfind_ext2; |
780 | sbi->s_itb_per_group = sbi->s_inodes_per_group / | 819 | sbi->s_itb_per_group = sbi->s_inodes_per_group / |
781 | sbi->s_inodes_per_block; | 820 | sbi->s_inodes_per_block; |
@@ -1157,7 +1196,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, | |||
1157 | struct buffer_head tmp_bh; | 1196 | struct buffer_head tmp_bh; |
1158 | struct buffer_head *bh; | 1197 | struct buffer_head *bh; |
1159 | 1198 | ||
1160 | mutex_lock(&inode->i_mutex); | 1199 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); |
1161 | while (towrite > 0) { | 1200 | while (towrite > 0) { |
1162 | tocopy = sb->s_blocksize - offset < towrite ? | 1201 | tocopy = sb->s_blocksize - offset < towrite ? |
1163 | sb->s_blocksize - offset : towrite; | 1202 | sb->s_blocksize - offset : towrite; |
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 92d50b53a933..0d1e6279cbfd 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h | |||
@@ -62,9 +62,6 @@ extern int ext3_permission (struct inode *, int, struct nameidata *); | |||
62 | extern int ext3_acl_chmod (struct inode *); | 62 | extern int ext3_acl_chmod (struct inode *); |
63 | extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); | 63 | extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); |
64 | 64 | ||
65 | extern int init_ext3_acl(void); | ||
66 | extern void exit_ext3_acl(void); | ||
67 | |||
68 | #else /* CONFIG_EXT3_FS_POSIX_ACL */ | 65 | #else /* CONFIG_EXT3_FS_POSIX_ACL */ |
69 | #include <linux/sched.h> | 66 | #include <linux/sched.h> |
70 | #define ext3_permission NULL | 67 | #define ext3_permission NULL |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a504a40d6d29..063d994bda0b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, | |||
1269 | goal = le32_to_cpu(es->s_first_data_block); | 1269 | goal = le32_to_cpu(es->s_first_data_block); |
1270 | group_no = (goal - le32_to_cpu(es->s_first_data_block)) / | 1270 | group_no = (goal - le32_to_cpu(es->s_first_data_block)) / |
1271 | EXT3_BLOCKS_PER_GROUP(sb); | 1271 | EXT3_BLOCKS_PER_GROUP(sb); |
1272 | goal_group = group_no; | ||
1273 | retry_alloc: | ||
1272 | gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); | 1274 | gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); |
1273 | if (!gdp) | 1275 | if (!gdp) |
1274 | goto io_error; | 1276 | goto io_error; |
1275 | 1277 | ||
1276 | goal_group = group_no; | ||
1277 | retry: | ||
1278 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | 1278 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); |
1279 | /* | 1279 | /* |
1280 | * if there is not enough free blocks to make a new resevation | 1280 | * if there is not enough free blocks to make a new resevation |
@@ -1349,7 +1349,7 @@ retry: | |||
1349 | if (my_rsv) { | 1349 | if (my_rsv) { |
1350 | my_rsv = NULL; | 1350 | my_rsv = NULL; |
1351 | group_no = goal_group; | 1351 | group_no = goal_group; |
1352 | goto retry; | 1352 | goto retry_alloc; |
1353 | } | 1353 | } |
1354 | /* No space left on the device */ | 1354 | /* No space left on the device */ |
1355 | *errp = -ENOSPC; | 1355 | *errp = -ENOSPC; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f804d5e9d60c..84be02e93652 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -925,7 +925,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
925 | set_buffer_new(bh_result); | 925 | set_buffer_new(bh_result); |
926 | got_it: | 926 | got_it: |
927 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 927 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); |
928 | if (blocks_to_boundary == 0) | 928 | if (count > blocks_to_boundary) |
929 | set_buffer_boundary(bh_result); | 929 | set_buffer_boundary(bh_result); |
930 | err = count; | 930 | err = count; |
931 | /* Clean up and exit */ | 931 | /* Clean up and exit */ |
@@ -1009,11 +1009,14 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, | |||
1009 | buffer_trace_init(&dummy.b_history); | 1009 | buffer_trace_init(&dummy.b_history); |
1010 | err = ext3_get_blocks_handle(handle, inode, block, 1, | 1010 | err = ext3_get_blocks_handle(handle, inode, block, 1, |
1011 | &dummy, create, 1); | 1011 | &dummy, create, 1); |
1012 | if (err == 1) { | 1012 | /* |
1013 | * ext3_get_blocks_handle() returns number of blocks | ||
1014 | * mapped. 0 in case of a HOLE. | ||
1015 | */ | ||
1016 | if (err > 0) { | ||
1017 | if (err > 1) | ||
1018 | WARN_ON(1); | ||
1013 | err = 0; | 1019 | err = 0; |
1014 | } else if (err >= 0) { | ||
1015 | WARN_ON(1); | ||
1016 | err = -EIO; | ||
1017 | } | 1020 | } |
1018 | *errp = err; | 1021 | *errp = err; |
1019 | if (!err && buffer_mapped(&dummy)) { | 1022 | if (!err && buffer_mapped(&dummy)) { |
@@ -1158,7 +1161,7 @@ retry: | |||
1158 | ret = PTR_ERR(handle); | 1161 | ret = PTR_ERR(handle); |
1159 | goto out; | 1162 | goto out; |
1160 | } | 1163 | } |
1161 | if (test_opt(inode->i_sb, NOBH)) | 1164 | if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) |
1162 | ret = nobh_prepare_write(page, from, to, ext3_get_block); | 1165 | ret = nobh_prepare_write(page, from, to, ext3_get_block); |
1163 | else | 1166 | else |
1164 | ret = block_prepare_write(page, from, to, ext3_get_block); | 1167 | ret = block_prepare_write(page, from, to, ext3_get_block); |
@@ -1244,7 +1247,7 @@ static int ext3_writeback_commit_write(struct file *file, struct page *page, | |||
1244 | if (new_i_size > EXT3_I(inode)->i_disksize) | 1247 | if (new_i_size > EXT3_I(inode)->i_disksize) |
1245 | EXT3_I(inode)->i_disksize = new_i_size; | 1248 | EXT3_I(inode)->i_disksize = new_i_size; |
1246 | 1249 | ||
1247 | if (test_opt(inode->i_sb, NOBH)) | 1250 | if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) |
1248 | ret = nobh_commit_write(file, page, from, to); | 1251 | ret = nobh_commit_write(file, page, from, to); |
1249 | else | 1252 | else |
1250 | ret = generic_commit_write(file, page, from, to); | 1253 | ret = generic_commit_write(file, page, from, to); |
@@ -1494,7 +1497,7 @@ static int ext3_writeback_writepage(struct page *page, | |||
1494 | goto out_fail; | 1497 | goto out_fail; |
1495 | } | 1498 | } |
1496 | 1499 | ||
1497 | if (test_opt(inode->i_sb, NOBH)) | 1500 | if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) |
1498 | ret = nobh_writepage(page, ext3_get_block, wbc); | 1501 | ret = nobh_writepage(page, ext3_get_block, wbc); |
1499 | else | 1502 | else |
1500 | ret = block_write_full_page(page, ext3_get_block, wbc); | 1503 | ret = block_write_full_page(page, ext3_get_block, wbc); |
@@ -2402,14 +2405,15 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, | |||
2402 | struct buffer_head *bh; | 2405 | struct buffer_head *bh; |
2403 | struct ext3_group_desc * gdp; | 2406 | struct ext3_group_desc * gdp; |
2404 | 2407 | ||
2405 | 2408 | if (!ext3_valid_inum(sb, ino)) { | |
2406 | if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO && | 2409 | /* |
2407 | ino != EXT3_RESIZE_INO && ino < EXT3_FIRST_INO(sb)) || | 2410 | * This error is already checked for in namei.c unless we are |
2408 | ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) { | 2411 | * looking at an NFS filehandle, in which case no error |
2409 | ext3_error(sb, "ext3_get_inode_block", | 2412 | * report is needed |
2410 | "bad inode number: %lu", ino); | 2413 | */ |
2411 | return 0; | 2414 | return 0; |
2412 | } | 2415 | } |
2416 | |||
2413 | block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); | 2417 | block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); |
2414 | if (block_group >= EXT3_SB(sb)->s_groups_count) { | 2418 | if (block_group >= EXT3_SB(sb)->s_groups_count) { |
2415 | ext3_error(sb,"ext3_get_inode_block","group >= groups count"); | 2419 | ext3_error(sb,"ext3_get_inode_block","group >= groups count"); |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index d9176dba3698..2aa7101b27cd 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -1000,7 +1000,12 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str | |||
1000 | if (bh) { | 1000 | if (bh) { |
1001 | unsigned long ino = le32_to_cpu(de->inode); | 1001 | unsigned long ino = le32_to_cpu(de->inode); |
1002 | brelse (bh); | 1002 | brelse (bh); |
1003 | inode = iget(dir->i_sb, ino); | 1003 | if (!ext3_valid_inum(dir->i_sb, ino)) { |
1004 | ext3_error(dir->i_sb, "ext3_lookup", | ||
1005 | "bad inode number: %lu", ino); | ||
1006 | inode = NULL; | ||
1007 | } else | ||
1008 | inode = iget(dir->i_sb, ino); | ||
1004 | 1009 | ||
1005 | if (!inode) | 1010 | if (!inode) |
1006 | return ERR_PTR(-EACCES); | 1011 | return ERR_PTR(-EACCES); |
@@ -1028,7 +1033,13 @@ struct dentry *ext3_get_parent(struct dentry *child) | |||
1028 | return ERR_PTR(-ENOENT); | 1033 | return ERR_PTR(-ENOENT); |
1029 | ino = le32_to_cpu(de->inode); | 1034 | ino = le32_to_cpu(de->inode); |
1030 | brelse(bh); | 1035 | brelse(bh); |
1031 | inode = iget(child->d_inode->i_sb, ino); | 1036 | |
1037 | if (!ext3_valid_inum(child->d_inode->i_sb, ino)) { | ||
1038 | ext3_error(child->d_inode->i_sb, "ext3_get_parent", | ||
1039 | "bad inode number: %lu", ino); | ||
1040 | inode = NULL; | ||
1041 | } else | ||
1042 | inode = iget(child->d_inode->i_sb, ino); | ||
1032 | 1043 | ||
1033 | if (!inode) | 1044 | if (!inode) |
1034 | return ERR_PTR(-EACCES); | 1045 | return ERR_PTR(-EACCES); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f2dd71336612..3559086eee5f 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -554,6 +554,47 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
554 | return 0; | 554 | return 0; |
555 | } | 555 | } |
556 | 556 | ||
557 | |||
558 | static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) | ||
559 | { | ||
560 | __u32 *objp = vobjp; | ||
561 | unsigned long ino = objp[0]; | ||
562 | __u32 generation = objp[1]; | ||
563 | struct inode *inode; | ||
564 | struct dentry *result; | ||
565 | |||
566 | if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) | ||
567 | return ERR_PTR(-ESTALE); | ||
568 | if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) | ||
569 | return ERR_PTR(-ESTALE); | ||
570 | |||
571 | /* iget isn't really right if the inode is currently unallocated!! | ||
572 | * | ||
573 | * ext3_read_inode will return a bad_inode if the inode had been | ||
574 | * deleted, so we should be safe. | ||
575 | * | ||
576 | * Currently we don't know the generation for parent directory, so | ||
577 | * a generation of 0 means "accept any" | ||
578 | */ | ||
579 | inode = iget(sb, ino); | ||
580 | if (inode == NULL) | ||
581 | return ERR_PTR(-ENOMEM); | ||
582 | if (is_bad_inode(inode) || | ||
583 | (generation && inode->i_generation != generation)) { | ||
584 | iput(inode); | ||
585 | return ERR_PTR(-ESTALE); | ||
586 | } | ||
587 | /* now to find a dentry. | ||
588 | * If possible, get a well-connected one | ||
589 | */ | ||
590 | result = d_alloc_anon(inode); | ||
591 | if (!result) { | ||
592 | iput(inode); | ||
593 | return ERR_PTR(-ENOMEM); | ||
594 | } | ||
595 | return result; | ||
596 | } | ||
597 | |||
557 | #ifdef CONFIG_QUOTA | 598 | #ifdef CONFIG_QUOTA |
558 | #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") | 599 | #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") |
559 | #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 600 | #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
@@ -622,6 +663,7 @@ static struct super_operations ext3_sops = { | |||
622 | 663 | ||
623 | static struct export_operations ext3_export_ops = { | 664 | static struct export_operations ext3_export_ops = { |
624 | .get_parent = ext3_get_parent, | 665 | .get_parent = ext3_get_parent, |
666 | .get_dentry = ext3_get_dentry, | ||
625 | }; | 667 | }; |
626 | 668 | ||
627 | enum { | 669 | enum { |
@@ -2614,7 +2656,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, | |||
2614 | struct buffer_head *bh; | 2656 | struct buffer_head *bh; |
2615 | handle_t *handle = journal_current_handle(); | 2657 | handle_t *handle = journal_current_handle(); |
2616 | 2658 | ||
2617 | mutex_lock(&inode->i_mutex); | 2659 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); |
2618 | while (towrite > 0) { | 2660 | while (towrite > 0) { |
2619 | tocopy = sb->s_blocksize - offset < towrite ? | 2661 | tocopy = sb->s_blocksize - offset < towrite ? |
2620 | sb->s_blocksize - offset : towrite; | 2662 | sb->s_blocksize - offset : towrite; |
@@ -240,13 +240,9 @@ static struct fdtable *alloc_fdtable(int nr) | |||
240 | if (!fdt) | 240 | if (!fdt) |
241 | goto out; | 241 | goto out; |
242 | 242 | ||
243 | nfds = 8 * L1_CACHE_BYTES; | 243 | nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1)); |
244 | /* Expand to the max in easy steps */ | 244 | if (nfds > NR_OPEN) |
245 | while (nfds <= nr) { | 245 | nfds = NR_OPEN; |
246 | nfds = nfds * 2; | ||
247 | if (nfds > NR_OPEN) | ||
248 | nfds = NR_OPEN; | ||
249 | } | ||
250 | 246 | ||
251 | new_openset = alloc_fdset(nfds); | 247 | new_openset = alloc_fdset(nfds); |
252 | new_execset = alloc_fdset(nfds); | 248 | new_execset = alloc_fdset(nfds); |
@@ -277,11 +273,13 @@ static struct fdtable *alloc_fdtable(int nr) | |||
277 | } while (nfds <= nr); | 273 | } while (nfds <= nr); |
278 | new_fds = alloc_fd_array(nfds); | 274 | new_fds = alloc_fd_array(nfds); |
279 | if (!new_fds) | 275 | if (!new_fds) |
280 | goto out; | 276 | goto out2; |
281 | fdt->fd = new_fds; | 277 | fdt->fd = new_fds; |
282 | fdt->max_fds = nfds; | 278 | fdt->max_fds = nfds; |
283 | fdt->free_files = NULL; | 279 | fdt->free_files = NULL; |
284 | return fdt; | 280 | return fdt; |
281 | out2: | ||
282 | nfds = fdt->max_fdset; | ||
285 | out: | 283 | out: |
286 | if (new_openset) | 284 | if (new_openset) |
287 | free_fdset(new_openset, nfds); | 285 | free_fdset(new_openset, nfds); |
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 29cce456c7ce..43886fa00a2a 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
@@ -246,6 +246,8 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
246 | u_long page, npages, block, pblocks, nblocks, offset; | 246 | u_long page, npages, block, pblocks, nblocks, offset; |
247 | loff_t pos; | 247 | loff_t pos; |
248 | 248 | ||
249 | lock_kernel(); | ||
250 | |||
249 | switch ((long)fp->f_pos) { | 251 | switch ((long)fp->f_pos) { |
250 | case 0: | 252 | case 0: |
251 | if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) | 253 | if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) |
diff --git a/fs/fuse/control.c b/fs/fuse/control.c index a3bce3a77253..46fe60b2da23 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c | |||
@@ -105,7 +105,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, | |||
105 | 105 | ||
106 | /* | 106 | /* |
107 | * Add a connection to the control filesystem (if it exists). Caller | 107 | * Add a connection to the control filesystem (if it exists). Caller |
108 | * must host fuse_mutex | 108 | * must hold fuse_mutex |
109 | */ | 109 | */ |
110 | int fuse_ctl_add_conn(struct fuse_conn *fc) | 110 | int fuse_ctl_add_conn(struct fuse_conn *fc) |
111 | { | 111 | { |
@@ -139,7 +139,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) | |||
139 | 139 | ||
140 | /* | 140 | /* |
141 | * Remove a connection from the control filesystem (if it exists). | 141 | * Remove a connection from the control filesystem (if it exists). |
142 | * Caller must host fuse_mutex | 142 | * Caller must hold fuse_mutex |
143 | */ | 143 | */ |
144 | void fuse_ctl_remove_conn(struct fuse_conn *fc) | 144 | void fuse_ctl_remove_conn(struct fuse_conn *fc) |
145 | { | 145 | { |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 72a74cde6de8..409ce6a7cca4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -14,6 +14,33 @@ | |||
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/namei.h> | 15 | #include <linux/namei.h> |
16 | 16 | ||
17 | #if BITS_PER_LONG >= 64 | ||
18 | static inline void fuse_dentry_settime(struct dentry *entry, u64 time) | ||
19 | { | ||
20 | entry->d_time = time; | ||
21 | } | ||
22 | |||
23 | static inline u64 fuse_dentry_time(struct dentry *entry) | ||
24 | { | ||
25 | return entry->d_time; | ||
26 | } | ||
27 | #else | ||
28 | /* | ||
29 | * On 32 bit archs store the high 32 bits of time in d_fsdata | ||
30 | */ | ||
31 | static void fuse_dentry_settime(struct dentry *entry, u64 time) | ||
32 | { | ||
33 | entry->d_time = time; | ||
34 | entry->d_fsdata = (void *) (unsigned long) (time >> 32); | ||
35 | } | ||
36 | |||
37 | static u64 fuse_dentry_time(struct dentry *entry) | ||
38 | { | ||
39 | return (u64) entry->d_time + | ||
40 | ((u64) (unsigned long) entry->d_fsdata << 32); | ||
41 | } | ||
42 | #endif | ||
43 | |||
17 | /* | 44 | /* |
18 | * FUSE caches dentries and attributes with separate timeout. The | 45 | * FUSE caches dentries and attributes with separate timeout. The |
19 | * time in jiffies until the dentry/attributes are valid is stored in | 46 | * time in jiffies until the dentry/attributes are valid is stored in |
@@ -23,10 +50,13 @@ | |||
23 | /* | 50 | /* |
24 | * Calculate the time in jiffies until a dentry/attributes are valid | 51 | * Calculate the time in jiffies until a dentry/attributes are valid |
25 | */ | 52 | */ |
26 | static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) | 53 | static u64 time_to_jiffies(unsigned long sec, unsigned long nsec) |
27 | { | 54 | { |
28 | struct timespec ts = {sec, nsec}; | 55 | if (sec || nsec) { |
29 | return jiffies + timespec_to_jiffies(&ts); | 56 | struct timespec ts = {sec, nsec}; |
57 | return get_jiffies_64() + timespec_to_jiffies(&ts); | ||
58 | } else | ||
59 | return 0; | ||
30 | } | 60 | } |
31 | 61 | ||
32 | /* | 62 | /* |
@@ -35,7 +65,8 @@ static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) | |||
35 | */ | 65 | */ |
36 | static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) | 66 | static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) |
37 | { | 67 | { |
38 | entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec); | 68 | fuse_dentry_settime(entry, |
69 | time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); | ||
39 | if (entry->d_inode) | 70 | if (entry->d_inode) |
40 | get_fuse_inode(entry->d_inode)->i_time = | 71 | get_fuse_inode(entry->d_inode)->i_time = |
41 | time_to_jiffies(o->attr_valid, o->attr_valid_nsec); | 72 | time_to_jiffies(o->attr_valid, o->attr_valid_nsec); |
@@ -47,7 +78,7 @@ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) | |||
47 | */ | 78 | */ |
48 | void fuse_invalidate_attr(struct inode *inode) | 79 | void fuse_invalidate_attr(struct inode *inode) |
49 | { | 80 | { |
50 | get_fuse_inode(inode)->i_time = jiffies - 1; | 81 | get_fuse_inode(inode)->i_time = 0; |
51 | } | 82 | } |
52 | 83 | ||
53 | /* | 84 | /* |
@@ -60,7 +91,7 @@ void fuse_invalidate_attr(struct inode *inode) | |||
60 | */ | 91 | */ |
61 | static void fuse_invalidate_entry_cache(struct dentry *entry) | 92 | static void fuse_invalidate_entry_cache(struct dentry *entry) |
62 | { | 93 | { |
63 | entry->d_time = jiffies - 1; | 94 | fuse_dentry_settime(entry, 0); |
64 | } | 95 | } |
65 | 96 | ||
66 | /* | 97 | /* |
@@ -102,7 +133,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) | |||
102 | 133 | ||
103 | if (inode && is_bad_inode(inode)) | 134 | if (inode && is_bad_inode(inode)) |
104 | return 0; | 135 | return 0; |
105 | else if (time_after(jiffies, entry->d_time)) { | 136 | else if (fuse_dentry_time(entry) < get_jiffies_64()) { |
106 | int err; | 137 | int err; |
107 | struct fuse_entry_out outarg; | 138 | struct fuse_entry_out outarg; |
108 | struct fuse_conn *fc; | 139 | struct fuse_conn *fc; |
@@ -666,7 +697,7 @@ static int fuse_revalidate(struct dentry *entry) | |||
666 | if (!fuse_allow_task(fc, current)) | 697 | if (!fuse_allow_task(fc, current)) |
667 | return -EACCES; | 698 | return -EACCES; |
668 | if (get_node_id(inode) != FUSE_ROOT_ID && | 699 | if (get_node_id(inode) != FUSE_ROOT_ID && |
669 | time_before_eq(jiffies, fi->i_time)) | 700 | fi->i_time >= get_jiffies_64()) |
670 | return 0; | 701 | return 0; |
671 | 702 | ||
672 | return fuse_do_getattr(inode); | 703 | return fuse_do_getattr(inode); |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 63614ed16336..5c4fcd1dbf59 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -395,14 +395,16 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, | |||
395 | struct fuse_readpages_data data; | 395 | struct fuse_readpages_data data; |
396 | int err; | 396 | int err; |
397 | 397 | ||
398 | err = -EIO; | ||
398 | if (is_bad_inode(inode)) | 399 | if (is_bad_inode(inode)) |
399 | return -EIO; | 400 | goto clean_pages_up; |
400 | 401 | ||
401 | data.file = file; | 402 | data.file = file; |
402 | data.inode = inode; | 403 | data.inode = inode; |
403 | data.req = fuse_get_req(fc); | 404 | data.req = fuse_get_req(fc); |
405 | err = PTR_ERR(data.req); | ||
404 | if (IS_ERR(data.req)) | 406 | if (IS_ERR(data.req)) |
405 | return PTR_ERR(data.req); | 407 | goto clean_pages_up; |
406 | 408 | ||
407 | err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); | 409 | err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); |
408 | if (!err) { | 410 | if (!err) { |
@@ -412,6 +414,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, | |||
412 | fuse_put_request(fc, data.req); | 414 | fuse_put_request(fc, data.req); |
413 | } | 415 | } |
414 | return err; | 416 | return err; |
417 | |||
418 | clean_pages_up: | ||
419 | put_pages_list(pages); | ||
420 | return err; | ||
415 | } | 421 | } |
416 | 422 | ||
417 | static size_t fuse_send_write(struct fuse_req *req, struct file *file, | 423 | static size_t fuse_send_write(struct fuse_req *req, struct file *file, |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0dbf96621841..69c7750d55b8 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -59,7 +59,7 @@ struct fuse_inode { | |||
59 | struct fuse_req *forget_req; | 59 | struct fuse_req *forget_req; |
60 | 60 | ||
61 | /** Time in jiffies until the file attributes are valid */ | 61 | /** Time in jiffies until the file attributes are valid */ |
62 | unsigned long i_time; | 62 | u64 i_time; |
63 | }; | 63 | }; |
64 | 64 | ||
65 | /** FUSE specific file data */ | 65 | /** FUSE specific file data */ |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index dcaaabd3b9c4..7d25092262ae 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -51,7 +51,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) | |||
51 | return NULL; | 51 | return NULL; |
52 | 52 | ||
53 | fi = get_fuse_inode(inode); | 53 | fi = get_fuse_inode(inode); |
54 | fi->i_time = jiffies - 1; | 54 | fi->i_time = 0; |
55 | fi->nodeid = 0; | 55 | fi->nodeid = 0; |
56 | fi->nlookup = 0; | 56 | fi->nlookup = 0; |
57 | fi->forget_req = fuse_request_alloc(); | 57 | fi->forget_req = fuse_request_alloc(); |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6449cb697967..c3920c96dadf 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -83,8 +83,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
83 | 83 | ||
84 | ret = -ENOMEM; | 84 | ret = -ENOMEM; |
85 | len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); | 85 | len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); |
86 | if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) | ||
87 | goto out; | ||
88 | 86 | ||
89 | if (vma->vm_flags & VM_MAYSHARE && | 87 | if (vma->vm_flags & VM_MAYSHARE && |
90 | hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), | 88 | hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), |
@@ -93,7 +91,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
93 | 91 | ||
94 | ret = 0; | 92 | ret = 0; |
95 | hugetlb_prefault_arch_hook(vma->vm_mm); | 93 | hugetlb_prefault_arch_hook(vma->vm_mm); |
96 | if (inode->i_size < len) | 94 | if (vma->vm_flags & VM_WRITE && inode->i_size < len) |
97 | inode->i_size = len; | 95 | inode->i_size = len; |
98 | out: | 96 | out: |
99 | mutex_unlock(&inode->i_mutex); | 97 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/inotify_user.c b/fs/inotify_user.c index f2386442adee..017cb0f134d6 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c | |||
@@ -187,7 +187,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, | |||
187 | { | 187 | { |
188 | struct inotify_kernel_event *kevent; | 188 | struct inotify_kernel_event *kevent; |
189 | 189 | ||
190 | kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); | 190 | kevent = kmem_cache_alloc(event_cachep, GFP_NOFS); |
191 | if (unlikely(!kevent)) | 191 | if (unlikely(!kevent)) |
192 | return NULL; | 192 | return NULL; |
193 | 193 | ||
diff --git a/fs/ioprio.c b/fs/ioprio.c index 93aa5715f224..78b1deae3fa2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c | |||
@@ -44,6 +44,9 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) | |||
44 | task->ioprio = ioprio; | 44 | task->ioprio = ioprio; |
45 | 45 | ||
46 | ioc = task->io_context; | 46 | ioc = task->io_context; |
47 | /* see wmb() in current_io_context() */ | ||
48 | smp_read_barrier_depends(); | ||
49 | |||
47 | if (ioc && ioc->set_ioprio) | 50 | if (ioc && ioc->set_ioprio) |
48 | ioc->set_ioprio(ioc, ioprio); | 51 | ioc->set_ioprio(ioc, ioprio); |
49 | 52 | ||
@@ -111,9 +114,9 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | |||
111 | continue; | 114 | continue; |
112 | ret = set_task_ioprio(p, ioprio); | 115 | ret = set_task_ioprio(p, ioprio); |
113 | if (ret) | 116 | if (ret) |
114 | break; | 117 | goto free_uid; |
115 | } while_each_thread(g, p); | 118 | } while_each_thread(g, p); |
116 | 119 | free_uid: | |
117 | if (who) | 120 | if (who) |
118 | free_uid(user); | 121 | free_uid(user); |
119 | break; | 122 | break; |
@@ -137,6 +140,29 @@ out: | |||
137 | return ret; | 140 | return ret; |
138 | } | 141 | } |
139 | 142 | ||
143 | int ioprio_best(unsigned short aprio, unsigned short bprio) | ||
144 | { | ||
145 | unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); | ||
146 | unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); | ||
147 | |||
148 | if (!ioprio_valid(aprio)) | ||
149 | return bprio; | ||
150 | if (!ioprio_valid(bprio)) | ||
151 | return aprio; | ||
152 | |||
153 | if (aclass == IOPRIO_CLASS_NONE) | ||
154 | aclass = IOPRIO_CLASS_BE; | ||
155 | if (bclass == IOPRIO_CLASS_NONE) | ||
156 | bclass = IOPRIO_CLASS_BE; | ||
157 | |||
158 | if (aclass == bclass) | ||
159 | return min(aprio, bprio); | ||
160 | if (aclass > bclass) | ||
161 | return bprio; | ||
162 | else | ||
163 | return aprio; | ||
164 | } | ||
165 | |||
140 | asmlinkage long sys_ioprio_get(int which, int who) | 166 | asmlinkage long sys_ioprio_get(int which, int who) |
141 | { | 167 | { |
142 | struct task_struct *g, *p; | 168 | struct task_struct *g, *p; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 0971814c38b8..42da60784311 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal) | |||
261 | struct buffer_head *bh = jh2bh(jh); | 261 | struct buffer_head *bh = jh2bh(jh); |
262 | 262 | ||
263 | jbd_lock_bh_state(bh); | 263 | jbd_lock_bh_state(bh); |
264 | kfree(jh->b_committed_data); | 264 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
265 | jh->b_committed_data = NULL; | 265 | jh->b_committed_data = NULL; |
266 | jbd_unlock_bh_state(bh); | 266 | jbd_unlock_bh_state(bh); |
267 | } | 267 | } |
@@ -745,14 +745,14 @@ restart_loop: | |||
745 | * Otherwise, we can just throw away the frozen data now. | 745 | * Otherwise, we can just throw away the frozen data now. |
746 | */ | 746 | */ |
747 | if (jh->b_committed_data) { | 747 | if (jh->b_committed_data) { |
748 | kfree(jh->b_committed_data); | 748 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
749 | jh->b_committed_data = NULL; | 749 | jh->b_committed_data = NULL; |
750 | if (jh->b_frozen_data) { | 750 | if (jh->b_frozen_data) { |
751 | jh->b_committed_data = jh->b_frozen_data; | 751 | jh->b_committed_data = jh->b_frozen_data; |
752 | jh->b_frozen_data = NULL; | 752 | jh->b_frozen_data = NULL; |
753 | } | 753 | } |
754 | } else if (jh->b_frozen_data) { | 754 | } else if (jh->b_frozen_data) { |
755 | kfree(jh->b_frozen_data); | 755 | jbd_slab_free(jh->b_frozen_data, bh->b_size); |
756 | jh->b_frozen_data = NULL; | 756 | jh->b_frozen_data = NULL; |
757 | } | 757 | } |
758 | 758 | ||
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 8c9b28dff119..f66724ce443a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit); | |||
84 | 84 | ||
85 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 85 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
86 | static void __journal_abort_soft (journal_t *journal, int errno); | 86 | static void __journal_abort_soft (journal_t *journal, int errno); |
87 | static int journal_create_jbd_slab(size_t slab_size); | ||
87 | 88 | ||
88 | /* | 89 | /* |
89 | * Helper function used to manage commit timeouts | 90 | * Helper function used to manage commit timeouts |
@@ -328,10 +329,10 @@ repeat: | |||
328 | char *tmp; | 329 | char *tmp; |
329 | 330 | ||
330 | jbd_unlock_bh_state(bh_in); | 331 | jbd_unlock_bh_state(bh_in); |
331 | tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); | 332 | tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); |
332 | jbd_lock_bh_state(bh_in); | 333 | jbd_lock_bh_state(bh_in); |
333 | if (jh_in->b_frozen_data) { | 334 | if (jh_in->b_frozen_data) { |
334 | kfree(tmp); | 335 | jbd_slab_free(tmp, bh_in->b_size); |
335 | goto repeat; | 336 | goto repeat; |
336 | } | 337 | } |
337 | 338 | ||
@@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal) | |||
1069 | int journal_load(journal_t *journal) | 1070 | int journal_load(journal_t *journal) |
1070 | { | 1071 | { |
1071 | int err; | 1072 | int err; |
1073 | journal_superblock_t *sb; | ||
1072 | 1074 | ||
1073 | err = load_superblock(journal); | 1075 | err = load_superblock(journal); |
1074 | if (err) | 1076 | if (err) |
1075 | return err; | 1077 | return err; |
1076 | 1078 | ||
1079 | sb = journal->j_superblock; | ||
1077 | /* If this is a V2 superblock, then we have to check the | 1080 | /* If this is a V2 superblock, then we have to check the |
1078 | * features flags on it. */ | 1081 | * features flags on it. */ |
1079 | 1082 | ||
1080 | if (journal->j_format_version >= 2) { | 1083 | if (journal->j_format_version >= 2) { |
1081 | journal_superblock_t *sb = journal->j_superblock; | ||
1082 | |||
1083 | if ((sb->s_feature_ro_compat & | 1084 | if ((sb->s_feature_ro_compat & |
1084 | ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || | 1085 | ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || |
1085 | (sb->s_feature_incompat & | 1086 | (sb->s_feature_incompat & |
@@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal) | |||
1090 | } | 1091 | } |
1091 | } | 1092 | } |
1092 | 1093 | ||
1094 | /* | ||
1095 | * Create a slab for this blocksize | ||
1096 | */ | ||
1097 | err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); | ||
1098 | if (err) | ||
1099 | return err; | ||
1100 | |||
1093 | /* Let the recovery code check whether it needs to recover any | 1101 | /* Let the recovery code check whether it needs to recover any |
1094 | * data from the journal. */ | 1102 | * data from the journal. */ |
1095 | if (journal_recover(journal)) | 1103 | if (journal_recover(journal)) |
@@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) | |||
1612 | } | 1620 | } |
1613 | 1621 | ||
1614 | /* | 1622 | /* |
1623 | * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed | ||
1624 | * and allocate frozen and commit buffers from these slabs. | ||
1625 | * | ||
1626 | * The reason for doing this is to avoid SLAB_DEBUG, since it could | |
1627 | * cause a bh to cross a page boundary. | |
1628 | */ | ||
1629 | |||
1630 | #define JBD_MAX_SLABS 5 | ||
1631 | #define JBD_SLAB_INDEX(size) (size >> 11) | ||
1632 | |||
1633 | static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; | ||
1634 | static const char *jbd_slab_names[JBD_MAX_SLABS] = { | ||
1635 | "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" | ||
1636 | }; | ||
1637 | |||
1638 | static void journal_destroy_jbd_slabs(void) | ||
1639 | { | ||
1640 | int i; | ||
1641 | |||
1642 | for (i = 0; i < JBD_MAX_SLABS; i++) { | ||
1643 | if (jbd_slab[i]) | ||
1644 | kmem_cache_destroy(jbd_slab[i]); | ||
1645 | jbd_slab[i] = NULL; | ||
1646 | } | ||
1647 | } | ||
1648 | |||
1649 | static int journal_create_jbd_slab(size_t slab_size) | ||
1650 | { | ||
1651 | int i = JBD_SLAB_INDEX(slab_size); | ||
1652 | |||
1653 | BUG_ON(i >= JBD_MAX_SLABS); | ||
1654 | |||
1655 | /* | ||
1656 | * Check if we already have a slab created for this size | ||
1657 | */ | ||
1658 | if (jbd_slab[i]) | ||
1659 | return 0; | ||
1660 | |||
1661 | /* | ||
1662 | * Create a slab and force alignment to be same as slabsize - | ||
1663 | * this will make sure that allocations won't cross the page | ||
1664 | * boundary. | ||
1665 | */ | ||
1666 | jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], | ||
1667 | slab_size, slab_size, 0, NULL, NULL); | ||
1668 | if (!jbd_slab[i]) { | ||
1669 | printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); | ||
1670 | return -ENOMEM; | ||
1671 | } | ||
1672 | return 0; | ||
1673 | } | ||
1674 | |||
1675 | void * jbd_slab_alloc(size_t size, gfp_t flags) | ||
1676 | { | ||
1677 | int idx; | ||
1678 | |||
1679 | idx = JBD_SLAB_INDEX(size); | ||
1680 | BUG_ON(jbd_slab[idx] == NULL); | ||
1681 | return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); | ||
1682 | } | ||
1683 | |||
1684 | void jbd_slab_free(void *ptr, size_t size) | ||
1685 | { | ||
1686 | int idx; | ||
1687 | |||
1688 | idx = JBD_SLAB_INDEX(size); | ||
1689 | BUG_ON(jbd_slab[idx] == NULL); | ||
1690 | kmem_cache_free(jbd_slab[idx], ptr); | ||
1691 | } | ||
1692 | |||
1693 | /* | ||
1615 | * Journal_head storage management | 1694 | * Journal_head storage management |
1616 | */ | 1695 | */ |
1617 | static kmem_cache_t *journal_head_cache; | 1696 | static kmem_cache_t *journal_head_cache; |
@@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
1799 | printk(KERN_WARNING "%s: freeing " | 1878 | printk(KERN_WARNING "%s: freeing " |
1800 | "b_frozen_data\n", | 1879 | "b_frozen_data\n", |
1801 | __FUNCTION__); | 1880 | __FUNCTION__); |
1802 | kfree(jh->b_frozen_data); | 1881 | jbd_slab_free(jh->b_frozen_data, bh->b_size); |
1803 | } | 1882 | } |
1804 | if (jh->b_committed_data) { | 1883 | if (jh->b_committed_data) { |
1805 | printk(KERN_WARNING "%s: freeing " | 1884 | printk(KERN_WARNING "%s: freeing " |
1806 | "b_committed_data\n", | 1885 | "b_committed_data\n", |
1807 | __FUNCTION__); | 1886 | __FUNCTION__); |
1808 | kfree(jh->b_committed_data); | 1887 | jbd_slab_free(jh->b_committed_data, bh->b_size); |
1809 | } | 1888 | } |
1810 | bh->b_private = NULL; | 1889 | bh->b_private = NULL; |
1811 | jh->b_bh = NULL; /* debug, really */ | 1890 | jh->b_bh = NULL; /* debug, really */ |
@@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void) | |||
1961 | journal_destroy_revoke_caches(); | 2040 | journal_destroy_revoke_caches(); |
1962 | journal_destroy_journal_head_cache(); | 2041 | journal_destroy_journal_head_cache(); |
1963 | journal_destroy_handle_cache(); | 2042 | journal_destroy_handle_cache(); |
2043 | journal_destroy_jbd_slabs(); | ||
1964 | } | 2044 | } |
1965 | 2045 | ||
1966 | static int __init journal_init(void) | 2046 | static int __init journal_init(void) |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 508b2ea91f43..f5169a96260e 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -666,8 +666,9 @@ repeat: | |||
666 | if (!frozen_buffer) { | 666 | if (!frozen_buffer) { |
667 | JBUFFER_TRACE(jh, "allocate memory for buffer"); | 667 | JBUFFER_TRACE(jh, "allocate memory for buffer"); |
668 | jbd_unlock_bh_state(bh); | 668 | jbd_unlock_bh_state(bh); |
669 | frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, | 669 | frozen_buffer = |
670 | GFP_NOFS); | 670 | jbd_slab_alloc(jh2bh(jh)->b_size, |
671 | GFP_NOFS); | ||
671 | if (!frozen_buffer) { | 672 | if (!frozen_buffer) { |
672 | printk(KERN_EMERG | 673 | printk(KERN_EMERG |
673 | "%s: OOM for frozen_buffer\n", | 674 | "%s: OOM for frozen_buffer\n", |
@@ -726,7 +727,7 @@ done: | |||
726 | 727 | ||
727 | out: | 728 | out: |
728 | if (unlikely(frozen_buffer)) /* It's usually NULL */ | 729 | if (unlikely(frozen_buffer)) /* It's usually NULL */ |
729 | kfree(frozen_buffer); | 730 | jbd_slab_free(frozen_buffer, bh->b_size); |
730 | 731 | ||
731 | JBUFFER_TRACE(jh, "exit"); | 732 | JBUFFER_TRACE(jh, "exit"); |
732 | return error; | 733 | return error; |
@@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) | |||
879 | 880 | ||
880 | repeat: | 881 | repeat: |
881 | if (!jh->b_committed_data) { | 882 | if (!jh->b_committed_data) { |
882 | committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); | 883 | committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); |
883 | if (!committed_data) { | 884 | if (!committed_data) { |
884 | printk(KERN_EMERG "%s: No memory for committed data\n", | 885 | printk(KERN_EMERG "%s: No memory for committed data\n", |
885 | __FUNCTION__); | 886 | __FUNCTION__); |
@@ -906,7 +907,7 @@ repeat: | |||
906 | out: | 907 | out: |
907 | journal_put_journal_head(jh); | 908 | journal_put_journal_head(jh); |
908 | if (unlikely(committed_data)) | 909 | if (unlikely(committed_data)) |
909 | kfree(committed_data); | 910 | jbd_slab_free(committed_data, bh->b_size); |
910 | return err; | 911 | return err; |
911 | } | 912 | } |
912 | 913 | ||
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 9c2077e7e081..0ae3cd10702c 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -345,10 +345,8 @@ int jffs2_init_acl(struct inode *inode, struct inode *dir) | |||
345 | return rc; | 345 | return rc; |
346 | } | 346 | } |
347 | 347 | ||
348 | void jffs2_clear_acl(struct inode *inode) | 348 | void jffs2_clear_acl(struct jffs2_inode_info *f) |
349 | { | 349 | { |
350 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); | ||
351 | |||
352 | if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) { | 350 | if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) { |
353 | posix_acl_release(f->i_acl_access); | 351 | posix_acl_release(f->i_acl_access); |
354 | f->i_acl_access = JFFS2_ACL_NOT_CACHED; | 352 | f->i_acl_access = JFFS2_ACL_NOT_CACHED; |
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 8893bd1a6ba7..fa327dbd3171 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h | |||
@@ -30,7 +30,7 @@ struct jffs2_acl_header { | |||
30 | extern int jffs2_permission(struct inode *, int, struct nameidata *); | 30 | extern int jffs2_permission(struct inode *, int, struct nameidata *); |
31 | extern int jffs2_acl_chmod(struct inode *); | 31 | extern int jffs2_acl_chmod(struct inode *); |
32 | extern int jffs2_init_acl(struct inode *, struct inode *); | 32 | extern int jffs2_init_acl(struct inode *, struct inode *); |
33 | extern void jffs2_clear_acl(struct inode *); | 33 | extern void jffs2_clear_acl(struct jffs2_inode_info *); |
34 | 34 | ||
35 | extern struct xattr_handler jffs2_acl_access_xattr_handler; | 35 | extern struct xattr_handler jffs2_acl_access_xattr_handler; |
36 | extern struct xattr_handler jffs2_acl_default_xattr_handler; | 36 | extern struct xattr_handler jffs2_acl_default_xattr_handler; |
@@ -40,6 +40,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; | |||
40 | #define jffs2_permission NULL | 40 | #define jffs2_permission NULL |
41 | #define jffs2_acl_chmod(inode) (0) | 41 | #define jffs2_acl_chmod(inode) (0) |
42 | #define jffs2_init_acl(inode,dir) (0) | 42 | #define jffs2_init_acl(inode,dir) (0) |
43 | #define jffs2_clear_acl(inode) | 43 | #define jffs2_clear_acl(f) |
44 | 44 | ||
45 | #endif /* CONFIG_JFFS2_FS_POSIX_ACL */ | 45 | #endif /* CONFIG_JFFS2_FS_POSIX_ACL */ |
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 8310c95478e9..33f291005012 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c | |||
@@ -190,7 +190,7 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x) | |||
190 | kmem_cache_free(tmp_dnode_info_slab, x); | 190 | kmem_cache_free(tmp_dnode_info_slab, x); |
191 | } | 191 | } |
192 | 192 | ||
193 | struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) | 193 | static struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) |
194 | { | 194 | { |
195 | struct jffs2_raw_node_ref *ret; | 195 | struct jffs2_raw_node_ref *ret; |
196 | 196 | ||
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index f752baa8d399..cae92c14116d 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h | |||
@@ -426,8 +426,6 @@ char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f); | |||
426 | /* scan.c */ | 426 | /* scan.c */ |
427 | int jffs2_scan_medium(struct jffs2_sb_info *c); | 427 | int jffs2_scan_medium(struct jffs2_sb_info *c); |
428 | void jffs2_rotate_lists(struct jffs2_sb_info *c); | 428 | void jffs2_rotate_lists(struct jffs2_sb_info *c); |
429 | int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, | ||
430 | uint32_t ofs, uint32_t len); | ||
431 | struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino); | 429 | struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino); |
432 | int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); | 430 | int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); |
433 | int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size); | 431 | int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size); |
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index cc1899268c43..266423b2709d 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c | |||
@@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) | |||
968 | struct jffs2_full_dirent *fd, *fds; | 968 | struct jffs2_full_dirent *fd, *fds; |
969 | int deleted; | 969 | int deleted; |
970 | 970 | ||
971 | jffs2_clear_acl(f); | ||
971 | jffs2_xattr_delete_inode(c, f->inocache); | 972 | jffs2_xattr_delete_inode(c, f->inocache); |
972 | down(&f->sem); | 973 | down(&f->sem); |
973 | deleted = f->inocache && !f->inocache->nlink; | 974 | deleted = f->inocache && !f->inocache->nlink; |
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 2bfdc33752d3..e2413466ddd5 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c | |||
@@ -274,8 +274,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) | |||
274 | return ret; | 274 | return ret; |
275 | } | 275 | } |
276 | 276 | ||
277 | int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf, | 277 | static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, |
278 | uint32_t ofs, uint32_t len) | 278 | uint32_t ofs, uint32_t len) |
279 | { | 279 | { |
280 | int ret; | 280 | int ret; |
281 | size_t retlen; | 281 | size_t retlen; |
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index c19bd476e8ec..e52cef526d90 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c | |||
@@ -252,6 +252,11 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs, | |||
252 | union jffs2_node_union *node; | 252 | union jffs2_node_union *node; |
253 | struct jffs2_eraseblock *jeb; | 253 | struct jffs2_eraseblock *jeb; |
254 | 254 | ||
255 | if (c->summary->sum_size == JFFS2_SUMMARY_NOSUM_SIZE) { | ||
256 | dbg_summary("Summary is disabled for this jeb! Skipping summary info!\n"); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
255 | node = invecs[0].iov_base; | 260 | node = invecs[0].iov_base; |
256 | jeb = &c->blocks[ofs / c->sector_size]; | 261 | jeb = &c->blocks[ofs / c->sector_size]; |
257 | ofs -= jeb->offset; | 262 | ofs -= jeb->offset; |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 18e66dbf23b4..25bc1ae08648 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -50,9 +50,10 @@ | |||
50 | * is used to write xdatum to medium. xd->version will be incremented. | 50 | * is used to write xdatum to medium. xd->version will be incremented. |
51 | * create_xattr_datum(c, xprefix, xname, xvalue, xsize) | 51 | * create_xattr_datum(c, xprefix, xname, xvalue, xsize) |
52 | * is used to create new xdatum and write to medium. | 52 | * is used to create new xdatum and write to medium. |
53 | * delete_xattr_datum(c, xd) | 53 | * unrefer_xattr_datum(c, xd) |
54 | * is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows | 54 | * is used to delete a xdatum. When nobody refers to this xdatum, JFFS2_XFLAGS_DEAD |
55 | * GC to reclaim those physical nodes. | 55 | * is set on xd->flags and it is chained to xattr_dead_list, or it is released immediately. |
56 | * In the first case, the garbage collector releases it later. | ||
56 | * -------------------------------------------------- */ | 57 | * -------------------------------------------------- */ |
57 | static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize) | 58 | static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize) |
58 | { | 59 | { |
@@ -394,22 +395,24 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c, | |||
394 | return xd; | 395 | return xd; |
395 | } | 396 | } |
396 | 397 | ||
397 | static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) | 398 | static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) |
398 | { | 399 | { |
399 | /* must be called under down_write(xattr_sem) */ | 400 | /* must be called under down_write(xattr_sem) */ |
400 | BUG_ON(atomic_read(&xd->refcnt)); | 401 | if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) { |
402 | uint32_t xid = xd->xid, version = xd->version; | ||
401 | 403 | ||
402 | unload_xattr_datum(c, xd); | 404 | unload_xattr_datum(c, xd); |
403 | xd->flags |= JFFS2_XFLAGS_DEAD; | 405 | xd->flags |= JFFS2_XFLAGS_DEAD; |
404 | spin_lock(&c->erase_completion_lock); | 406 | if (xd->node == (void *)xd) { |
405 | if (xd->node == (void *)xd) { | 407 | BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID)); |
406 | BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID)); | 408 | jffs2_free_xattr_datum(xd); |
407 | jffs2_free_xattr_datum(xd); | 409 | } else { |
408 | } else { | 410 | list_add(&xd->xindex, &c->xattr_dead_list); |
409 | list_add(&xd->xindex, &c->xattr_dead_list); | 411 | } |
412 | spin_unlock(&c->erase_completion_lock); | ||
413 | |||
414 | dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version); | ||
410 | } | 415 | } |
411 | spin_unlock(&c->erase_completion_lock); | ||
412 | dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version); | ||
413 | } | 416 | } |
414 | 417 | ||
415 | /* -------- xref related functions ------------------ | 418 | /* -------- xref related functions ------------------ |
@@ -580,8 +583,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re | |||
580 | dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n", | 583 | dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n", |
581 | ref->ino, ref->xid, ref->xseqno); | 584 | ref->ino, ref->xid, ref->xseqno); |
582 | 585 | ||
583 | if (atomic_dec_and_test(&xd->refcnt)) | 586 | unrefer_xattr_datum(c, xd); |
584 | delete_xattr_datum(c, xd); | ||
585 | } | 587 | } |
586 | 588 | ||
587 | void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) | 589 | void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) |
@@ -1119,8 +1121,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, | |||
1119 | ref->next = c->xref_dead_list; | 1121 | ref->next = c->xref_dead_list; |
1120 | c->xref_dead_list = ref; | 1122 | c->xref_dead_list = ref; |
1121 | spin_unlock(&c->erase_completion_lock); | 1123 | spin_unlock(&c->erase_completion_lock); |
1122 | if (atomic_dec_and_test(&xd->refcnt)) | 1124 | unrefer_xattr_datum(c, xd); |
1123 | delete_xattr_datum(c, xd); | ||
1124 | } else { | 1125 | } else { |
1125 | ref->ic = ic; | 1126 | ref->ic = ic; |
1126 | ref->xd = xd; | 1127 | ref->xd = xd; |
@@ -1156,8 +1157,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, | |||
1156 | down_write(&c->xattr_sem); | 1157 | down_write(&c->xattr_sem); |
1157 | if (rc) { | 1158 | if (rc) { |
1158 | JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); | 1159 | JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); |
1159 | if (atomic_dec_and_test(&xd->refcnt)) | 1160 | unrefer_xattr_datum(c, xd); |
1160 | delete_xattr_datum(c, xd); | ||
1161 | up_write(&c->xattr_sem); | 1161 | up_write(&c->xattr_sem); |
1162 | return rc; | 1162 | return rc; |
1163 | } | 1163 | } |
@@ -1170,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, | |||
1170 | ic->xref = ref; | 1170 | ic->xref = ref; |
1171 | } | 1171 | } |
1172 | rc = PTR_ERR(newref); | 1172 | rc = PTR_ERR(newref); |
1173 | if (atomic_dec_and_test(&xd->refcnt)) | 1173 | unrefer_xattr_datum(c, xd); |
1174 | delete_xattr_datum(c, xd); | ||
1175 | } else if (ref) { | 1174 | } else if (ref) { |
1176 | delete_xattr_ref(c, ref); | 1175 | delete_xattr_ref(c, ref); |
1177 | } | 1176 | } |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 43e3f566aad6..a223cf4faa9b 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -168,16 +168,15 @@ void jfs_dirty_inode(struct inode *inode) | |||
168 | set_cflag(COMMIT_Dirty, inode); | 168 | set_cflag(COMMIT_Dirty, inode); |
169 | } | 169 | } |
170 | 170 | ||
171 | static int | 171 | int jfs_get_block(struct inode *ip, sector_t lblock, |
172 | jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, | 172 | struct buffer_head *bh_result, int create) |
173 | struct buffer_head *bh_result, int create) | ||
174 | { | 173 | { |
175 | s64 lblock64 = lblock; | 174 | s64 lblock64 = lblock; |
176 | int rc = 0; | 175 | int rc = 0; |
177 | xad_t xad; | 176 | xad_t xad; |
178 | s64 xaddr; | 177 | s64 xaddr; |
179 | int xflag; | 178 | int xflag; |
180 | s32 xlen = max_blocks; | 179 | s32 xlen = bh_result->b_size >> ip->i_blkbits; |
181 | 180 | ||
182 | /* | 181 | /* |
183 | * Take appropriate lock on inode | 182 | * Take appropriate lock on inode |
@@ -188,7 +187,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, | |||
188 | IREAD_LOCK(ip); | 187 | IREAD_LOCK(ip); |
189 | 188 | ||
190 | if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && | 189 | if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && |
191 | (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && | 190 | (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && |
192 | xaddr) { | 191 | xaddr) { |
193 | if (xflag & XAD_NOTRECORDED) { | 192 | if (xflag & XAD_NOTRECORDED) { |
194 | if (!create) | 193 | if (!create) |
@@ -255,13 +254,6 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, | |||
255 | return rc; | 254 | return rc; |
256 | } | 255 | } |
257 | 256 | ||
258 | static int jfs_get_block(struct inode *ip, sector_t lblock, | ||
259 | struct buffer_head *bh_result, int create) | ||
260 | { | ||
261 | return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits, | ||
262 | bh_result, create); | ||
263 | } | ||
264 | |||
265 | static int jfs_writepage(struct page *page, struct writeback_control *wbc) | 257 | static int jfs_writepage(struct page *page, struct writeback_control *wbc) |
266 | { | 258 | { |
267 | return nobh_writepage(page, jfs_get_block, wbc); | 259 | return nobh_writepage(page, jfs_get_block, wbc); |
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index b5c7da6190dc..1fc48df670c8 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); | |||
32 | extern void jfs_free_zero_link(struct inode *); | 32 | extern void jfs_free_zero_link(struct inode *); |
33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); | 33 | extern struct dentry *jfs_get_parent(struct dentry *dentry); |
34 | extern void jfs_set_inode_flags(struct inode *); | 34 | extern void jfs_set_inode_flags(struct inode *); |
35 | extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | ||
35 | 36 | ||
36 | extern const struct address_space_operations jfs_aops; | 37 | extern const struct address_space_operations jfs_aops; |
37 | extern struct inode_operations jfs_dir_inode_operations; | 38 | extern struct inode_operations jfs_dir_inode_operations; |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 10c46231ce15..efbb586bed4b 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -2944,7 +2944,7 @@ int jfs_sync(void *arg) | |||
2944 | * Inode is being freed | 2944 | * Inode is being freed |
2945 | */ | 2945 | */ |
2946 | list_del_init(&jfs_ip->anon_inode_list); | 2946 | list_del_init(&jfs_ip->anon_inode_list); |
2947 | } else if (! !mutex_trylock(&jfs_ip->commit_mutex)) { | 2947 | } else if (mutex_trylock(&jfs_ip->commit_mutex)) { |
2948 | /* | 2948 | /* |
2949 | * inode will be removed from anonymous list | 2949 | * inode will be removed from anonymous list |
2950 | * when it is committed | 2950 | * when it is committed |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 09ea03f62277..295268ad231b 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -165,8 +165,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, | |||
165 | 165 | ||
166 | out3: | 166 | out3: |
167 | txEnd(tid); | 167 | txEnd(tid); |
168 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
169 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 168 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
169 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
170 | if (rc) { | 170 | if (rc) { |
171 | free_ea_wmap(ip); | 171 | free_ea_wmap(ip); |
172 | ip->i_nlink = 0; | 172 | ip->i_nlink = 0; |
@@ -300,8 +300,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) | |||
300 | 300 | ||
301 | out3: | 301 | out3: |
302 | txEnd(tid); | 302 | txEnd(tid); |
303 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
304 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 303 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
304 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
305 | if (rc) { | 305 | if (rc) { |
306 | free_ea_wmap(ip); | 306 | free_ea_wmap(ip); |
307 | ip->i_nlink = 0; | 307 | ip->i_nlink = 0; |
@@ -384,8 +384,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) | |||
384 | if (rc == -EIO) | 384 | if (rc == -EIO) |
385 | txAbort(tid, 1); | 385 | txAbort(tid, 1); |
386 | txEnd(tid); | 386 | txEnd(tid); |
387 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
388 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 387 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
388 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
389 | 389 | ||
390 | goto out2; | 390 | goto out2; |
391 | } | 391 | } |
@@ -422,8 +422,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) | |||
422 | 422 | ||
423 | txEnd(tid); | 423 | txEnd(tid); |
424 | 424 | ||
425 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
426 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 425 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
426 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
427 | 427 | ||
428 | /* | 428 | /* |
429 | * Truncating the directory index table is not guaranteed. It | 429 | * Truncating the directory index table is not guaranteed. It |
@@ -503,8 +503,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
503 | if (rc == -EIO) | 503 | if (rc == -EIO) |
504 | txAbort(tid, 1); /* Marks FS Dirty */ | 504 | txAbort(tid, 1); /* Marks FS Dirty */ |
505 | txEnd(tid); | 505 | txEnd(tid); |
506 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
507 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 506 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
507 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
508 | IWRITE_UNLOCK(ip); | 508 | IWRITE_UNLOCK(ip); |
509 | goto out1; | 509 | goto out1; |
510 | } | 510 | } |
@@ -527,8 +527,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
527 | if ((new_size = commitZeroLink(tid, ip)) < 0) { | 527 | if ((new_size = commitZeroLink(tid, ip)) < 0) { |
528 | txAbort(tid, 1); /* Marks FS Dirty */ | 528 | txAbort(tid, 1); /* Marks FS Dirty */ |
529 | txEnd(tid); | 529 | txEnd(tid); |
530 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
531 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 530 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
531 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
532 | IWRITE_UNLOCK(ip); | 532 | IWRITE_UNLOCK(ip); |
533 | rc = new_size; | 533 | rc = new_size; |
534 | goto out1; | 534 | goto out1; |
@@ -556,9 +556,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) | |||
556 | 556 | ||
557 | txEnd(tid); | 557 | txEnd(tid); |
558 | 558 | ||
559 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
560 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 559 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
561 | 560 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | |
562 | 561 | ||
563 | while (new_size && (rc == 0)) { | 562 | while (new_size && (rc == 0)) { |
564 | tid = txBegin(dip->i_sb, 0); | 563 | tid = txBegin(dip->i_sb, 0); |
@@ -847,8 +846,8 @@ static int jfs_link(struct dentry *old_dentry, | |||
847 | out: | 846 | out: |
848 | txEnd(tid); | 847 | txEnd(tid); |
849 | 848 | ||
850 | mutex_unlock(&JFS_IP(dir)->commit_mutex); | ||
851 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 849 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
850 | mutex_unlock(&JFS_IP(dir)->commit_mutex); | ||
852 | 851 | ||
853 | jfs_info("jfs_link: rc:%d", rc); | 852 | jfs_info("jfs_link: rc:%d", rc); |
854 | return rc; | 853 | return rc; |
@@ -1037,8 +1036,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, | |||
1037 | 1036 | ||
1038 | out3: | 1037 | out3: |
1039 | txEnd(tid); | 1038 | txEnd(tid); |
1040 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
1041 | mutex_unlock(&JFS_IP(ip)->commit_mutex); | 1039 | mutex_unlock(&JFS_IP(ip)->commit_mutex); |
1040 | mutex_unlock(&JFS_IP(dip)->commit_mutex); | ||
1042 | if (rc) { | 1041 | if (rc) { |
1043 | free_ea_wmap(ip); | 1042 | free_ea_wmap(ip); |
1044 | ip->i_nlink = 0; | 1043 | ip->i_nlink = 0; |
@@ -1160,10 +1159,11 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1160 | if (S_ISDIR(new_ip->i_mode)) { | 1159 | if (S_ISDIR(new_ip->i_mode)) { |
1161 | new_ip->i_nlink--; | 1160 | new_ip->i_nlink--; |
1162 | if (new_ip->i_nlink) { | 1161 | if (new_ip->i_nlink) { |
1163 | mutex_unlock(&JFS_IP(new_dir)->commit_mutex); | 1162 | mutex_unlock(&JFS_IP(new_ip)->commit_mutex); |
1164 | mutex_unlock(&JFS_IP(old_ip)->commit_mutex); | ||
1165 | if (old_dir != new_dir) | 1163 | if (old_dir != new_dir) |
1166 | mutex_unlock(&JFS_IP(old_dir)->commit_mutex); | 1164 | mutex_unlock(&JFS_IP(old_dir)->commit_mutex); |
1165 | mutex_unlock(&JFS_IP(old_ip)->commit_mutex); | ||
1166 | mutex_unlock(&JFS_IP(new_dir)->commit_mutex); | ||
1167 | if (!S_ISDIR(old_ip->i_mode) && new_ip) | 1167 | if (!S_ISDIR(old_ip->i_mode) && new_ip) |
1168 | IWRITE_UNLOCK(new_ip); | 1168 | IWRITE_UNLOCK(new_ip); |
1169 | jfs_error(new_ip->i_sb, | 1169 | jfs_error(new_ip->i_sb, |
@@ -1281,13 +1281,12 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1281 | 1281 | ||
1282 | out4: | 1282 | out4: |
1283 | txEnd(tid); | 1283 | txEnd(tid); |
1284 | |||
1285 | mutex_unlock(&JFS_IP(new_dir)->commit_mutex); | ||
1286 | mutex_unlock(&JFS_IP(old_ip)->commit_mutex); | ||
1287 | if (old_dir != new_dir) | ||
1288 | mutex_unlock(&JFS_IP(old_dir)->commit_mutex); | ||
1289 | if (new_ip) | 1284 | if (new_ip) |
1290 | mutex_unlock(&JFS_IP(new_ip)->commit_mutex); | 1285 | mutex_unlock(&JFS_IP(new_ip)->commit_mutex); |
1286 | if (old_dir != new_dir) | ||
1287 | mutex_unlock(&JFS_IP(old_dir)->commit_mutex); | ||
1288 | mutex_unlock(&JFS_IP(old_ip)->commit_mutex); | ||
1289 | mutex_unlock(&JFS_IP(new_dir)->commit_mutex); | ||
1291 | 1290 | ||
1292 | while (new_size && (rc == 0)) { | 1291 | while (new_size && (rc == 0)) { |
1293 | tid = txBegin(new_ip->i_sb, 0); | 1292 | tid = txBegin(new_ip->i_sb, 0); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4f6cfebc82db..143bcd1d5eaa 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/moduleparam.h> | 26 | #include <linux/moduleparam.h> |
27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
28 | #include <linux/posix_acl.h> | 28 | #include <linux/posix_acl.h> |
29 | #include <linux/buffer_head.h> | ||
29 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
30 | #include <linux/seq_file.h> | 31 | #include <linux/seq_file.h> |
31 | 32 | ||
@@ -298,7 +299,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, | |||
298 | break; | 299 | break; |
299 | } | 300 | } |
300 | 301 | ||
301 | #if defined(CONFIG_QUOTA) | 302 | #ifdef CONFIG_QUOTA |
302 | case Opt_quota: | 303 | case Opt_quota: |
303 | case Opt_usrquota: | 304 | case Opt_usrquota: |
304 | *flag |= JFS_USRQUOTA; | 305 | *flag |= JFS_USRQUOTA; |
@@ -597,7 +598,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
597 | if (sbi->flag & JFS_NOINTEGRITY) | 598 | if (sbi->flag & JFS_NOINTEGRITY) |
598 | seq_puts(seq, ",nointegrity"); | 599 | seq_puts(seq, ",nointegrity"); |
599 | 600 | ||
600 | #if defined(CONFIG_QUOTA) | 601 | #ifdef CONFIG_QUOTA |
601 | if (sbi->flag & JFS_USRQUOTA) | 602 | if (sbi->flag & JFS_USRQUOTA) |
602 | seq_puts(seq, ",usrquota"); | 603 | seq_puts(seq, ",usrquota"); |
603 | 604 | ||
@@ -608,6 +609,113 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
608 | return 0; | 609 | return 0; |
609 | } | 610 | } |
610 | 611 | ||
612 | #ifdef CONFIG_QUOTA | ||
613 | |||
614 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | ||
615 | * acquiring the locks... As quota files are never truncated and quota code | ||
616 | * itself serializes the operations (and noone else should touch the files) | ||
617 | * we don't have to be afraid of races */ | ||
618 | static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, | ||
619 | size_t len, loff_t off) | ||
620 | { | ||
621 | struct inode *inode = sb_dqopt(sb)->files[type]; | ||
622 | sector_t blk = off >> sb->s_blocksize_bits; | ||
623 | int err = 0; | ||
624 | int offset = off & (sb->s_blocksize - 1); | ||
625 | int tocopy; | ||
626 | size_t toread; | ||
627 | struct buffer_head tmp_bh; | ||
628 | struct buffer_head *bh; | ||
629 | loff_t i_size = i_size_read(inode); | ||
630 | |||
631 | if (off > i_size) | ||
632 | return 0; | ||
633 | if (off+len > i_size) | ||
634 | len = i_size-off; | ||
635 | toread = len; | ||
636 | while (toread > 0) { | ||
637 | tocopy = sb->s_blocksize - offset < toread ? | ||
638 | sb->s_blocksize - offset : toread; | ||
639 | |||
640 | tmp_bh.b_state = 0; | ||
641 | tmp_bh.b_size = 1 << inode->i_blkbits; | ||
642 | err = jfs_get_block(inode, blk, &tmp_bh, 0); | ||
643 | if (err) | ||
644 | return err; | ||
645 | if (!buffer_mapped(&tmp_bh)) /* A hole? */ | ||
646 | memset(data, 0, tocopy); | ||
647 | else { | ||
648 | bh = sb_bread(sb, tmp_bh.b_blocknr); | ||
649 | if (!bh) | ||
650 | return -EIO; | ||
651 | memcpy(data, bh->b_data+offset, tocopy); | ||
652 | brelse(bh); | ||
653 | } | ||
654 | offset = 0; | ||
655 | toread -= tocopy; | ||
656 | data += tocopy; | ||
657 | blk++; | ||
658 | } | ||
659 | return len; | ||
660 | } | ||
661 | |||
662 | /* Write to quotafile */ | ||
663 | static ssize_t jfs_quota_write(struct super_block *sb, int type, | ||
664 | const char *data, size_t len, loff_t off) | ||
665 | { | ||
666 | struct inode *inode = sb_dqopt(sb)->files[type]; | ||
667 | sector_t blk = off >> sb->s_blocksize_bits; | ||
668 | int err = 0; | ||
669 | int offset = off & (sb->s_blocksize - 1); | ||
670 | int tocopy; | ||
671 | size_t towrite = len; | ||
672 | struct buffer_head tmp_bh; | ||
673 | struct buffer_head *bh; | ||
674 | |||
675 | mutex_lock(&inode->i_mutex); | ||
676 | while (towrite > 0) { | ||
677 | tocopy = sb->s_blocksize - offset < towrite ? | ||
678 | sb->s_blocksize - offset : towrite; | ||
679 | |||
680 | tmp_bh.b_state = 0; | ||
681 | tmp_bh.b_size = 1 << inode->i_blkbits; | ||
682 | err = jfs_get_block(inode, blk, &tmp_bh, 1); | ||
683 | if (err) | ||
684 | goto out; | ||
685 | if (offset || tocopy != sb->s_blocksize) | ||
686 | bh = sb_bread(sb, tmp_bh.b_blocknr); | ||
687 | else | ||
688 | bh = sb_getblk(sb, tmp_bh.b_blocknr); | ||
689 | if (!bh) { | ||
690 | err = -EIO; | ||
691 | goto out; | ||
692 | } | ||
693 | lock_buffer(bh); | ||
694 | memcpy(bh->b_data+offset, data, tocopy); | ||
695 | flush_dcache_page(bh->b_page); | ||
696 | set_buffer_uptodate(bh); | ||
697 | mark_buffer_dirty(bh); | ||
698 | unlock_buffer(bh); | ||
699 | brelse(bh); | ||
700 | offset = 0; | ||
701 | towrite -= tocopy; | ||
702 | data += tocopy; | ||
703 | blk++; | ||
704 | } | ||
705 | out: | ||
706 | if (len == towrite) | ||
707 | return err; | ||
708 | if (inode->i_size < off+len-towrite) | ||
709 | i_size_write(inode, off+len-towrite); | ||
710 | inode->i_version++; | ||
711 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
712 | mark_inode_dirty(inode); | ||
713 | mutex_unlock(&inode->i_mutex); | ||
714 | return len - towrite; | ||
715 | } | ||
716 | |||
717 | #endif | ||
718 | |||
611 | static struct super_operations jfs_super_operations = { | 719 | static struct super_operations jfs_super_operations = { |
612 | .alloc_inode = jfs_alloc_inode, | 720 | .alloc_inode = jfs_alloc_inode, |
613 | .destroy_inode = jfs_destroy_inode, | 721 | .destroy_inode = jfs_destroy_inode, |
@@ -621,7 +729,11 @@ static struct super_operations jfs_super_operations = { | |||
621 | .unlockfs = jfs_unlockfs, | 729 | .unlockfs = jfs_unlockfs, |
622 | .statfs = jfs_statfs, | 730 | .statfs = jfs_statfs, |
623 | .remount_fs = jfs_remount, | 731 | .remount_fs = jfs_remount, |
624 | .show_options = jfs_show_options | 732 | .show_options = jfs_show_options, |
733 | #ifdef CONFIG_QUOTA | ||
734 | .quota_read = jfs_quota_read, | ||
735 | .quota_write = jfs_quota_write, | ||
736 | #endif | ||
625 | }; | 737 | }; |
626 | 738 | ||
627 | static struct export_operations jfs_export_operations = { | 739 | static struct export_operations jfs_export_operations = { |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 5980c45998cc..89ba0df14c22 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -454,7 +454,7 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho | |||
454 | fl->fl_ops = &nlmclnt_lock_ops; | 454 | fl->fl_ops = &nlmclnt_lock_ops; |
455 | } | 455 | } |
456 | 456 | ||
457 | static void do_vfs_lock(struct file_lock *fl) | 457 | static int do_vfs_lock(struct file_lock *fl) |
458 | { | 458 | { |
459 | int res = 0; | 459 | int res = 0; |
460 | switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { | 460 | switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { |
@@ -467,9 +467,7 @@ static void do_vfs_lock(struct file_lock *fl) | |||
467 | default: | 467 | default: |
468 | BUG(); | 468 | BUG(); |
469 | } | 469 | } |
470 | if (res < 0) | 470 | return res; |
471 | printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", | ||
472 | __FUNCTION__); | ||
473 | } | 471 | } |
474 | 472 | ||
475 | /* | 473 | /* |
@@ -498,6 +496,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) | |||
498 | struct nlm_host *host = req->a_host; | 496 | struct nlm_host *host = req->a_host; |
499 | struct nlm_res *resp = &req->a_res; | 497 | struct nlm_res *resp = &req->a_res; |
500 | struct nlm_wait *block = NULL; | 498 | struct nlm_wait *block = NULL; |
499 | unsigned char fl_flags = fl->fl_flags; | ||
501 | int status = -ENOLCK; | 500 | int status = -ENOLCK; |
502 | 501 | ||
503 | if (!host->h_monitored && nsm_monitor(host) < 0) { | 502 | if (!host->h_monitored && nsm_monitor(host) < 0) { |
@@ -505,6 +504,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) | |||
505 | host->h_name); | 504 | host->h_name); |
506 | goto out; | 505 | goto out; |
507 | } | 506 | } |
507 | fl->fl_flags |= FL_ACCESS; | ||
508 | status = do_vfs_lock(fl); | ||
509 | if (status < 0) | ||
510 | goto out; | ||
508 | 511 | ||
509 | block = nlmclnt_prepare_block(host, fl); | 512 | block = nlmclnt_prepare_block(host, fl); |
510 | again: | 513 | again: |
@@ -539,9 +542,10 @@ again: | |||
539 | up_read(&host->h_rwsem); | 542 | up_read(&host->h_rwsem); |
540 | goto again; | 543 | goto again; |
541 | } | 544 | } |
542 | fl->fl_flags |= FL_SLEEP; | ||
543 | /* Ensure the resulting lock will get added to granted list */ | 545 | /* Ensure the resulting lock will get added to granted list */ |
544 | do_vfs_lock(fl); | 546 | fl->fl_flags = fl_flags | FL_SLEEP; |
547 | if (do_vfs_lock(fl) < 0) | ||
548 | printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); | ||
545 | up_read(&host->h_rwsem); | 549 | up_read(&host->h_rwsem); |
546 | } | 550 | } |
547 | status = nlm_stat_to_errno(resp->status); | 551 | status = nlm_stat_to_errno(resp->status); |
@@ -552,6 +556,7 @@ out_unblock: | |||
552 | nlmclnt_cancel(host, req->a_args.block, fl); | 556 | nlmclnt_cancel(host, req->a_args.block, fl); |
553 | out: | 557 | out: |
554 | nlm_release_call(req); | 558 | nlm_release_call(req); |
559 | fl->fl_flags = fl_flags; | ||
555 | return status; | 560 | return status; |
556 | } | 561 | } |
557 | 562 | ||
@@ -606,15 +611,19 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) | |||
606 | { | 611 | { |
607 | struct nlm_host *host = req->a_host; | 612 | struct nlm_host *host = req->a_host; |
608 | struct nlm_res *resp = &req->a_res; | 613 | struct nlm_res *resp = &req->a_res; |
609 | int status; | 614 | int status = 0; |
610 | 615 | ||
611 | /* | 616 | /* |
612 | * Note: the server is supposed to either grant us the unlock | 617 | * Note: the server is supposed to either grant us the unlock |
613 | * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either | 618 | * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either |
614 | * case, we want to unlock. | 619 | * case, we want to unlock. |
615 | */ | 620 | */ |
621 | fl->fl_flags |= FL_EXISTS; | ||
616 | down_read(&host->h_rwsem); | 622 | down_read(&host->h_rwsem); |
617 | do_vfs_lock(fl); | 623 | if (do_vfs_lock(fl) == -ENOENT) { |
624 | up_read(&host->h_rwsem); | ||
625 | goto out; | ||
626 | } | ||
618 | up_read(&host->h_rwsem); | 627 | up_read(&host->h_rwsem); |
619 | 628 | ||
620 | if (req->a_flags & RPC_TASK_ASYNC) | 629 | if (req->a_flags & RPC_TASK_ASYNC) |
@@ -624,7 +633,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) | |||
624 | if (status < 0) | 633 | if (status < 0) |
625 | goto out; | 634 | goto out; |
626 | 635 | ||
627 | status = 0; | ||
628 | if (resp->status == NLM_LCK_GRANTED) | 636 | if (resp->status == NLM_LCK_GRANTED) |
629 | goto out; | 637 | goto out; |
630 | 638 | ||
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index baf5ae513481..c9d419703cf3 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -638,9 +638,6 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) | |||
638 | if (task->tk_status < 0) { | 638 | if (task->tk_status < 0) { |
639 | /* RPC error: Re-insert for retransmission */ | 639 | /* RPC error: Re-insert for retransmission */ |
640 | timeout = 10 * HZ; | 640 | timeout = 10 * HZ; |
641 | } else if (block->b_done) { | ||
642 | /* Block already removed, kill it for real */ | ||
643 | timeout = 0; | ||
644 | } else { | 641 | } else { |
645 | /* Call was successful, now wait for client callback */ | 642 | /* Call was successful, now wait for client callback */ |
646 | timeout = 60 * HZ; | 643 | timeout = 60 * HZ; |
@@ -709,13 +706,10 @@ nlmsvc_retry_blocked(void) | |||
709 | break; | 706 | break; |
710 | if (time_after(block->b_when,jiffies)) | 707 | if (time_after(block->b_when,jiffies)) |
711 | break; | 708 | break; |
712 | dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n", | 709 | dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", |
713 | block, block->b_when, block->b_done); | 710 | block, block->b_when); |
714 | kref_get(&block->b_count); | 711 | kref_get(&block->b_count); |
715 | if (block->b_done) | 712 | nlmsvc_grant_blocked(block); |
716 | nlmsvc_unlink_block(block); | ||
717 | else | ||
718 | nlmsvc_grant_blocked(block); | ||
719 | nlmsvc_release_block(block); | 713 | nlmsvc_release_block(block); |
720 | } | 714 | } |
721 | 715 | ||
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 2a4df9b3779a..01b4db9e5466 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -237,19 +237,22 @@ static int | |||
237 | nlm_traverse_files(struct nlm_host *host, int action) | 237 | nlm_traverse_files(struct nlm_host *host, int action) |
238 | { | 238 | { |
239 | struct nlm_file *file, **fp; | 239 | struct nlm_file *file, **fp; |
240 | int i; | 240 | int i, ret = 0; |
241 | 241 | ||
242 | mutex_lock(&nlm_file_mutex); | 242 | mutex_lock(&nlm_file_mutex); |
243 | for (i = 0; i < FILE_NRHASH; i++) { | 243 | for (i = 0; i < FILE_NRHASH; i++) { |
244 | fp = nlm_files + i; | 244 | fp = nlm_files + i; |
245 | while ((file = *fp) != NULL) { | 245 | while ((file = *fp) != NULL) { |
246 | file->f_count++; | ||
247 | mutex_unlock(&nlm_file_mutex); | ||
248 | |||
246 | /* Traverse locks, blocks and shares of this file | 249 | /* Traverse locks, blocks and shares of this file |
247 | * and update file->f_locks count */ | 250 | * and update file->f_locks count */ |
248 | if (nlm_inspect_file(host, file, action)) { | 251 | if (nlm_inspect_file(host, file, action)) |
249 | mutex_unlock(&nlm_file_mutex); | 252 | ret = 1; |
250 | return 1; | ||
251 | } | ||
252 | 253 | ||
254 | mutex_lock(&nlm_file_mutex); | ||
255 | file->f_count--; | ||
253 | /* No more references to this file. Let go of it. */ | 256 | /* No more references to this file. Let go of it. */ |
254 | if (!file->f_blocks && !file->f_locks | 257 | if (!file->f_blocks && !file->f_locks |
255 | && !file->f_shares && !file->f_count) { | 258 | && !file->f_shares && !file->f_count) { |
@@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action) | |||
262 | } | 265 | } |
263 | } | 266 | } |
264 | mutex_unlock(&nlm_file_mutex); | 267 | mutex_unlock(&nlm_file_mutex); |
265 | return 0; | 268 | return ret; |
266 | } | 269 | } |
267 | 270 | ||
268 | /* | 271 | /* |
diff --git a/fs/locks.c b/fs/locks.c index 1ad29c9b6252..d7c53392cac1 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -725,6 +725,10 @@ next_task: | |||
725 | /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks | 725 | /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks |
726 | * at the head of the list, but that's secret knowledge known only to | 726 | * at the head of the list, but that's secret knowledge known only to |
727 | * flock_lock_file and posix_lock_file. | 727 | * flock_lock_file and posix_lock_file. |
728 | * | ||
729 | * Note that if called with an FL_EXISTS argument, the caller may determine | ||
730 | * whether or not a lock was successfully freed by testing the return | ||
731 | * value for -ENOENT. | ||
728 | */ | 732 | */ |
729 | static int flock_lock_file(struct file *filp, struct file_lock *request) | 733 | static int flock_lock_file(struct file *filp, struct file_lock *request) |
730 | { | 734 | { |
@@ -735,6 +739,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
735 | int found = 0; | 739 | int found = 0; |
736 | 740 | ||
737 | lock_kernel(); | 741 | lock_kernel(); |
742 | if (request->fl_flags & FL_ACCESS) | ||
743 | goto find_conflict; | ||
738 | for_each_lock(inode, before) { | 744 | for_each_lock(inode, before) { |
739 | struct file_lock *fl = *before; | 745 | struct file_lock *fl = *before; |
740 | if (IS_POSIX(fl)) | 746 | if (IS_POSIX(fl)) |
@@ -750,8 +756,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
750 | break; | 756 | break; |
751 | } | 757 | } |
752 | 758 | ||
753 | if (request->fl_type == F_UNLCK) | 759 | if (request->fl_type == F_UNLCK) { |
760 | if ((request->fl_flags & FL_EXISTS) && !found) | ||
761 | error = -ENOENT; | ||
754 | goto out; | 762 | goto out; |
763 | } | ||
755 | 764 | ||
756 | error = -ENOMEM; | 765 | error = -ENOMEM; |
757 | new_fl = locks_alloc_lock(); | 766 | new_fl = locks_alloc_lock(); |
@@ -764,6 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
764 | if (found) | 773 | if (found) |
765 | cond_resched(); | 774 | cond_resched(); |
766 | 775 | ||
776 | find_conflict: | ||
767 | for_each_lock(inode, before) { | 777 | for_each_lock(inode, before) { |
768 | struct file_lock *fl = *before; | 778 | struct file_lock *fl = *before; |
769 | if (IS_POSIX(fl)) | 779 | if (IS_POSIX(fl)) |
@@ -777,6 +787,8 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
777 | locks_insert_block(fl, request); | 787 | locks_insert_block(fl, request); |
778 | goto out; | 788 | goto out; |
779 | } | 789 | } |
790 | if (request->fl_flags & FL_ACCESS) | ||
791 | goto out; | ||
780 | locks_copy_lock(new_fl, request); | 792 | locks_copy_lock(new_fl, request); |
781 | locks_insert_lock(&inode->i_flock, new_fl); | 793 | locks_insert_lock(&inode->i_flock, new_fl); |
782 | new_fl = NULL; | 794 | new_fl = NULL; |
@@ -948,8 +960,11 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request | |||
948 | 960 | ||
949 | error = 0; | 961 | error = 0; |
950 | if (!added) { | 962 | if (!added) { |
951 | if (request->fl_type == F_UNLCK) | 963 | if (request->fl_type == F_UNLCK) { |
964 | if (request->fl_flags & FL_EXISTS) | ||
965 | error = -ENOENT; | ||
952 | goto out; | 966 | goto out; |
967 | } | ||
953 | 968 | ||
954 | if (!new_fl) { | 969 | if (!new_fl) { |
955 | error = -ENOLCK; | 970 | error = -ENOLCK; |
@@ -996,6 +1011,10 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request | |||
996 | * Add a POSIX style lock to a file. | 1011 | * Add a POSIX style lock to a file. |
997 | * We merge adjacent & overlapping locks whenever possible. | 1012 | * We merge adjacent & overlapping locks whenever possible. |
998 | * POSIX locks are sorted by owner task, then by starting address | 1013 | * POSIX locks are sorted by owner task, then by starting address |
1014 | * | ||
1015 | * Note that if called with an FL_EXISTS argument, the caller may determine | ||
1016 | * whether or not a lock was successfully freed by testing the return | ||
1017 | * value for -ENOENT. | ||
999 | */ | 1018 | */ |
1000 | int posix_lock_file(struct file *filp, struct file_lock *fl) | 1019 | int posix_lock_file(struct file *filp, struct file_lock *fl) |
1001 | { | 1020 | { |
@@ -1402,8 +1421,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1402 | if (!leases_enable) | 1421 | if (!leases_enable) |
1403 | goto out; | 1422 | goto out; |
1404 | 1423 | ||
1405 | error = lease_alloc(filp, arg, &fl); | 1424 | error = -ENOMEM; |
1406 | if (error) | 1425 | fl = locks_alloc_lock(); |
1426 | if (fl == NULL) | ||
1407 | goto out; | 1427 | goto out; |
1408 | 1428 | ||
1409 | locks_copy_lock(fl, lease); | 1429 | locks_copy_lock(fl, lease); |
@@ -1411,6 +1431,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1411 | locks_insert_lock(before, fl); | 1431 | locks_insert_lock(before, fl); |
1412 | 1432 | ||
1413 | *flp = fl; | 1433 | *flp = fl; |
1434 | error = 0; | ||
1414 | out: | 1435 | out: |
1415 | return error; | 1436 | return error; |
1416 | } | 1437 | } |
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 9ea91c5eeb7b..330ff9fc7cf0 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) | |||
204 | /* | 204 | /* |
205 | * Allocate the buffer map to keep the superblock small. | 205 | * Allocate the buffer map to keep the superblock small. |
206 | */ | 206 | */ |
207 | if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) | ||
208 | goto out_illegal_sb; | ||
207 | i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); | 209 | i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); |
208 | map = kmalloc(i, GFP_KERNEL); | 210 | map = kmalloc(i, GFP_KERNEL); |
209 | if (!map) | 211 | if (!map) |
@@ -263,7 +265,7 @@ out_no_root: | |||
263 | 265 | ||
264 | out_no_bitmap: | 266 | out_no_bitmap: |
265 | printk("MINIX-fs: bad superblock or unable to read bitmaps\n"); | 267 | printk("MINIX-fs: bad superblock or unable to read bitmaps\n"); |
266 | out_freemap: | 268 | out_freemap: |
267 | for (i = 0; i < sbi->s_imap_blocks; i++) | 269 | for (i = 0; i < sbi->s_imap_blocks; i++) |
268 | brelse(sbi->s_imap[i]); | 270 | brelse(sbi->s_imap[i]); |
269 | for (i = 0; i < sbi->s_zmap_blocks; i++) | 271 | for (i = 0; i < sbi->s_zmap_blocks; i++) |
@@ -276,11 +278,16 @@ out_no_map: | |||
276 | printk("MINIX-fs: can't allocate map\n"); | 278 | printk("MINIX-fs: can't allocate map\n"); |
277 | goto out_release; | 279 | goto out_release; |
278 | 280 | ||
281 | out_illegal_sb: | ||
282 | if (!silent) | ||
283 | printk("MINIX-fs: bad superblock\n"); | ||
284 | goto out_release; | ||
285 | |||
279 | out_no_fs: | 286 | out_no_fs: |
280 | if (!silent) | 287 | if (!silent) |
281 | printk("VFS: Can't find a Minix or Minix V2 filesystem " | 288 | printk("VFS: Can't find a Minix or Minix V2 filesystem " |
282 | "on device %s\n", s->s_id); | 289 | "on device %s\n", s->s_id); |
283 | out_release: | 290 | out_release: |
284 | brelse(bh); | 291 | brelse(bh); |
285 | goto out; | 292 | goto out; |
286 | 293 | ||
@@ -290,7 +297,7 @@ out_bad_hblock: | |||
290 | 297 | ||
291 | out_bad_sb: | 298 | out_bad_sb: |
292 | printk("MINIX-fs: unable to read superblock\n"); | 299 | printk("MINIX-fs: unable to read superblock\n"); |
293 | out: | 300 | out: |
294 | s->s_fs_info = NULL; | 301 | s->s_fs_info = NULL; |
295 | kfree(sbi); | 302 | kfree(sbi); |
296 | return -EINVAL; | 303 | return -EINVAL; |
diff --git a/fs/namei.c b/fs/namei.c index c784e8bb57a3..432d6bc6fab0 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -159,7 +159,7 @@ char * getname(const char __user * filename) | |||
159 | #ifdef CONFIG_AUDITSYSCALL | 159 | #ifdef CONFIG_AUDITSYSCALL |
160 | void putname(const char *name) | 160 | void putname(const char *name) |
161 | { | 161 | { |
162 | if (unlikely(current->audit_context)) | 162 | if (unlikely(!audit_dummy_context())) |
163 | audit_putname(name); | 163 | audit_putname(name); |
164 | else | 164 | else |
165 | __putname(name); | 165 | __putname(name); |
@@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask, | |||
227 | 227 | ||
228 | int permission(struct inode *inode, int mask, struct nameidata *nd) | 228 | int permission(struct inode *inode, int mask, struct nameidata *nd) |
229 | { | 229 | { |
230 | umode_t mode = inode->i_mode; | ||
230 | int retval, submask; | 231 | int retval, submask; |
231 | 232 | ||
232 | if (mask & MAY_WRITE) { | 233 | if (mask & MAY_WRITE) { |
233 | umode_t mode = inode->i_mode; | ||
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Nobody gets write access to a read-only fs. | 236 | * Nobody gets write access to a read-only fs. |
@@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) | |||
247 | } | 247 | } |
248 | 248 | ||
249 | 249 | ||
250 | /* | ||
251 | * MAY_EXEC on regular files requires special handling: We override | ||
252 | * filesystem execute permissions if the mode bits aren't set. | ||
253 | */ | ||
254 | if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO)) | ||
255 | return -EACCES; | ||
256 | |||
250 | /* Ordinary permission routines do not understand MAY_APPEND. */ | 257 | /* Ordinary permission routines do not understand MAY_APPEND. */ |
251 | submask = mask & ~MAY_APPEND; | 258 | submask = mask & ~MAY_APPEND; |
252 | if (inode->i_op && inode->i_op->permission) | 259 | if (inode->i_op && inode->i_op->permission) |
@@ -1125,7 +1132,7 @@ static int fastcall do_path_lookup(int dfd, const char *name, | |||
1125 | retval = link_path_walk(name, nd); | 1132 | retval = link_path_walk(name, nd); |
1126 | out: | 1133 | out: |
1127 | if (likely(retval == 0)) { | 1134 | if (likely(retval == 0)) { |
1128 | if (unlikely(current->audit_context && nd && nd->dentry && | 1135 | if (unlikely(!audit_dummy_context() && nd && nd->dentry && |
1129 | nd->dentry->d_inode)) | 1136 | nd->dentry->d_inode)) |
1130 | audit_inode(name, nd->dentry->d_inode); | 1137 | audit_inode(name, nd->dentry->d_inode); |
1131 | } | 1138 | } |
@@ -1357,7 +1364,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) | |||
1357 | return -ENOENT; | 1364 | return -ENOENT; |
1358 | 1365 | ||
1359 | BUG_ON(victim->d_parent->d_inode != dir); | 1366 | BUG_ON(victim->d_parent->d_inode != dir); |
1360 | audit_inode_child(victim->d_name.name, victim->d_inode, dir->i_ino); | 1367 | audit_inode_child(victim->d_name.name, victim->d_inode, dir); |
1361 | 1368 | ||
1362 | error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); | 1369 | error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); |
1363 | if (error) | 1370 | if (error) |
@@ -1423,7 +1430,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) | |||
1423 | struct dentry *p; | 1430 | struct dentry *p; |
1424 | 1431 | ||
1425 | if (p1 == p2) { | 1432 | if (p1 == p2) { |
1426 | mutex_lock(&p1->d_inode->i_mutex); | 1433 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); |
1427 | return NULL; | 1434 | return NULL; |
1428 | } | 1435 | } |
1429 | 1436 | ||
@@ -1431,22 +1438,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) | |||
1431 | 1438 | ||
1432 | for (p = p1; p->d_parent != p; p = p->d_parent) { | 1439 | for (p = p1; p->d_parent != p; p = p->d_parent) { |
1433 | if (p->d_parent == p2) { | 1440 | if (p->d_parent == p2) { |
1434 | mutex_lock(&p2->d_inode->i_mutex); | 1441 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); |
1435 | mutex_lock(&p1->d_inode->i_mutex); | 1442 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); |
1436 | return p; | 1443 | return p; |
1437 | } | 1444 | } |
1438 | } | 1445 | } |
1439 | 1446 | ||
1440 | for (p = p2; p->d_parent != p; p = p->d_parent) { | 1447 | for (p = p2; p->d_parent != p; p = p->d_parent) { |
1441 | if (p->d_parent == p1) { | 1448 | if (p->d_parent == p1) { |
1442 | mutex_lock(&p1->d_inode->i_mutex); | 1449 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); |
1443 | mutex_lock(&p2->d_inode->i_mutex); | 1450 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); |
1444 | return p; | 1451 | return p; |
1445 | } | 1452 | } |
1446 | } | 1453 | } |
1447 | 1454 | ||
1448 | mutex_lock(&p1->d_inode->i_mutex); | 1455 | mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); |
1449 | mutex_lock(&p2->d_inode->i_mutex); | 1456 | mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); |
1450 | return NULL; | 1457 | return NULL; |
1451 | } | 1458 | } |
1452 | 1459 | ||
@@ -1659,6 +1666,7 @@ do_last: | |||
1659 | * It already exists. | 1666 | * It already exists. |
1660 | */ | 1667 | */ |
1661 | mutex_unlock(&dir->d_inode->i_mutex); | 1668 | mutex_unlock(&dir->d_inode->i_mutex); |
1669 | audit_inode_update(path.dentry->d_inode); | ||
1662 | 1670 | ||
1663 | error = -EEXIST; | 1671 | error = -EEXIST; |
1664 | if (flag & O_EXCL) | 1672 | if (flag & O_EXCL) |
@@ -1669,6 +1677,7 @@ do_last: | |||
1669 | if (flag & O_NOFOLLOW) | 1677 | if (flag & O_NOFOLLOW) |
1670 | goto exit_dput; | 1678 | goto exit_dput; |
1671 | } | 1679 | } |
1680 | |||
1672 | error = -ENOENT; | 1681 | error = -ENOENT; |
1673 | if (!path.dentry->d_inode) | 1682 | if (!path.dentry->d_inode) |
1674 | goto exit_dput; | 1683 | goto exit_dput; |
@@ -1712,8 +1721,14 @@ do_link: | |||
1712 | if (error) | 1721 | if (error) |
1713 | goto exit_dput; | 1722 | goto exit_dput; |
1714 | error = __do_follow_link(&path, nd); | 1723 | error = __do_follow_link(&path, nd); |
1715 | if (error) | 1724 | if (error) { |
1725 | /* Does someone understand code flow here? Or it is only | ||
1726 | * me so stupid? Anathema to whoever designed this non-sense | ||
1727 | * with "intent.open". | ||
1728 | */ | ||
1729 | release_open_intent(nd); | ||
1716 | return error; | 1730 | return error; |
1731 | } | ||
1717 | nd->flags &= ~LOOKUP_PARENT; | 1732 | nd->flags &= ~LOOKUP_PARENT; |
1718 | if (nd->last_type == LAST_BIND) | 1733 | if (nd->last_type == LAST_BIND) |
1719 | goto ok; | 1734 | goto ok; |
@@ -1751,7 +1766,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) | |||
1751 | { | 1766 | { |
1752 | struct dentry *dentry = ERR_PTR(-EEXIST); | 1767 | struct dentry *dentry = ERR_PTR(-EEXIST); |
1753 | 1768 | ||
1754 | mutex_lock(&nd->dentry->d_inode->i_mutex); | 1769 | mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
1755 | /* | 1770 | /* |
1756 | * Yucky last component or no last component at all? | 1771 | * Yucky last component or no last component at all? |
1757 | * (foo/., foo/.., /////) | 1772 | * (foo/., foo/.., /////) |
@@ -1759,6 +1774,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) | |||
1759 | if (nd->last_type != LAST_NORM) | 1774 | if (nd->last_type != LAST_NORM) |
1760 | goto fail; | 1775 | goto fail; |
1761 | nd->flags &= ~LOOKUP_PARENT; | 1776 | nd->flags &= ~LOOKUP_PARENT; |
1777 | nd->flags |= LOOKUP_CREATE; | ||
1778 | nd->intent.open.flags = O_EXCL; | ||
1762 | 1779 | ||
1763 | /* | 1780 | /* |
1764 | * Do the final lookup. | 1781 | * Do the final lookup. |
@@ -2008,7 +2025,7 @@ static long do_rmdir(int dfd, const char __user *pathname) | |||
2008 | error = -EBUSY; | 2025 | error = -EBUSY; |
2009 | goto exit1; | 2026 | goto exit1; |
2010 | } | 2027 | } |
2011 | mutex_lock(&nd.dentry->d_inode->i_mutex); | 2028 | mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
2012 | dentry = lookup_hash(&nd); | 2029 | dentry = lookup_hash(&nd); |
2013 | error = PTR_ERR(dentry); | 2030 | error = PTR_ERR(dentry); |
2014 | if (!IS_ERR(dentry)) { | 2031 | if (!IS_ERR(dentry)) { |
@@ -2082,7 +2099,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) | |||
2082 | error = -EISDIR; | 2099 | error = -EISDIR; |
2083 | if (nd.last_type != LAST_NORM) | 2100 | if (nd.last_type != LAST_NORM) |
2084 | goto exit1; | 2101 | goto exit1; |
2085 | mutex_lock(&nd.dentry->d_inode->i_mutex); | 2102 | mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); |
2086 | dentry = lookup_hash(&nd); | 2103 | dentry = lookup_hash(&nd); |
2087 | error = PTR_ERR(dentry); | 2104 | error = PTR_ERR(dentry); |
2088 | if (!IS_ERR(dentry)) { | 2105 | if (!IS_ERR(dentry)) { |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3ddda6f7ecc2..e7ffb4deb3e5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -690,7 +690,9 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) | |||
690 | goto out_force; | 690 | goto out_force; |
691 | /* This is an open(2) */ | 691 | /* This is an open(2) */ |
692 | if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && | 692 | if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && |
693 | !(server->flags & NFS_MOUNT_NOCTO)) | 693 | !(server->flags & NFS_MOUNT_NOCTO) && |
694 | (S_ISREG(inode->i_mode) || | ||
695 | S_ISDIR(inode->i_mode))) | ||
694 | goto out_force; | 696 | goto out_force; |
695 | } | 697 | } |
696 | return nfs_revalidate_inode(server, inode); | 698 | return nfs_revalidate_inode(server, inode); |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4cdd1b499e35..76ca1cbc38f9 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -67,25 +67,19 @@ struct nfs_direct_req { | |||
67 | struct kref kref; /* release manager */ | 67 | struct kref kref; /* release manager */ |
68 | 68 | ||
69 | /* I/O parameters */ | 69 | /* I/O parameters */ |
70 | struct list_head list, /* nfs_read/write_data structs */ | ||
71 | rewrite_list; /* saved nfs_write_data structs */ | ||
72 | struct nfs_open_context *ctx; /* file open context info */ | 70 | struct nfs_open_context *ctx; /* file open context info */ |
73 | struct kiocb * iocb; /* controlling i/o request */ | 71 | struct kiocb * iocb; /* controlling i/o request */ |
74 | struct inode * inode; /* target file of i/o */ | 72 | struct inode * inode; /* target file of i/o */ |
75 | unsigned long user_addr; /* location of user's buffer */ | ||
76 | size_t user_count; /* total bytes to move */ | ||
77 | loff_t pos; /* starting offset in file */ | ||
78 | struct page ** pages; /* pages in our buffer */ | ||
79 | unsigned int npages; /* count of pages */ | ||
80 | 73 | ||
81 | /* completion state */ | 74 | /* completion state */ |
75 | atomic_t io_count; /* i/os we're waiting for */ | ||
82 | spinlock_t lock; /* protect completion state */ | 76 | spinlock_t lock; /* protect completion state */ |
83 | int outstanding; /* i/os we're waiting for */ | ||
84 | ssize_t count, /* bytes actually processed */ | 77 | ssize_t count, /* bytes actually processed */ |
85 | error; /* any reported error */ | 78 | error; /* any reported error */ |
86 | struct completion completion; /* wait for i/o completion */ | 79 | struct completion completion; /* wait for i/o completion */ |
87 | 80 | ||
88 | /* commit state */ | 81 | /* commit state */ |
82 | struct list_head rewrite_list; /* saved nfs_write_data structs */ | ||
89 | struct nfs_write_data * commit_data; /* special write_data for commits */ | 83 | struct nfs_write_data * commit_data; /* special write_data for commits */ |
90 | int flags; | 84 | int flags; |
91 | #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ | 85 | #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ |
@@ -93,8 +87,18 @@ struct nfs_direct_req { | |||
93 | struct nfs_writeverf verf; /* unstable write verifier */ | 87 | struct nfs_writeverf verf; /* unstable write verifier */ |
94 | }; | 88 | }; |
95 | 89 | ||
96 | static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync); | ||
97 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); | 90 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); |
91 | static const struct rpc_call_ops nfs_write_direct_ops; | ||
92 | |||
93 | static inline void get_dreq(struct nfs_direct_req *dreq) | ||
94 | { | ||
95 | atomic_inc(&dreq->io_count); | ||
96 | } | ||
97 | |||
98 | static inline int put_dreq(struct nfs_direct_req *dreq) | ||
99 | { | ||
100 | return atomic_dec_and_test(&dreq->io_count); | ||
101 | } | ||
98 | 102 | ||
99 | /** | 103 | /** |
100 | * nfs_direct_IO - NFS address space operation for direct I/O | 104 | * nfs_direct_IO - NFS address space operation for direct I/O |
@@ -118,50 +122,21 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ | |||
118 | return -EINVAL; | 122 | return -EINVAL; |
119 | } | 123 | } |
120 | 124 | ||
121 | static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty) | 125 | static void nfs_direct_dirty_pages(struct page **pages, int npages) |
122 | { | 126 | { |
123 | int i; | 127 | int i; |
124 | for (i = 0; i < npages; i++) { | 128 | for (i = 0; i < npages; i++) { |
125 | struct page *page = pages[i]; | 129 | struct page *page = pages[i]; |
126 | if (do_dirty && !PageCompound(page)) | 130 | if (!PageCompound(page)) |
127 | set_page_dirty_lock(page); | 131 | set_page_dirty_lock(page); |
128 | page_cache_release(page); | ||
129 | } | 132 | } |
130 | kfree(pages); | ||
131 | } | 133 | } |
132 | 134 | ||
133 | static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages) | 135 | static void nfs_direct_release_pages(struct page **pages, int npages) |
134 | { | 136 | { |
135 | int result = -ENOMEM; | 137 | int i; |
136 | unsigned long page_count; | 138 | for (i = 0; i < npages; i++) |
137 | size_t array_size; | 139 | page_cache_release(pages[i]); |
138 | |||
139 | page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
140 | page_count -= user_addr >> PAGE_SHIFT; | ||
141 | |||
142 | array_size = (page_count * sizeof(struct page *)); | ||
143 | *pages = kmalloc(array_size, GFP_KERNEL); | ||
144 | if (*pages) { | ||
145 | down_read(&current->mm->mmap_sem); | ||
146 | result = get_user_pages(current, current->mm, user_addr, | ||
147 | page_count, (rw == READ), 0, | ||
148 | *pages, NULL); | ||
149 | up_read(&current->mm->mmap_sem); | ||
150 | if (result != page_count) { | ||
151 | /* | ||
152 | * If we got fewer pages than expected from | ||
153 | * get_user_pages(), the user buffer runs off the | ||
154 | * end of a mapping; return EFAULT. | ||
155 | */ | ||
156 | if (result >= 0) { | ||
157 | nfs_free_user_pages(*pages, result, 0); | ||
158 | result = -EFAULT; | ||
159 | } else | ||
160 | kfree(*pages); | ||
161 | *pages = NULL; | ||
162 | } | ||
163 | } | ||
164 | return result; | ||
165 | } | 140 | } |
166 | 141 | ||
167 | static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | 142 | static inline struct nfs_direct_req *nfs_direct_req_alloc(void) |
@@ -173,13 +148,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | |||
173 | return NULL; | 148 | return NULL; |
174 | 149 | ||
175 | kref_init(&dreq->kref); | 150 | kref_init(&dreq->kref); |
151 | kref_get(&dreq->kref); | ||
176 | init_completion(&dreq->completion); | 152 | init_completion(&dreq->completion); |
177 | INIT_LIST_HEAD(&dreq->list); | ||
178 | INIT_LIST_HEAD(&dreq->rewrite_list); | 153 | INIT_LIST_HEAD(&dreq->rewrite_list); |
179 | dreq->iocb = NULL; | 154 | dreq->iocb = NULL; |
180 | dreq->ctx = NULL; | 155 | dreq->ctx = NULL; |
181 | spin_lock_init(&dreq->lock); | 156 | spin_lock_init(&dreq->lock); |
182 | dreq->outstanding = 0; | 157 | atomic_set(&dreq->io_count, 0); |
183 | dreq->count = 0; | 158 | dreq->count = 0; |
184 | dreq->error = 0; | 159 | dreq->error = 0; |
185 | dreq->flags = 0; | 160 | dreq->flags = 0; |
@@ -220,18 +195,11 @@ out: | |||
220 | } | 195 | } |
221 | 196 | ||
222 | /* | 197 | /* |
223 | * We must hold a reference to all the pages in this direct read request | 198 | * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust |
224 | * until the RPCs complete. This could be long *after* we are woken up in | 199 | * the iocb is still valid here if this is a synchronous request. |
225 | * nfs_direct_wait (for instance, if someone hits ^C on a slow server). | ||
226 | * | ||
227 | * In addition, synchronous I/O uses a stack-allocated iocb. Thus we | ||
228 | * can't trust the iocb is still valid here if this is a synchronous | ||
229 | * request. If the waiter is woken prematurely, the iocb is long gone. | ||
230 | */ | 200 | */ |
231 | static void nfs_direct_complete(struct nfs_direct_req *dreq) | 201 | static void nfs_direct_complete(struct nfs_direct_req *dreq) |
232 | { | 202 | { |
233 | nfs_free_user_pages(dreq->pages, dreq->npages, 1); | ||
234 | |||
235 | if (dreq->iocb) { | 203 | if (dreq->iocb) { |
236 | long res = (long) dreq->error; | 204 | long res = (long) dreq->error; |
237 | if (!res) | 205 | if (!res) |
@@ -244,48 +212,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) | |||
244 | } | 212 | } |
245 | 213 | ||
246 | /* | 214 | /* |
247 | * Note we also set the number of requests we have in the dreq when we are | 215 | * We must hold a reference to all the pages in this direct read request |
248 | * done. This prevents races with I/O completion so we will always wait | 216 | * until the RPCs complete. This could be long *after* we are woken up in |
249 | * until all requests have been dispatched and completed. | 217 | * nfs_direct_wait (for instance, if someone hits ^C on a slow server). |
250 | */ | 218 | */ |
251 | static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) | ||
252 | { | ||
253 | struct list_head *list; | ||
254 | struct nfs_direct_req *dreq; | ||
255 | unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
256 | |||
257 | dreq = nfs_direct_req_alloc(); | ||
258 | if (!dreq) | ||
259 | return NULL; | ||
260 | |||
261 | list = &dreq->list; | ||
262 | for(;;) { | ||
263 | struct nfs_read_data *data = nfs_readdata_alloc(rpages); | ||
264 | |||
265 | if (unlikely(!data)) { | ||
266 | while (!list_empty(list)) { | ||
267 | data = list_entry(list->next, | ||
268 | struct nfs_read_data, pages); | ||
269 | list_del(&data->pages); | ||
270 | nfs_readdata_free(data); | ||
271 | } | ||
272 | kref_put(&dreq->kref, nfs_direct_req_release); | ||
273 | return NULL; | ||
274 | } | ||
275 | |||
276 | INIT_LIST_HEAD(&data->pages); | ||
277 | list_add(&data->pages, list); | ||
278 | |||
279 | data->req = (struct nfs_page *) dreq; | ||
280 | dreq->outstanding++; | ||
281 | if (nbytes <= rsize) | ||
282 | break; | ||
283 | nbytes -= rsize; | ||
284 | } | ||
285 | kref_get(&dreq->kref); | ||
286 | return dreq; | ||
287 | } | ||
288 | |||
289 | static void nfs_direct_read_result(struct rpc_task *task, void *calldata) | 219 | static void nfs_direct_read_result(struct rpc_task *task, void *calldata) |
290 | { | 220 | { |
291 | struct nfs_read_data *data = calldata; | 221 | struct nfs_read_data *data = calldata; |
@@ -294,6 +224,9 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) | |||
294 | if (nfs_readpage_result(task, data) != 0) | 224 | if (nfs_readpage_result(task, data) != 0) |
295 | return; | 225 | return; |
296 | 226 | ||
227 | nfs_direct_dirty_pages(data->pagevec, data->npages); | ||
228 | nfs_direct_release_pages(data->pagevec, data->npages); | ||
229 | |||
297 | spin_lock(&dreq->lock); | 230 | spin_lock(&dreq->lock); |
298 | 231 | ||
299 | if (likely(task->tk_status >= 0)) | 232 | if (likely(task->tk_status >= 0)) |
@@ -301,13 +234,10 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) | |||
301 | else | 234 | else |
302 | dreq->error = task->tk_status; | 235 | dreq->error = task->tk_status; |
303 | 236 | ||
304 | if (--dreq->outstanding) { | ||
305 | spin_unlock(&dreq->lock); | ||
306 | return; | ||
307 | } | ||
308 | |||
309 | spin_unlock(&dreq->lock); | 237 | spin_unlock(&dreq->lock); |
310 | nfs_direct_complete(dreq); | 238 | |
239 | if (put_dreq(dreq)) | ||
240 | nfs_direct_complete(dreq); | ||
311 | } | 241 | } |
312 | 242 | ||
313 | static const struct rpc_call_ops nfs_read_direct_ops = { | 243 | static const struct rpc_call_ops nfs_read_direct_ops = { |
@@ -316,41 +246,56 @@ static const struct rpc_call_ops nfs_read_direct_ops = { | |||
316 | }; | 246 | }; |
317 | 247 | ||
318 | /* | 248 | /* |
319 | * For each nfs_read_data struct that was allocated on the list, dispatch | 249 | * For each rsize'd chunk of the user's buffer, dispatch an NFS READ |
320 | * an NFS READ operation | 250 | * operation. If nfs_readdata_alloc() or get_user_pages() fails, |
251 | * bail and stop sending more reads. Read length accounting is | ||
252 | * handled automatically by nfs_direct_read_result(). Otherwise, if | ||
253 | * no requests have been sent, just return an error. | ||
321 | */ | 254 | */ |
322 | static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) | 255 | static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) |
323 | { | 256 | { |
324 | struct nfs_open_context *ctx = dreq->ctx; | 257 | struct nfs_open_context *ctx = dreq->ctx; |
325 | struct inode *inode = ctx->dentry->d_inode; | 258 | struct inode *inode = ctx->dentry->d_inode; |
326 | struct list_head *list = &dreq->list; | ||
327 | struct page **pages = dreq->pages; | ||
328 | size_t count = dreq->user_count; | ||
329 | loff_t pos = dreq->pos; | ||
330 | size_t rsize = NFS_SERVER(inode)->rsize; | 259 | size_t rsize = NFS_SERVER(inode)->rsize; |
331 | unsigned int curpage, pgbase; | 260 | unsigned int pgbase; |
261 | int result; | ||
262 | ssize_t started = 0; | ||
263 | |||
264 | get_dreq(dreq); | ||
332 | 265 | ||
333 | curpage = 0; | ||
334 | pgbase = dreq->user_addr & ~PAGE_MASK; | ||
335 | do { | 266 | do { |
336 | struct nfs_read_data *data; | 267 | struct nfs_read_data *data; |
337 | size_t bytes; | 268 | size_t bytes; |
338 | 269 | ||
339 | bytes = rsize; | 270 | pgbase = user_addr & ~PAGE_MASK; |
340 | if (count < rsize) | 271 | bytes = min(rsize,count); |
341 | bytes = count; | ||
342 | 272 | ||
343 | BUG_ON(list_empty(list)); | 273 | result = -ENOMEM; |
344 | data = list_entry(list->next, struct nfs_read_data, pages); | 274 | data = nfs_readdata_alloc(pgbase + bytes); |
345 | list_del_init(&data->pages); | 275 | if (unlikely(!data)) |
276 | break; | ||
277 | |||
278 | down_read(&current->mm->mmap_sem); | ||
279 | result = get_user_pages(current, current->mm, user_addr, | ||
280 | data->npages, 1, 0, data->pagevec, NULL); | ||
281 | up_read(&current->mm->mmap_sem); | ||
282 | if (unlikely(result < data->npages)) { | ||
283 | if (result > 0) | ||
284 | nfs_direct_release_pages(data->pagevec, result); | ||
285 | nfs_readdata_release(data); | ||
286 | break; | ||
287 | } | ||
288 | |||
289 | get_dreq(dreq); | ||
346 | 290 | ||
291 | data->req = (struct nfs_page *) dreq; | ||
347 | data->inode = inode; | 292 | data->inode = inode; |
348 | data->cred = ctx->cred; | 293 | data->cred = ctx->cred; |
349 | data->args.fh = NFS_FH(inode); | 294 | data->args.fh = NFS_FH(inode); |
350 | data->args.context = ctx; | 295 | data->args.context = ctx; |
351 | data->args.offset = pos; | 296 | data->args.offset = pos; |
352 | data->args.pgbase = pgbase; | 297 | data->args.pgbase = pgbase; |
353 | data->args.pages = &pages[curpage]; | 298 | data->args.pages = data->pagevec; |
354 | data->args.count = bytes; | 299 | data->args.count = bytes; |
355 | data->res.fattr = &data->fattr; | 300 | data->res.fattr = &data->fattr; |
356 | data->res.eof = 0; | 301 | data->res.eof = 0; |
@@ -373,33 +318,37 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq) | |||
373 | bytes, | 318 | bytes, |
374 | (unsigned long long)data->args.offset); | 319 | (unsigned long long)data->args.offset); |
375 | 320 | ||
321 | started += bytes; | ||
322 | user_addr += bytes; | ||
376 | pos += bytes; | 323 | pos += bytes; |
324 | /* FIXME: Remove this unnecessary math from final patch */ | ||
377 | pgbase += bytes; | 325 | pgbase += bytes; |
378 | curpage += pgbase >> PAGE_SHIFT; | ||
379 | pgbase &= ~PAGE_MASK; | 326 | pgbase &= ~PAGE_MASK; |
327 | BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); | ||
380 | 328 | ||
381 | count -= bytes; | 329 | count -= bytes; |
382 | } while (count != 0); | 330 | } while (count != 0); |
383 | BUG_ON(!list_empty(list)); | 331 | |
332 | if (put_dreq(dreq)) | ||
333 | nfs_direct_complete(dreq); | ||
334 | |||
335 | if (started) | ||
336 | return 0; | ||
337 | return result < 0 ? (ssize_t) result : -EFAULT; | ||
384 | } | 338 | } |
385 | 339 | ||
386 | static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages) | 340 | static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) |
387 | { | 341 | { |
388 | ssize_t result; | 342 | ssize_t result = 0; |
389 | sigset_t oldset; | 343 | sigset_t oldset; |
390 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 344 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
391 | struct rpc_clnt *clnt = NFS_CLIENT(inode); | 345 | struct rpc_clnt *clnt = NFS_CLIENT(inode); |
392 | struct nfs_direct_req *dreq; | 346 | struct nfs_direct_req *dreq; |
393 | 347 | ||
394 | dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize); | 348 | dreq = nfs_direct_req_alloc(); |
395 | if (!dreq) | 349 | if (!dreq) |
396 | return -ENOMEM; | 350 | return -ENOMEM; |
397 | 351 | ||
398 | dreq->user_addr = user_addr; | ||
399 | dreq->user_count = count; | ||
400 | dreq->pos = pos; | ||
401 | dreq->pages = pages; | ||
402 | dreq->npages = nr_pages; | ||
403 | dreq->inode = inode; | 352 | dreq->inode = inode; |
404 | dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); | 353 | dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); |
405 | if (!is_sync_kiocb(iocb)) | 354 | if (!is_sync_kiocb(iocb)) |
@@ -407,8 +356,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size | |||
407 | 356 | ||
408 | nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); | 357 | nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count); |
409 | rpc_clnt_sigmask(clnt, &oldset); | 358 | rpc_clnt_sigmask(clnt, &oldset); |
410 | nfs_direct_read_schedule(dreq); | 359 | result = nfs_direct_read_schedule(dreq, user_addr, count, pos); |
411 | result = nfs_direct_wait(dreq); | 360 | if (!result) |
361 | result = nfs_direct_wait(dreq); | ||
412 | rpc_clnt_sigunmask(clnt, &oldset); | 362 | rpc_clnt_sigunmask(clnt, &oldset); |
413 | 363 | ||
414 | return result; | 364 | return result; |
@@ -416,10 +366,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size | |||
416 | 366 | ||
417 | static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) | 367 | static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) |
418 | { | 368 | { |
419 | list_splice_init(&dreq->rewrite_list, &dreq->list); | 369 | while (!list_empty(&dreq->rewrite_list)) { |
420 | while (!list_empty(&dreq->list)) { | 370 | struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); |
421 | struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages); | ||
422 | list_del(&data->pages); | 371 | list_del(&data->pages); |
372 | nfs_direct_release_pages(data->pagevec, data->npages); | ||
423 | nfs_writedata_release(data); | 373 | nfs_writedata_release(data); |
424 | } | 374 | } |
425 | } | 375 | } |
@@ -427,14 +377,51 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) | |||
427 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 377 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
428 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | 378 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) |
429 | { | 379 | { |
430 | struct list_head *pos; | 380 | struct inode *inode = dreq->inode; |
381 | struct list_head *p; | ||
382 | struct nfs_write_data *data; | ||
431 | 383 | ||
432 | list_splice_init(&dreq->rewrite_list, &dreq->list); | ||
433 | list_for_each(pos, &dreq->list) | ||
434 | dreq->outstanding++; | ||
435 | dreq->count = 0; | 384 | dreq->count = 0; |
385 | get_dreq(dreq); | ||
386 | |||
387 | list_for_each(p, &dreq->rewrite_list) { | ||
388 | data = list_entry(p, struct nfs_write_data, pages); | ||
389 | |||
390 | get_dreq(dreq); | ||
436 | 391 | ||
437 | nfs_direct_write_schedule(dreq, FLUSH_STABLE); | 392 | /* |
393 | * Reset data->res. | ||
394 | */ | ||
395 | nfs_fattr_init(&data->fattr); | ||
396 | data->res.count = data->args.count; | ||
397 | memset(&data->verf, 0, sizeof(data->verf)); | ||
398 | |||
399 | /* | ||
400 | * Reuse data->task; data->args should not have changed | ||
401 | * since the original request was sent. | ||
402 | */ | ||
403 | rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, | ||
404 | &nfs_write_direct_ops, data); | ||
405 | NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); | ||
406 | |||
407 | data->task.tk_priority = RPC_PRIORITY_NORMAL; | ||
408 | data->task.tk_cookie = (unsigned long) inode; | ||
409 | |||
410 | /* | ||
411 | * We're called via an RPC callback, so BKL is already held. | ||
412 | */ | ||
413 | rpc_execute(&data->task); | ||
414 | |||
415 | dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", | ||
416 | data->task.tk_pid, | ||
417 | inode->i_sb->s_id, | ||
418 | (long long)NFS_FILEID(inode), | ||
419 | data->args.count, | ||
420 | (unsigned long long)data->args.offset); | ||
421 | } | ||
422 | |||
423 | if (put_dreq(dreq)) | ||
424 | nfs_direct_write_complete(dreq, inode); | ||
438 | } | 425 | } |
439 | 426 | ||
440 | static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) | 427 | static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) |
@@ -471,8 +458,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) | |||
471 | data->cred = dreq->ctx->cred; | 458 | data->cred = dreq->ctx->cred; |
472 | 459 | ||
473 | data->args.fh = NFS_FH(data->inode); | 460 | data->args.fh = NFS_FH(data->inode); |
474 | data->args.offset = dreq->pos; | 461 | data->args.offset = 0; |
475 | data->args.count = dreq->user_count; | 462 | data->args.count = 0; |
476 | data->res.count = 0; | 463 | data->res.count = 0; |
477 | data->res.fattr = &data->fattr; | 464 | data->res.fattr = &data->fattr; |
478 | data->res.verf = &data->verf; | 465 | data->res.verf = &data->verf; |
@@ -516,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
516 | 503 | ||
517 | static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) | 504 | static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) |
518 | { | 505 | { |
519 | dreq->commit_data = nfs_commit_alloc(0); | 506 | dreq->commit_data = nfs_commit_alloc(); |
520 | if (dreq->commit_data != NULL) | 507 | if (dreq->commit_data != NULL) |
521 | dreq->commit_data->req = (struct nfs_page *) dreq; | 508 | dreq->commit_data->req = (struct nfs_page *) dreq; |
522 | } | 509 | } |
@@ -534,47 +521,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
534 | } | 521 | } |
535 | #endif | 522 | #endif |
536 | 523 | ||
537 | static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) | ||
538 | { | ||
539 | struct list_head *list; | ||
540 | struct nfs_direct_req *dreq; | ||
541 | unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
542 | |||
543 | dreq = nfs_direct_req_alloc(); | ||
544 | if (!dreq) | ||
545 | return NULL; | ||
546 | |||
547 | list = &dreq->list; | ||
548 | for(;;) { | ||
549 | struct nfs_write_data *data = nfs_writedata_alloc(wpages); | ||
550 | |||
551 | if (unlikely(!data)) { | ||
552 | while (!list_empty(list)) { | ||
553 | data = list_entry(list->next, | ||
554 | struct nfs_write_data, pages); | ||
555 | list_del(&data->pages); | ||
556 | nfs_writedata_free(data); | ||
557 | } | ||
558 | kref_put(&dreq->kref, nfs_direct_req_release); | ||
559 | return NULL; | ||
560 | } | ||
561 | |||
562 | INIT_LIST_HEAD(&data->pages); | ||
563 | list_add(&data->pages, list); | ||
564 | |||
565 | data->req = (struct nfs_page *) dreq; | ||
566 | dreq->outstanding++; | ||
567 | if (nbytes <= wsize) | ||
568 | break; | ||
569 | nbytes -= wsize; | ||
570 | } | ||
571 | |||
572 | nfs_alloc_commit_data(dreq); | ||
573 | |||
574 | kref_get(&dreq->kref); | ||
575 | return dreq; | ||
576 | } | ||
577 | |||
578 | static void nfs_direct_write_result(struct rpc_task *task, void *calldata) | 524 | static void nfs_direct_write_result(struct rpc_task *task, void *calldata) |
579 | { | 525 | { |
580 | struct nfs_write_data *data = calldata; | 526 | struct nfs_write_data *data = calldata; |
@@ -604,8 +550,6 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) | |||
604 | } | 550 | } |
605 | } | 551 | } |
606 | } | 552 | } |
607 | /* In case we have to resend */ | ||
608 | data->args.stable = NFS_FILE_SYNC; | ||
609 | 553 | ||
610 | spin_unlock(&dreq->lock); | 554 | spin_unlock(&dreq->lock); |
611 | } | 555 | } |
@@ -619,14 +563,8 @@ static void nfs_direct_write_release(void *calldata) | |||
619 | struct nfs_write_data *data = calldata; | 563 | struct nfs_write_data *data = calldata; |
620 | struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; | 564 | struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; |
621 | 565 | ||
622 | spin_lock(&dreq->lock); | 566 | if (put_dreq(dreq)) |
623 | if (--dreq->outstanding) { | 567 | nfs_direct_write_complete(dreq, data->inode); |
624 | spin_unlock(&dreq->lock); | ||
625 | return; | ||
626 | } | ||
627 | spin_unlock(&dreq->lock); | ||
628 | |||
629 | nfs_direct_write_complete(dreq, data->inode); | ||
630 | } | 568 | } |
631 | 569 | ||
632 | static const struct rpc_call_ops nfs_write_direct_ops = { | 570 | static const struct rpc_call_ops nfs_write_direct_ops = { |
@@ -635,41 +573,58 @@ static const struct rpc_call_ops nfs_write_direct_ops = { | |||
635 | }; | 573 | }; |
636 | 574 | ||
637 | /* | 575 | /* |
638 | * For each nfs_write_data struct that was allocated on the list, dispatch | 576 | * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE |
639 | * an NFS WRITE operation | 577 | * operation. If nfs_writedata_alloc() or get_user_pages() fails, |
578 | * bail and stop sending more writes. Write length accounting is | ||
579 | * handled automatically by nfs_direct_write_result(). Otherwise, if | ||
580 | * no requests have been sent, just return an error. | ||
640 | */ | 581 | */ |
641 | static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) | 582 | static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) |
642 | { | 583 | { |
643 | struct nfs_open_context *ctx = dreq->ctx; | 584 | struct nfs_open_context *ctx = dreq->ctx; |
644 | struct inode *inode = ctx->dentry->d_inode; | 585 | struct inode *inode = ctx->dentry->d_inode; |
645 | struct list_head *list = &dreq->list; | ||
646 | struct page **pages = dreq->pages; | ||
647 | size_t count = dreq->user_count; | ||
648 | loff_t pos = dreq->pos; | ||
649 | size_t wsize = NFS_SERVER(inode)->wsize; | 586 | size_t wsize = NFS_SERVER(inode)->wsize; |
650 | unsigned int curpage, pgbase; | 587 | unsigned int pgbase; |
588 | int result; | ||
589 | ssize_t started = 0; | ||
590 | |||
591 | get_dreq(dreq); | ||
651 | 592 | ||
652 | curpage = 0; | ||
653 | pgbase = dreq->user_addr & ~PAGE_MASK; | ||
654 | do { | 593 | do { |
655 | struct nfs_write_data *data; | 594 | struct nfs_write_data *data; |
656 | size_t bytes; | 595 | size_t bytes; |
657 | 596 | ||
658 | bytes = wsize; | 597 | pgbase = user_addr & ~PAGE_MASK; |
659 | if (count < wsize) | 598 | bytes = min(wsize,count); |
660 | bytes = count; | 599 | |
600 | result = -ENOMEM; | ||
601 | data = nfs_writedata_alloc(pgbase + bytes); | ||
602 | if (unlikely(!data)) | ||
603 | break; | ||
604 | |||
605 | down_read(&current->mm->mmap_sem); | ||
606 | result = get_user_pages(current, current->mm, user_addr, | ||
607 | data->npages, 0, 0, data->pagevec, NULL); | ||
608 | up_read(&current->mm->mmap_sem); | ||
609 | if (unlikely(result < data->npages)) { | ||
610 | if (result > 0) | ||
611 | nfs_direct_release_pages(data->pagevec, result); | ||
612 | nfs_writedata_release(data); | ||
613 | break; | ||
614 | } | ||
615 | |||
616 | get_dreq(dreq); | ||
661 | 617 | ||
662 | BUG_ON(list_empty(list)); | ||
663 | data = list_entry(list->next, struct nfs_write_data, pages); | ||
664 | list_move_tail(&data->pages, &dreq->rewrite_list); | 618 | list_move_tail(&data->pages, &dreq->rewrite_list); |
665 | 619 | ||
620 | data->req = (struct nfs_page *) dreq; | ||
666 | data->inode = inode; | 621 | data->inode = inode; |
667 | data->cred = ctx->cred; | 622 | data->cred = ctx->cred; |
668 | data->args.fh = NFS_FH(inode); | 623 | data->args.fh = NFS_FH(inode); |
669 | data->args.context = ctx; | 624 | data->args.context = ctx; |
670 | data->args.offset = pos; | 625 | data->args.offset = pos; |
671 | data->args.pgbase = pgbase; | 626 | data->args.pgbase = pgbase; |
672 | data->args.pages = &pages[curpage]; | 627 | data->args.pages = data->pagevec; |
673 | data->args.count = bytes; | 628 | data->args.count = bytes; |
674 | data->res.fattr = &data->fattr; | 629 | data->res.fattr = &data->fattr; |
675 | data->res.count = bytes; | 630 | data->res.count = bytes; |
@@ -693,19 +648,29 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync) | |||
693 | bytes, | 648 | bytes, |
694 | (unsigned long long)data->args.offset); | 649 | (unsigned long long)data->args.offset); |
695 | 650 | ||
651 | started += bytes; | ||
652 | user_addr += bytes; | ||
696 | pos += bytes; | 653 | pos += bytes; |
654 | |||
655 | /* FIXME: Remove this useless math from the final patch */ | ||
697 | pgbase += bytes; | 656 | pgbase += bytes; |
698 | curpage += pgbase >> PAGE_SHIFT; | ||
699 | pgbase &= ~PAGE_MASK; | 657 | pgbase &= ~PAGE_MASK; |
658 | BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); | ||
700 | 659 | ||
701 | count -= bytes; | 660 | count -= bytes; |
702 | } while (count != 0); | 661 | } while (count != 0); |
703 | BUG_ON(!list_empty(list)); | 662 | |
663 | if (put_dreq(dreq)) | ||
664 | nfs_direct_write_complete(dreq, inode); | ||
665 | |||
666 | if (started) | ||
667 | return 0; | ||
668 | return result < 0 ? (ssize_t) result : -EFAULT; | ||
704 | } | 669 | } |
705 | 670 | ||
706 | static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages) | 671 | static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos) |
707 | { | 672 | { |
708 | ssize_t result; | 673 | ssize_t result = 0; |
709 | sigset_t oldset; | 674 | sigset_t oldset; |
710 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 675 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
711 | struct rpc_clnt *clnt = NFS_CLIENT(inode); | 676 | struct rpc_clnt *clnt = NFS_CLIENT(inode); |
@@ -713,17 +678,14 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz | |||
713 | size_t wsize = NFS_SERVER(inode)->wsize; | 678 | size_t wsize = NFS_SERVER(inode)->wsize; |
714 | int sync = 0; | 679 | int sync = 0; |
715 | 680 | ||
716 | dreq = nfs_direct_write_alloc(count, wsize); | 681 | dreq = nfs_direct_req_alloc(); |
717 | if (!dreq) | 682 | if (!dreq) |
718 | return -ENOMEM; | 683 | return -ENOMEM; |
684 | nfs_alloc_commit_data(dreq); | ||
685 | |||
719 | if (dreq->commit_data == NULL || count < wsize) | 686 | if (dreq->commit_data == NULL || count < wsize) |
720 | sync = FLUSH_STABLE; | 687 | sync = FLUSH_STABLE; |
721 | 688 | ||
722 | dreq->user_addr = user_addr; | ||
723 | dreq->user_count = count; | ||
724 | dreq->pos = pos; | ||
725 | dreq->pages = pages; | ||
726 | dreq->npages = nr_pages; | ||
727 | dreq->inode = inode; | 689 | dreq->inode = inode; |
728 | dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); | 690 | dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); |
729 | if (!is_sync_kiocb(iocb)) | 691 | if (!is_sync_kiocb(iocb)) |
@@ -734,8 +696,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz | |||
734 | nfs_begin_data_update(inode); | 696 | nfs_begin_data_update(inode); |
735 | 697 | ||
736 | rpc_clnt_sigmask(clnt, &oldset); | 698 | rpc_clnt_sigmask(clnt, &oldset); |
737 | nfs_direct_write_schedule(dreq, sync); | 699 | result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); |
738 | result = nfs_direct_wait(dreq); | 700 | if (!result) |
701 | result = nfs_direct_wait(dreq); | ||
739 | rpc_clnt_sigunmask(clnt, &oldset); | 702 | rpc_clnt_sigunmask(clnt, &oldset); |
740 | 703 | ||
741 | return result; | 704 | return result; |
@@ -765,8 +728,6 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz | |||
765 | ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) | 728 | ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) |
766 | { | 729 | { |
767 | ssize_t retval = -EINVAL; | 730 | ssize_t retval = -EINVAL; |
768 | int page_count; | ||
769 | struct page **pages; | ||
770 | struct file *file = iocb->ki_filp; | 731 | struct file *file = iocb->ki_filp; |
771 | struct address_space *mapping = file->f_mapping; | 732 | struct address_space *mapping = file->f_mapping; |
772 | 733 | ||
@@ -788,14 +749,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, | |||
788 | if (retval) | 749 | if (retval) |
789 | goto out; | 750 | goto out; |
790 | 751 | ||
791 | retval = nfs_get_user_pages(READ, (unsigned long) buf, | 752 | retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos); |
792 | count, &pages); | ||
793 | if (retval < 0) | ||
794 | goto out; | ||
795 | page_count = retval; | ||
796 | |||
797 | retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos, | ||
798 | pages, page_count); | ||
799 | if (retval > 0) | 753 | if (retval > 0) |
800 | iocb->ki_pos = pos + retval; | 754 | iocb->ki_pos = pos + retval; |
801 | 755 | ||
@@ -831,8 +785,6 @@ out: | |||
831 | ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) | 785 | ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) |
832 | { | 786 | { |
833 | ssize_t retval; | 787 | ssize_t retval; |
834 | int page_count; | ||
835 | struct page **pages; | ||
836 | struct file *file = iocb->ki_filp; | 788 | struct file *file = iocb->ki_filp; |
837 | struct address_space *mapping = file->f_mapping; | 789 | struct address_space *mapping = file->f_mapping; |
838 | 790 | ||
@@ -860,14 +812,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t | |||
860 | if (retval) | 812 | if (retval) |
861 | goto out; | 813 | goto out; |
862 | 814 | ||
863 | retval = nfs_get_user_pages(WRITE, (unsigned long) buf, | 815 | retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos); |
864 | count, &pages); | ||
865 | if (retval < 0) | ||
866 | goto out; | ||
867 | page_count = retval; | ||
868 | |||
869 | retval = nfs_direct_write(iocb, (unsigned long) buf, count, | ||
870 | pos, pages, page_count); | ||
871 | 816 | ||
872 | /* | 817 | /* |
873 | * XXX: nfs_end_data_update() already ensures this file's | 818 | * XXX: nfs_end_data_update() already ensures this file's |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cc2b874ad5a4..48e892880d5b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) | |||
312 | 312 | ||
313 | static int nfs_release_page(struct page *page, gfp_t gfp) | 313 | static int nfs_release_page(struct page *page, gfp_t gfp) |
314 | { | 314 | { |
315 | return !nfs_wb_page(page->mapping->host, page); | 315 | if (gfp & __GFP_FS) |
316 | return !nfs_wb_page(page->mapping->host, page); | ||
317 | else | ||
318 | /* | ||
319 | * Avoid deadlock on nfs_wait_on_request(). | ||
320 | */ | ||
321 | return 0; | ||
316 | } | 322 | } |
317 | 323 | ||
318 | const struct address_space_operations nfs_file_aops = { | 324 | const struct address_space_operations nfs_file_aops = { |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b81e7ed3c902..07a5dd57646e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -130,9 +130,7 @@ nfs_idmap_delete(struct nfs4_client *clp) | |||
130 | 130 | ||
131 | if (!idmap) | 131 | if (!idmap) |
132 | return; | 132 | return; |
133 | dput(idmap->idmap_dentry); | 133 | rpc_unlink(idmap->idmap_dentry); |
134 | idmap->idmap_dentry = NULL; | ||
135 | rpc_unlink(idmap->idmap_path); | ||
136 | clp->cl_idmap = NULL; | 134 | clp->cl_idmap = NULL; |
137 | kfree(idmap); | 135 | kfree(idmap); |
138 | } | 136 | } |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 19b98ca468eb..86b3169c8cac 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -51,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry, | |||
51 | namelen = dentry->d_name.len; | 51 | namelen = dentry->d_name.len; |
52 | buflen -= namelen + 1; | 52 | buflen -= namelen + 1; |
53 | if (buflen < 0) | 53 | if (buflen < 0) |
54 | goto Elong; | 54 | goto Elong_unlock; |
55 | end -= namelen; | 55 | end -= namelen; |
56 | memcpy(end, dentry->d_name.name, namelen); | 56 | memcpy(end, dentry->d_name.name, namelen); |
57 | *--end = '/'; | 57 | *--end = '/'; |
@@ -68,6 +68,8 @@ char *nfs_path(const char *base, const struct dentry *dentry, | |||
68 | end -= namelen; | 68 | end -= namelen; |
69 | memcpy(end, base, namelen); | 69 | memcpy(end, base, namelen); |
70 | return end; | 70 | return end; |
71 | Elong_unlock: | ||
72 | spin_unlock(&dcache_lock); | ||
71 | Elong: | 73 | Elong: |
72 | return ERR_PTR(-ENAMETOOLONG); | 74 | return ERR_PTR(-ENAMETOOLONG); |
73 | } | 75 | } |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b4916b092194..153898e1331f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2668,7 +2668,7 @@ out: | |||
2668 | nfs4_set_cached_acl(inode, acl); | 2668 | nfs4_set_cached_acl(inode, acl); |
2669 | } | 2669 | } |
2670 | 2670 | ||
2671 | static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) | 2671 | static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) |
2672 | { | 2672 | { |
2673 | struct page *pages[NFS4ACL_MAXPAGES]; | 2673 | struct page *pages[NFS4ACL_MAXPAGES]; |
2674 | struct nfs_getaclargs args = { | 2674 | struct nfs_getaclargs args = { |
@@ -2721,6 +2721,19 @@ out_free: | |||
2721 | return ret; | 2721 | return ret; |
2722 | } | 2722 | } |
2723 | 2723 | ||
2724 | static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) | ||
2725 | { | ||
2726 | struct nfs4_exception exception = { }; | ||
2727 | ssize_t ret; | ||
2728 | do { | ||
2729 | ret = __nfs4_get_acl_uncached(inode, buf, buflen); | ||
2730 | if (ret >= 0) | ||
2731 | break; | ||
2732 | ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); | ||
2733 | } while (exception.retry); | ||
2734 | return ret; | ||
2735 | } | ||
2736 | |||
2724 | static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) | 2737 | static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) |
2725 | { | 2738 | { |
2726 | struct nfs_server *server = NFS_SERVER(inode); | 2739 | struct nfs_server *server = NFS_SERVER(inode); |
@@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) | |||
2737 | return nfs4_get_acl_uncached(inode, buf, buflen); | 2750 | return nfs4_get_acl_uncached(inode, buf, buflen); |
2738 | } | 2751 | } |
2739 | 2752 | ||
2740 | static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) | 2753 | static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) |
2741 | { | 2754 | { |
2742 | struct nfs_server *server = NFS_SERVER(inode); | 2755 | struct nfs_server *server = NFS_SERVER(inode); |
2743 | struct page *pages[NFS4ACL_MAXPAGES]; | 2756 | struct page *pages[NFS4ACL_MAXPAGES]; |
@@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen | |||
2763 | return ret; | 2776 | return ret; |
2764 | } | 2777 | } |
2765 | 2778 | ||
2779 | static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) | ||
2780 | { | ||
2781 | struct nfs4_exception exception = { }; | ||
2782 | int err; | ||
2783 | do { | ||
2784 | err = nfs4_handle_exception(NFS_SERVER(inode), | ||
2785 | __nfs4_proc_set_acl(inode, buf, buflen), | ||
2786 | &exception); | ||
2787 | } while (exception.retry); | ||
2788 | return err; | ||
2789 | } | ||
2790 | |||
2766 | static int | 2791 | static int |
2767 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) | 2792 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) |
2768 | { | 2793 | { |
@@ -3144,9 +3169,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) | |||
3144 | default: | 3169 | default: |
3145 | BUG(); | 3170 | BUG(); |
3146 | } | 3171 | } |
3147 | if (res < 0) | ||
3148 | printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", | ||
3149 | __FUNCTION__); | ||
3150 | return res; | 3172 | return res; |
3151 | } | 3173 | } |
3152 | 3174 | ||
@@ -3258,8 +3280,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, | |||
3258 | return ERR_PTR(-ENOMEM); | 3280 | return ERR_PTR(-ENOMEM); |
3259 | } | 3281 | } |
3260 | 3282 | ||
3261 | /* Unlock _before_ we do the RPC call */ | ||
3262 | do_vfs_lock(fl->fl_file, fl); | ||
3263 | return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); | 3283 | return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data); |
3264 | } | 3284 | } |
3265 | 3285 | ||
@@ -3270,30 +3290,28 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * | |||
3270 | struct rpc_task *task; | 3290 | struct rpc_task *task; |
3271 | int status = 0; | 3291 | int status = 0; |
3272 | 3292 | ||
3273 | /* Is this a delegated lock? */ | ||
3274 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) | ||
3275 | goto out_unlock; | ||
3276 | /* Is this open_owner holding any locks on the server? */ | ||
3277 | if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) | ||
3278 | goto out_unlock; | ||
3279 | |||
3280 | status = nfs4_set_lock_state(state, request); | 3293 | status = nfs4_set_lock_state(state, request); |
3294 | /* Unlock _before_ we do the RPC call */ | ||
3295 | request->fl_flags |= FL_EXISTS; | ||
3296 | if (do_vfs_lock(request->fl_file, request) == -ENOENT) | ||
3297 | goto out; | ||
3281 | if (status != 0) | 3298 | if (status != 0) |
3282 | goto out_unlock; | 3299 | goto out; |
3300 | /* Is this a delegated lock? */ | ||
3301 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) | ||
3302 | goto out; | ||
3283 | lsp = request->fl_u.nfs4_fl.owner; | 3303 | lsp = request->fl_u.nfs4_fl.owner; |
3284 | status = -ENOMEM; | ||
3285 | seqid = nfs_alloc_seqid(&lsp->ls_seqid); | 3304 | seqid = nfs_alloc_seqid(&lsp->ls_seqid); |
3305 | status = -ENOMEM; | ||
3286 | if (seqid == NULL) | 3306 | if (seqid == NULL) |
3287 | goto out_unlock; | 3307 | goto out; |
3288 | task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); | 3308 | task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); |
3289 | status = PTR_ERR(task); | 3309 | status = PTR_ERR(task); |
3290 | if (IS_ERR(task)) | 3310 | if (IS_ERR(task)) |
3291 | goto out_unlock; | 3311 | goto out; |
3292 | status = nfs4_wait_for_completion_rpc_task(task); | 3312 | status = nfs4_wait_for_completion_rpc_task(task); |
3293 | rpc_release_task(task); | 3313 | rpc_release_task(task); |
3294 | return status; | 3314 | out: |
3295 | out_unlock: | ||
3296 | do_vfs_lock(request->fl_file, request); | ||
3297 | return status; | 3315 | return status; |
3298 | } | 3316 | } |
3299 | 3317 | ||
@@ -3461,10 +3479,10 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request | |||
3461 | struct nfs4_exception exception = { }; | 3479 | struct nfs4_exception exception = { }; |
3462 | int err; | 3480 | int err; |
3463 | 3481 | ||
3464 | /* Cache the lock if possible... */ | ||
3465 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) | ||
3466 | return 0; | ||
3467 | do { | 3482 | do { |
3483 | /* Cache the lock if possible... */ | ||
3484 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) | ||
3485 | return 0; | ||
3468 | err = _nfs4_do_setlk(state, F_SETLK, request, 1); | 3486 | err = _nfs4_do_setlk(state, F_SETLK, request, 1); |
3469 | if (err != -NFS4ERR_DELAY) | 3487 | if (err != -NFS4ERR_DELAY) |
3470 | break; | 3488 | break; |
@@ -3483,6 +3501,8 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request | |||
3483 | if (err != 0) | 3501 | if (err != 0) |
3484 | return err; | 3502 | return err; |
3485 | do { | 3503 | do { |
3504 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) | ||
3505 | return 0; | ||
3486 | err = _nfs4_do_setlk(state, F_SETLK, request, 0); | 3506 | err = _nfs4_do_setlk(state, F_SETLK, request, 0); |
3487 | if (err != -NFS4ERR_DELAY) | 3507 | if (err != -NFS4ERR_DELAY) |
3488 | break; | 3508 | break; |
@@ -3494,29 +3514,42 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request | |||
3494 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) | 3514 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) |
3495 | { | 3515 | { |
3496 | struct nfs4_client *clp = state->owner->so_client; | 3516 | struct nfs4_client *clp = state->owner->so_client; |
3517 | unsigned char fl_flags = request->fl_flags; | ||
3497 | int status; | 3518 | int status; |
3498 | 3519 | ||
3499 | /* Is this a delegated open? */ | 3520 | /* Is this a delegated open? */ |
3500 | if (NFS_I(state->inode)->delegation_state != 0) { | ||
3501 | /* Yes: cache locks! */ | ||
3502 | status = do_vfs_lock(request->fl_file, request); | ||
3503 | /* ...but avoid races with delegation recall... */ | ||
3504 | if (status < 0 || test_bit(NFS_DELEGATED_STATE, &state->flags)) | ||
3505 | return status; | ||
3506 | } | ||
3507 | down_read(&clp->cl_sem); | ||
3508 | status = nfs4_set_lock_state(state, request); | 3521 | status = nfs4_set_lock_state(state, request); |
3509 | if (status != 0) | 3522 | if (status != 0) |
3510 | goto out; | 3523 | goto out; |
3524 | request->fl_flags |= FL_ACCESS; | ||
3525 | status = do_vfs_lock(request->fl_file, request); | ||
3526 | if (status < 0) | ||
3527 | goto out; | ||
3528 | down_read(&clp->cl_sem); | ||
3529 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { | ||
3530 | struct nfs_inode *nfsi = NFS_I(state->inode); | ||
3531 | /* Yes: cache locks! */ | ||
3532 | down_read(&nfsi->rwsem); | ||
3533 | /* ...but avoid races with delegation recall... */ | ||
3534 | if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { | ||
3535 | request->fl_flags = fl_flags & ~FL_SLEEP; | ||
3536 | status = do_vfs_lock(request->fl_file, request); | ||
3537 | up_read(&nfsi->rwsem); | ||
3538 | goto out_unlock; | ||
3539 | } | ||
3540 | up_read(&nfsi->rwsem); | ||
3541 | } | ||
3511 | status = _nfs4_do_setlk(state, cmd, request, 0); | 3542 | status = _nfs4_do_setlk(state, cmd, request, 0); |
3512 | if (status != 0) | 3543 | if (status != 0) |
3513 | goto out; | 3544 | goto out_unlock; |
3514 | /* Note: we always want to sleep here! */ | 3545 | /* Note: we always want to sleep here! */ |
3515 | request->fl_flags |= FL_SLEEP; | 3546 | request->fl_flags = fl_flags | FL_SLEEP; |
3516 | if (do_vfs_lock(request->fl_file, request) < 0) | 3547 | if (do_vfs_lock(request->fl_file, request) < 0) |
3517 | printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); | 3548 | printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); |
3518 | out: | 3549 | out_unlock: |
3519 | up_read(&clp->cl_sem); | 3550 | up_read(&clp->cl_sem); |
3551 | out: | ||
3552 | request->fl_flags = fl_flags; | ||
3520 | return status; | 3553 | return status; |
3521 | } | 3554 | } |
3522 | 3555 | ||
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1750d996f49f..730ec8fb31c6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
3355 | struct kvec *iov = rcvbuf->head; | 3355 | struct kvec *iov = rcvbuf->head; |
3356 | unsigned int nr, pglen = rcvbuf->page_len; | 3356 | unsigned int nr, pglen = rcvbuf->page_len; |
3357 | uint32_t *end, *entry, *p, *kaddr; | 3357 | uint32_t *end, *entry, *p, *kaddr; |
3358 | uint32_t len, attrlen; | 3358 | uint32_t len, attrlen, xlen; |
3359 | int hdrlen, recvd, status; | 3359 | int hdrlen, recvd, status; |
3360 | 3360 | ||
3361 | status = decode_op_hdr(xdr, OP_READDIR); | 3361 | status = decode_op_hdr(xdr, OP_READDIR); |
@@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
3377 | 3377 | ||
3378 | BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); | 3378 | BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); |
3379 | kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); | 3379 | kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); |
3380 | end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); | 3380 | end = p + ((pglen + readdir->pgbase) >> 2); |
3381 | entry = p; | 3381 | entry = p; |
3382 | for (nr = 0; *p++; nr++) { | 3382 | for (nr = 0; *p++; nr++) { |
3383 | if (p + 3 > end) | 3383 | if (end - p < 3) |
3384 | goto short_pkt; | 3384 | goto short_pkt; |
3385 | dprintk("cookie = %Lu, ", *((unsigned long long *)p)); | 3385 | dprintk("cookie = %Lu, ", *((unsigned long long *)p)); |
3386 | p += 2; /* cookie */ | 3386 | p += 2; /* cookie */ |
@@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
3389 | printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); | 3389 | printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); |
3390 | goto err_unmap; | 3390 | goto err_unmap; |
3391 | } | 3391 | } |
3392 | dprintk("filename = %*s\n", len, (char *)p); | 3392 | xlen = XDR_QUADLEN(len); |
3393 | p += XDR_QUADLEN(len); | 3393 | if (end - p < xlen + 1) |
3394 | if (p + 1 > end) | ||
3395 | goto short_pkt; | 3394 | goto short_pkt; |
3395 | dprintk("filename = %*s\n", len, (char *)p); | ||
3396 | p += xlen; | ||
3396 | len = ntohl(*p++); /* bitmap length */ | 3397 | len = ntohl(*p++); /* bitmap length */ |
3397 | p += len; | 3398 | if (end - p < len + 1) |
3398 | if (p + 1 > end) | ||
3399 | goto short_pkt; | 3399 | goto short_pkt; |
3400 | p += len; | ||
3400 | attrlen = XDR_QUADLEN(ntohl(*p++)); | 3401 | attrlen = XDR_QUADLEN(ntohl(*p++)); |
3401 | p += attrlen; /* attributes */ | 3402 | if (end - p < attrlen + 2) |
3402 | if (p + 2 > end) | ||
3403 | goto short_pkt; | 3403 | goto short_pkt; |
3404 | p += attrlen; /* attributes */ | ||
3404 | entry = p; | 3405 | entry = p; |
3405 | } | 3406 | } |
3406 | if (!nr && (entry[0] != 0 || entry[1] == 0)) | 3407 | if (!nr && (entry[0] != 0 || entry[1] == 0)) |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 52bf634260a1..7a9ee00e0c61 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool; | |||
43 | 43 | ||
44 | #define MIN_POOL_READ (32) | 44 | #define MIN_POOL_READ (32) |
45 | 45 | ||
46 | struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) | 46 | struct nfs_read_data *nfs_readdata_alloc(size_t len) |
47 | { | 47 | { |
48 | unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
48 | struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); | 49 | struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); |
49 | 50 | ||
50 | if (p) { | 51 | if (p) { |
51 | memset(p, 0, sizeof(*p)); | 52 | memset(p, 0, sizeof(*p)); |
52 | INIT_LIST_HEAD(&p->pages); | 53 | INIT_LIST_HEAD(&p->pages); |
54 | p->npages = pagecount; | ||
53 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 55 | if (pagecount <= ARRAY_SIZE(p->page_array)) |
54 | p->pagevec = p->page_array; | 56 | p->pagevec = p->page_array; |
55 | else { | 57 | else { |
@@ -63,7 +65,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) | |||
63 | return p; | 65 | return p; |
64 | } | 66 | } |
65 | 67 | ||
66 | void nfs_readdata_free(struct nfs_read_data *p) | 68 | static void nfs_readdata_free(struct nfs_read_data *p) |
67 | { | 69 | { |
68 | if (p && (p->pagevec != &p->page_array[0])) | 70 | if (p && (p->pagevec != &p->page_array[0])) |
69 | kfree(p->pagevec); | 71 | kfree(p->pagevec); |
@@ -116,10 +118,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) | |||
116 | pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; | 118 | pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; |
117 | base &= ~PAGE_CACHE_MASK; | 119 | base &= ~PAGE_CACHE_MASK; |
118 | pglen = PAGE_CACHE_SIZE - base; | 120 | pglen = PAGE_CACHE_SIZE - base; |
119 | if (pglen < remainder) | 121 | for (;;) { |
122 | if (remainder <= pglen) { | ||
123 | memclear_highpage_flush(*pages, base, remainder); | ||
124 | break; | ||
125 | } | ||
120 | memclear_highpage_flush(*pages, base, pglen); | 126 | memclear_highpage_flush(*pages, base, pglen); |
121 | else | 127 | pages++; |
122 | memclear_highpage_flush(*pages, base, remainder); | 128 | remainder -= pglen; |
129 | pglen = PAGE_CACHE_SIZE; | ||
130 | base = 0; | ||
131 | } | ||
123 | } | 132 | } |
124 | 133 | ||
125 | /* | 134 | /* |
@@ -133,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, | |||
133 | int result; | 142 | int result; |
134 | struct nfs_read_data *rdata; | 143 | struct nfs_read_data *rdata; |
135 | 144 | ||
136 | rdata = nfs_readdata_alloc(1); | 145 | rdata = nfs_readdata_alloc(count); |
137 | if (!rdata) | 146 | if (!rdata) |
138 | return -ENOMEM; | 147 | return -ENOMEM; |
139 | 148 | ||
@@ -329,25 +338,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) | |||
329 | struct nfs_page *req = nfs_list_entry(head->next); | 338 | struct nfs_page *req = nfs_list_entry(head->next); |
330 | struct page *page = req->wb_page; | 339 | struct page *page = req->wb_page; |
331 | struct nfs_read_data *data; | 340 | struct nfs_read_data *data; |
332 | unsigned int rsize = NFS_SERVER(inode)->rsize; | 341 | size_t rsize = NFS_SERVER(inode)->rsize, nbytes; |
333 | unsigned int nbytes, offset; | 342 | unsigned int offset; |
334 | int requests = 0; | 343 | int requests = 0; |
335 | LIST_HEAD(list); | 344 | LIST_HEAD(list); |
336 | 345 | ||
337 | nfs_list_remove_request(req); | 346 | nfs_list_remove_request(req); |
338 | 347 | ||
339 | nbytes = req->wb_bytes; | 348 | nbytes = req->wb_bytes; |
340 | for(;;) { | 349 | do { |
341 | data = nfs_readdata_alloc(1); | 350 | size_t len = min(nbytes,rsize); |
351 | |||
352 | data = nfs_readdata_alloc(len); | ||
342 | if (!data) | 353 | if (!data) |
343 | goto out_bad; | 354 | goto out_bad; |
344 | INIT_LIST_HEAD(&data->pages); | 355 | INIT_LIST_HEAD(&data->pages); |
345 | list_add(&data->pages, &list); | 356 | list_add(&data->pages, &list); |
346 | requests++; | 357 | requests++; |
347 | if (nbytes <= rsize) | 358 | nbytes -= len; |
348 | break; | 359 | } while(nbytes != 0); |
349 | nbytes -= rsize; | ||
350 | } | ||
351 | atomic_set(&req->wb_complete, requests); | 360 | atomic_set(&req->wb_complete, requests); |
352 | 361 | ||
353 | ClearPageError(page); | 362 | ClearPageError(page); |
@@ -395,7 +404,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) | |||
395 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) | 404 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) |
396 | return nfs_pagein_multi(head, inode); | 405 | return nfs_pagein_multi(head, inode); |
397 | 406 | ||
398 | data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); | 407 | data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize); |
399 | if (!data) | 408 | if (!data) |
400 | goto out_bad; | 409 | goto out_bad; |
401 | 410 | ||
@@ -476,6 +485,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) | |||
476 | unsigned int base = data->args.pgbase; | 485 | unsigned int base = data->args.pgbase; |
477 | struct page **pages; | 486 | struct page **pages; |
478 | 487 | ||
488 | if (data->res.eof) | ||
489 | count = data->args.count; | ||
479 | if (unlikely(count == 0)) | 490 | if (unlikely(count == 0)) |
480 | return; | 491 | return; |
481 | pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; | 492 | pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; |
@@ -483,11 +494,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) | |||
483 | count += base; | 494 | count += base; |
484 | for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) | 495 | for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) |
485 | SetPageUptodate(*pages); | 496 | SetPageUptodate(*pages); |
486 | /* | 497 | if (count != 0) |
487 | * Was this an eof or a short read? If the latter, don't mark the page | ||
488 | * as uptodate yet. | ||
489 | */ | ||
490 | if (count > 0 && (data->res.eof || data->args.count == data->res.count)) | ||
491 | SetPageUptodate(*pages); | 498 | SetPageUptodate(*pages); |
492 | } | 499 | } |
493 | 500 | ||
@@ -502,6 +509,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data) | |||
502 | count += base; | 509 | count += base; |
503 | for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) | 510 | for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) |
504 | SetPageError(*pages); | 511 | SetPageError(*pages); |
512 | if (count != 0) | ||
513 | SetPageError(*pages); | ||
505 | } | 514 | } |
506 | 515 | ||
507 | /* | 516 | /* |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bca5734ca9fb..8ab3cf10d792 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool; | |||
90 | 90 | ||
91 | static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); | 91 | static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); |
92 | 92 | ||
93 | struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) | 93 | struct nfs_write_data *nfs_commit_alloc(void) |
94 | { | 94 | { |
95 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); | 95 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); |
96 | 96 | ||
97 | if (p) { | 97 | if (p) { |
98 | memset(p, 0, sizeof(*p)); | 98 | memset(p, 0, sizeof(*p)); |
99 | INIT_LIST_HEAD(&p->pages); | 99 | INIT_LIST_HEAD(&p->pages); |
100 | if (pagecount <= ARRAY_SIZE(p->page_array)) | ||
101 | p->pagevec = p->page_array; | ||
102 | else { | ||
103 | p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); | ||
104 | if (!p->pagevec) { | ||
105 | mempool_free(p, nfs_commit_mempool); | ||
106 | p = NULL; | ||
107 | } | ||
108 | } | ||
109 | } | 100 | } |
110 | return p; | 101 | return p; |
111 | } | 102 | } |
@@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p) | |||
117 | mempool_free(p, nfs_commit_mempool); | 108 | mempool_free(p, nfs_commit_mempool); |
118 | } | 109 | } |
119 | 110 | ||
120 | struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) | 111 | struct nfs_write_data *nfs_writedata_alloc(size_t len) |
121 | { | 112 | { |
113 | unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
122 | struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); | 114 | struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); |
123 | 115 | ||
124 | if (p) { | 116 | if (p) { |
125 | memset(p, 0, sizeof(*p)); | 117 | memset(p, 0, sizeof(*p)); |
126 | INIT_LIST_HEAD(&p->pages); | 118 | INIT_LIST_HEAD(&p->pages); |
119 | p->npages = pagecount; | ||
127 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 120 | if (pagecount <= ARRAY_SIZE(p->page_array)) |
128 | p->pagevec = p->page_array; | 121 | p->pagevec = p->page_array; |
129 | else { | 122 | else { |
@@ -137,7 +130,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) | |||
137 | return p; | 130 | return p; |
138 | } | 131 | } |
139 | 132 | ||
140 | void nfs_writedata_free(struct nfs_write_data *p) | 133 | static void nfs_writedata_free(struct nfs_write_data *p) |
141 | { | 134 | { |
142 | if (p && (p->pagevec != &p->page_array[0])) | 135 | if (p && (p->pagevec != &p->page_array[0])) |
143 | kfree(p->pagevec); | 136 | kfree(p->pagevec); |
@@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, | |||
208 | int result, written = 0; | 201 | int result, written = 0; |
209 | struct nfs_write_data *wdata; | 202 | struct nfs_write_data *wdata; |
210 | 203 | ||
211 | wdata = nfs_writedata_alloc(1); | 204 | wdata = nfs_writedata_alloc(wsize); |
212 | if (!wdata) | 205 | if (!wdata) |
213 | return -ENOMEM; | 206 | return -ENOMEM; |
214 | 207 | ||
@@ -578,7 +571,7 @@ static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, un | |||
578 | return ret; | 571 | return ret; |
579 | } | 572 | } |
580 | 573 | ||
581 | static void nfs_cancel_requests(struct list_head *head) | 574 | static void nfs_cancel_dirty_list(struct list_head *head) |
582 | { | 575 | { |
583 | struct nfs_page *req; | 576 | struct nfs_page *req; |
584 | while(!list_empty(head)) { | 577 | while(!list_empty(head)) { |
@@ -589,6 +582,19 @@ static void nfs_cancel_requests(struct list_head *head) | |||
589 | } | 582 | } |
590 | } | 583 | } |
591 | 584 | ||
585 | static void nfs_cancel_commit_list(struct list_head *head) | ||
586 | { | ||
587 | struct nfs_page *req; | ||
588 | |||
589 | while(!list_empty(head)) { | ||
590 | req = nfs_list_entry(head->next); | ||
591 | nfs_list_remove_request(req); | ||
592 | nfs_inode_remove_request(req); | ||
593 | nfs_clear_page_writeback(req); | ||
594 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||
595 | } | ||
596 | } | ||
597 | |||
592 | /* | 598 | /* |
593 | * nfs_scan_dirty - Scan an inode for dirty requests | 599 | * nfs_scan_dirty - Scan an inode for dirty requests |
594 | * @inode: NFS inode to scan | 600 | * @inode: NFS inode to scan |
@@ -986,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) | |||
986 | struct nfs_page *req = nfs_list_entry(head->next); | 992 | struct nfs_page *req = nfs_list_entry(head->next); |
987 | struct page *page = req->wb_page; | 993 | struct page *page = req->wb_page; |
988 | struct nfs_write_data *data; | 994 | struct nfs_write_data *data; |
989 | unsigned int wsize = NFS_SERVER(inode)->wsize; | 995 | size_t wsize = NFS_SERVER(inode)->wsize, nbytes; |
990 | unsigned int nbytes, offset; | 996 | unsigned int offset; |
991 | int requests = 0; | 997 | int requests = 0; |
992 | LIST_HEAD(list); | 998 | LIST_HEAD(list); |
993 | 999 | ||
994 | nfs_list_remove_request(req); | 1000 | nfs_list_remove_request(req); |
995 | 1001 | ||
996 | nbytes = req->wb_bytes; | 1002 | nbytes = req->wb_bytes; |
997 | for (;;) { | 1003 | do { |
998 | data = nfs_writedata_alloc(1); | 1004 | size_t len = min(nbytes, wsize); |
1005 | |||
1006 | data = nfs_writedata_alloc(len); | ||
999 | if (!data) | 1007 | if (!data) |
1000 | goto out_bad; | 1008 | goto out_bad; |
1001 | list_add(&data->pages, &list); | 1009 | list_add(&data->pages, &list); |
1002 | requests++; | 1010 | requests++; |
1003 | if (nbytes <= wsize) | 1011 | nbytes -= len; |
1004 | break; | 1012 | } while (nbytes != 0); |
1005 | nbytes -= wsize; | ||
1006 | } | ||
1007 | atomic_set(&req->wb_complete, requests); | 1013 | atomic_set(&req->wb_complete, requests); |
1008 | 1014 | ||
1009 | ClearPageError(page); | 1015 | ClearPageError(page); |
@@ -1057,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) | |||
1057 | struct nfs_write_data *data; | 1063 | struct nfs_write_data *data; |
1058 | unsigned int count; | 1064 | unsigned int count; |
1059 | 1065 | ||
1060 | data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); | 1066 | data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize); |
1061 | if (!data) | 1067 | if (!data) |
1062 | goto out_bad; | 1068 | goto out_bad; |
1063 | 1069 | ||
@@ -1365,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) | |||
1365 | struct nfs_write_data *data; | 1371 | struct nfs_write_data *data; |
1366 | struct nfs_page *req; | 1372 | struct nfs_page *req; |
1367 | 1373 | ||
1368 | data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); | 1374 | data = nfs_commit_alloc(); |
1369 | 1375 | ||
1370 | if (!data) | 1376 | if (!data) |
1371 | goto out_bad; | 1377 | goto out_bad; |
@@ -1381,6 +1387,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) | |||
1381 | nfs_list_remove_request(req); | 1387 | nfs_list_remove_request(req); |
1382 | nfs_mark_request_commit(req); | 1388 | nfs_mark_request_commit(req); |
1383 | nfs_clear_page_writeback(req); | 1389 | nfs_clear_page_writeback(req); |
1390 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||
1384 | } | 1391 | } |
1385 | return -ENOMEM; | 1392 | return -ENOMEM; |
1386 | } | 1393 | } |
@@ -1499,7 +1506,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, | |||
1499 | if (pages != 0) { | 1506 | if (pages != 0) { |
1500 | spin_unlock(&nfsi->req_lock); | 1507 | spin_unlock(&nfsi->req_lock); |
1501 | if (how & FLUSH_INVALIDATE) | 1508 | if (how & FLUSH_INVALIDATE) |
1502 | nfs_cancel_requests(&head); | 1509 | nfs_cancel_dirty_list(&head); |
1503 | else | 1510 | else |
1504 | ret = nfs_flush_list(inode, &head, pages, how); | 1511 | ret = nfs_flush_list(inode, &head, pages, how); |
1505 | spin_lock(&nfsi->req_lock); | 1512 | spin_lock(&nfsi->req_lock); |
@@ -1512,7 +1519,7 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start, | |||
1512 | break; | 1519 | break; |
1513 | if (how & FLUSH_INVALIDATE) { | 1520 | if (how & FLUSH_INVALIDATE) { |
1514 | spin_unlock(&nfsi->req_lock); | 1521 | spin_unlock(&nfsi->req_lock); |
1515 | nfs_cancel_requests(&head); | 1522 | nfs_cancel_commit_list(&head); |
1516 | spin_lock(&nfsi->req_lock); | 1523 | spin_lock(&nfsi->req_lock); |
1517 | continue; | 1524 | continue; |
1518 | } | 1525 | } |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b0e095ea0c03..ee4eff27aedc 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -721,6 +721,12 @@ nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) | |||
721 | return nfs_ok; | 721 | return nfs_ok; |
722 | } | 722 | } |
723 | 723 | ||
724 | static inline void nfsd4_increment_op_stats(u32 opnum) | ||
725 | { | ||
726 | if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) | ||
727 | nfsdstats.nfs4_opcount[opnum]++; | ||
728 | } | ||
729 | |||
724 | 730 | ||
725 | /* | 731 | /* |
726 | * COMPOUND call. | 732 | * COMPOUND call. |
@@ -930,6 +936,8 @@ encode_op: | |||
930 | /* XXX Ugh, we need to get rid of this kind of special case: */ | 936 | /* XXX Ugh, we need to get rid of this kind of special case: */ |
931 | if (op->opnum == OP_READ && op->u.read.rd_filp) | 937 | if (op->opnum == OP_READ && op->u.read.rd_filp) |
932 | fput(op->u.read.rd_filp); | 938 | fput(op->u.read.rd_filp); |
939 | |||
940 | nfsd4_increment_op_stats(op->opnum); | ||
933 | } | 941 | } |
934 | 942 | ||
935 | out: | 943 | out: |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ecc439d2565f..501d83884530 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -187,6 +187,11 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
187 | goto out; | 187 | goto out; |
188 | } | 188 | } |
189 | 189 | ||
190 | /* Set user creds for this exportpoint */ | ||
191 | error = nfserrno(nfsd_setuser(rqstp, exp)); | ||
192 | if (error) | ||
193 | goto out; | ||
194 | |||
190 | /* | 195 | /* |
191 | * Look up the dentry using the NFS file handle. | 196 | * Look up the dentry using the NFS file handle. |
192 | */ | 197 | */ |
@@ -241,16 +246,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
241 | dprintk("nfsd: fh_verify - just checking\n"); | 246 | dprintk("nfsd: fh_verify - just checking\n"); |
242 | dentry = fhp->fh_dentry; | 247 | dentry = fhp->fh_dentry; |
243 | exp = fhp->fh_export; | 248 | exp = fhp->fh_export; |
249 | /* Set user creds for this exportpoint; necessary even | ||
250 | * in the "just checking" case because this may be a | ||
251 | * filehandle that was created by fh_compose, and that | ||
252 | * is about to be used in another nfsv4 compound | ||
253 | * operation */ | ||
254 | error = nfserrno(nfsd_setuser(rqstp, exp)); | ||
255 | if (error) | ||
256 | goto out; | ||
244 | } | 257 | } |
245 | cache_get(&exp->h); | 258 | cache_get(&exp->h); |
246 | 259 | ||
247 | /* Set user creds for this exportpoint; necessary even in the "just | ||
248 | * checking" case because this may be a filehandle that was created by | ||
249 | * fh_compose, and that is about to be used in another nfsv4 compound | ||
250 | * operation */ | ||
251 | error = nfserrno(nfsd_setuser(rqstp, exp)); | ||
252 | if (error) | ||
253 | goto out; | ||
254 | 260 | ||
255 | error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); | 261 | error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); |
256 | if (error) | 262 | if (error) |
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 57265d563804..71944cddf680 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c | |||
@@ -72,6 +72,16 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) | |||
72 | /* show my rpc info */ | 72 | /* show my rpc info */ |
73 | svc_seq_show(seq, &nfsd_svcstats); | 73 | svc_seq_show(seq, &nfsd_svcstats); |
74 | 74 | ||
75 | #ifdef CONFIG_NFSD_V4 | ||
76 | /* Show count for individual nfsv4 operations */ | ||
77 | /* Writing operation numbers 0 1 2 also for maintaining uniformity */ | ||
78 | seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); | ||
79 | for (i = 0; i <= LAST_NFS4_OP; i++) | ||
80 | seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]); | ||
81 | |||
82 | seq_putc(seq, '\n'); | ||
83 | #endif | ||
84 | |||
75 | return 0; | 85 | return 0; |
76 | } | 86 | } |
77 | 87 | ||
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4c86b7e1d1eb..d313f356e66a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni) | |||
367 | kmem_cache_free(ntfs_inode_cache, ni); | 367 | kmem_cache_free(ntfs_inode_cache, ni); |
368 | } | 368 | } |
369 | 369 | ||
370 | /* | ||
371 | * The attribute runlist lock has separate locking rules from the | ||
372 | * normal runlist lock, so split the two lock-classes: | ||
373 | */ | ||
374 | static struct lock_class_key attr_list_rl_lock_class; | ||
375 | |||
370 | /** | 376 | /** |
371 | * __ntfs_init_inode - initialize ntfs specific part of an inode | 377 | * __ntfs_init_inode - initialize ntfs specific part of an inode |
372 | * @sb: super block of mounted volume | 378 | * @sb: super block of mounted volume |
@@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) | |||
394 | ni->attr_list_size = 0; | 400 | ni->attr_list_size = 0; |
395 | ni->attr_list = NULL; | 401 | ni->attr_list = NULL; |
396 | ntfs_init_runlist(&ni->attr_list_rl); | 402 | ntfs_init_runlist(&ni->attr_list_rl); |
403 | lockdep_set_class(&ni->attr_list_rl.lock, | ||
404 | &attr_list_rl_lock_class); | ||
397 | ni->itype.index.bmp_ino = NULL; | 405 | ni->itype.index.bmp_ino = NULL; |
398 | ni->itype.index.block_size = 0; | 406 | ni->itype.index.block_size = 0; |
399 | ni->itype.index.vcn_size = 0; | 407 | ni->itype.index.vcn_size = 0; |
@@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) | |||
405 | ni->ext.base_ntfs_ino = NULL; | 413 | ni->ext.base_ntfs_ino = NULL; |
406 | } | 414 | } |
407 | 415 | ||
416 | /* | ||
417 | * Extent inodes get MFT-mapped in a nested way, while the base inode | ||
418 | * is still mapped. Teach this nesting to the lock validator by creating | ||
419 | * a separate class for nested inode's mrec_lock's: | ||
420 | */ | ||
421 | static struct lock_class_key extent_inode_mrec_lock_key; | ||
422 | |||
408 | inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, | 423 | inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, |
409 | unsigned long mft_no) | 424 | unsigned long mft_no) |
410 | { | 425 | { |
@@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, | |||
413 | ntfs_debug("Entering."); | 428 | ntfs_debug("Entering."); |
414 | if (likely(ni != NULL)) { | 429 | if (likely(ni != NULL)) { |
415 | __ntfs_init_inode(sb, ni); | 430 | __ntfs_init_inode(sb, ni); |
431 | lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key); | ||
416 | ni->mft_no = mft_no; | 432 | ni->mft_no = mft_no; |
417 | ni->type = AT_UNUSED; | 433 | ni->type = AT_UNUSED; |
418 | ni->name = NULL; | 434 | ni->name = NULL; |
@@ -1722,6 +1738,15 @@ err_out: | |||
1722 | return err; | 1738 | return err; |
1723 | } | 1739 | } |
1724 | 1740 | ||
1741 | /* | ||
1742 | * The MFT inode has special locking, so teach the lock validator | ||
1743 | * about this by splitting off the locking rules of the MFT from | ||
1744 | * the locking rules of other inodes. The MFT inode can never be | ||
1745 | * accessed from the VFS side (or even internally), only by the | ||
1746 | * map_mft functions. | ||
1747 | */ | ||
1748 | static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key; | ||
1749 | |||
1725 | /** | 1750 | /** |
1726 | * ntfs_read_inode_mount - special read_inode for mount time use only | 1751 | * ntfs_read_inode_mount - special read_inode for mount time use only |
1727 | * @vi: inode to read | 1752 | * @vi: inode to read |
@@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi) | |||
2148 | ntfs_attr_put_search_ctx(ctx); | 2173 | ntfs_attr_put_search_ctx(ctx); |
2149 | ntfs_debug("Done."); | 2174 | ntfs_debug("Done."); |
2150 | ntfs_free(m); | 2175 | ntfs_free(m); |
2176 | |||
2177 | /* | ||
2178 | * Split the locking rules of the MFT inode from the | ||
2179 | * locking rules of other inodes: | ||
2180 | */ | ||
2181 | lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key); | ||
2182 | lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key); | ||
2183 | |||
2151 | return 0; | 2184 | return 0; |
2152 | 2185 | ||
2153 | em_put_err_out: | 2186 | em_put_err_out: |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0e14acea3f8b..74e0ee8fce72 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -1724,6 +1724,14 @@ upcase_failed: | |||
1724 | return FALSE; | 1724 | return FALSE; |
1725 | } | 1725 | } |
1726 | 1726 | ||
1727 | /* | ||
1728 | * The lcn and mft bitmap inodes are NTFS-internal inodes with | ||
1729 | * their own special locking rules: | ||
1730 | */ | ||
1731 | static struct lock_class_key | ||
1732 | lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key, | ||
1733 | mftbmp_runlist_lock_key, mftbmp_mrec_lock_key; | ||
1734 | |||
1727 | /** | 1735 | /** |
1728 | * load_system_files - open the system files using normal functions | 1736 | * load_system_files - open the system files using normal functions |
1729 | * @vol: ntfs super block describing device whose system files to load | 1737 | * @vol: ntfs super block describing device whose system files to load |
@@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol) | |||
1780 | ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); | 1788 | ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); |
1781 | goto iput_mirr_err_out; | 1789 | goto iput_mirr_err_out; |
1782 | } | 1790 | } |
1791 | lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock, | ||
1792 | &mftbmp_runlist_lock_key); | ||
1793 | lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock, | ||
1794 | &mftbmp_mrec_lock_key); | ||
1783 | /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */ | 1795 | /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */ |
1784 | if (!load_and_init_upcase(vol)) | 1796 | if (!load_and_init_upcase(vol)) |
1785 | goto iput_mftbmp_err_out; | 1797 | goto iput_mftbmp_err_out; |
@@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol) | |||
1802 | iput(vol->lcnbmp_ino); | 1814 | iput(vol->lcnbmp_ino); |
1803 | goto bitmap_failed; | 1815 | goto bitmap_failed; |
1804 | } | 1816 | } |
1817 | lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock, | ||
1818 | &lcnbmp_runlist_lock_key); | ||
1819 | lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock, | ||
1820 | &lcnbmp_mrec_lock_key); | ||
1821 | |||
1805 | NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); | 1822 | NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); |
1806 | if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { | 1823 | if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { |
1807 | iput(vol->lcnbmp_ino); | 1824 | iput(vol->lcnbmp_ino); |
@@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | |||
2743 | struct inode *tmp_ino; | 2760 | struct inode *tmp_ino; |
2744 | int blocksize, result; | 2761 | int blocksize, result; |
2745 | 2762 | ||
2763 | /* | ||
2764 | * We do a pretty difficult piece of bootstrap by reading the | ||
2765 | * MFT (and other metadata) from disk into memory. We'll only | ||
2766 | * release this metadata during umount, so the locking patterns | ||
2767 | * observed during bootstrap do not count. So turn off the | ||
2768 | * observation of locking patterns (strictly for this context | ||
2769 | * only) while mounting NTFS. [The validator is still active | ||
2770 | * otherwise, even for this context: it will for example record | ||
2771 | * lock class registrations.] | ||
2772 | */ | ||
2773 | lockdep_off(); | ||
2746 | ntfs_debug("Entering."); | 2774 | ntfs_debug("Entering."); |
2747 | #ifndef NTFS_RW | 2775 | #ifndef NTFS_RW |
2748 | sb->s_flags |= MS_RDONLY; | 2776 | sb->s_flags |= MS_RDONLY; |
@@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | |||
2754 | if (!silent) | 2782 | if (!silent) |
2755 | ntfs_error(sb, "Allocation of NTFS volume structure " | 2783 | ntfs_error(sb, "Allocation of NTFS volume structure " |
2756 | "failed. Aborting mount..."); | 2784 | "failed. Aborting mount..."); |
2785 | lockdep_on(); | ||
2757 | return -ENOMEM; | 2786 | return -ENOMEM; |
2758 | } | 2787 | } |
2759 | /* Initialize ntfs_volume structure. */ | 2788 | /* Initialize ntfs_volume structure. */ |
@@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | |||
2940 | mutex_unlock(&ntfs_lock); | 2969 | mutex_unlock(&ntfs_lock); |
2941 | sb->s_export_op = &ntfs_export_ops; | 2970 | sb->s_export_op = &ntfs_export_ops; |
2942 | lock_kernel(); | 2971 | lock_kernel(); |
2972 | lockdep_on(); | ||
2943 | return 0; | 2973 | return 0; |
2944 | } | 2974 | } |
2945 | ntfs_error(sb, "Failed to allocate root directory."); | 2975 | ntfs_error(sb, "Failed to allocate root directory."); |
@@ -3059,6 +3089,7 @@ err_out_now: | |||
3059 | sb->s_fs_info = NULL; | 3089 | sb->s_fs_info = NULL; |
3060 | kfree(vol); | 3090 | kfree(vol); |
3061 | ntfs_debug("Failed, returning -EINVAL."); | 3091 | ntfs_debug("Failed, returning -EINVAL."); |
3092 | lockdep_on(); | ||
3062 | return -EINVAL; | 3093 | return -EINVAL; |
3063 | } | 3094 | } |
3064 | 3095 | ||
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 1b8346dd0572..9503240ef0e5 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2375,7 +2375,6 @@ leave: | |||
2375 | mlog(0, "returning %d\n", ret); | 2375 | mlog(0, "returning %d\n", ret); |
2376 | return ret; | 2376 | return ret; |
2377 | } | 2377 | } |
2378 | EXPORT_SYMBOL_GPL(dlm_migrate_lockres); | ||
2379 | 2378 | ||
2380 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) | 2379 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) |
2381 | { | 2380 | { |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index b0c3134f4f70..37be4b2e0d4a 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
155 | else | 155 | else |
156 | status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); | 156 | status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); |
157 | 157 | ||
158 | if (status != DLM_NORMAL) | 158 | if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node)) |
159 | goto leave; | 159 | goto leave; |
160 | 160 | ||
161 | /* By now this has been masked out of cancel requests. */ | 161 | /* By now this has been masked out of cancel requests. */ |
@@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
183 | spin_lock(&lock->spinlock); | 183 | spin_lock(&lock->spinlock); |
184 | /* if the master told us the lock was already granted, | 184 | /* if the master told us the lock was already granted, |
185 | * let the ast handle all of these actions */ | 185 | * let the ast handle all of these actions */ |
186 | if (status == DLM_NORMAL && | 186 | if (status == DLM_CANCELGRANT) { |
187 | lksb->status == DLM_CANCELGRANT) { | ||
188 | actions &= ~(DLM_UNLOCK_REMOVE_LOCK| | 187 | actions &= ~(DLM_UNLOCK_REMOVE_LOCK| |
189 | DLM_UNLOCK_REGRANT_LOCK| | 188 | DLM_UNLOCK_REGRANT_LOCK| |
190 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); | 189 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); |
@@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, | |||
349 | vec, veclen, owner, &status); | 348 | vec, veclen, owner, &status); |
350 | if (tmpret >= 0) { | 349 | if (tmpret >= 0) { |
351 | // successfully sent and received | 350 | // successfully sent and received |
352 | if (status == DLM_CANCELGRANT) | 351 | if (status == DLM_FORWARD) |
353 | ret = DLM_NORMAL; | ||
354 | else if (status == DLM_FORWARD) { | ||
355 | mlog(0, "master was in-progress. retry\n"); | 352 | mlog(0, "master was in-progress. retry\n"); |
356 | ret = DLM_FORWARD; | 353 | ret = status; |
357 | } else | ||
358 | ret = status; | ||
359 | lksb->status = status; | ||
360 | } else { | 354 | } else { |
361 | mlog_errno(tmpret); | 355 | mlog_errno(tmpret); |
362 | if (dlm_is_host_down(tmpret)) { | 356 | if (dlm_is_host_down(tmpret)) { |
@@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, | |||
372 | /* something bad. this will BUG in ocfs2 */ | 366 | /* something bad. this will BUG in ocfs2 */ |
373 | ret = dlm_err_to_dlm_status(tmpret); | 367 | ret = dlm_err_to_dlm_status(tmpret); |
374 | } | 368 | } |
375 | lksb->status = ret; | ||
376 | } | 369 | } |
377 | 370 | ||
378 | return ret; | 371 | return ret; |
@@ -483,6 +476,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) | |||
483 | 476 | ||
484 | /* lock was found on queue */ | 477 | /* lock was found on queue */ |
485 | lksb = lock->lksb; | 478 | lksb = lock->lksb; |
479 | if (flags & (LKM_VALBLK|LKM_PUT_LVB) && | ||
480 | lock->ml.type != LKM_EXMODE) | ||
481 | flags &= ~(LKM_VALBLK|LKM_PUT_LVB); | ||
482 | |||
486 | /* unlockast only called on originating node */ | 483 | /* unlockast only called on originating node */ |
487 | if (flags & LKM_PUT_LVB) { | 484 | if (flags & LKM_PUT_LVB) { |
488 | lksb->flags |= DLM_LKSB_PUT_LVB; | 485 | lksb->flags |= DLM_LKSB_PUT_LVB; |
@@ -507,11 +504,8 @@ not_found: | |||
507 | "cookie=%u:%llu\n", | 504 | "cookie=%u:%llu\n", |
508 | dlm_get_lock_cookie_node(unlock->cookie), | 505 | dlm_get_lock_cookie_node(unlock->cookie), |
509 | dlm_get_lock_cookie_seq(unlock->cookie)); | 506 | dlm_get_lock_cookie_seq(unlock->cookie)); |
510 | else { | 507 | else |
511 | /* send the lksb->status back to the other node */ | ||
512 | status = lksb->status; | ||
513 | dlm_lock_put(lock); | 508 | dlm_lock_put(lock); |
514 | } | ||
515 | 509 | ||
516 | leave: | 510 | leave: |
517 | if (res) | 511 | if (res) |
@@ -533,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm, | |||
533 | 527 | ||
534 | if (dlm_lock_on_list(&res->blocked, lock)) { | 528 | if (dlm_lock_on_list(&res->blocked, lock)) { |
535 | /* cancel this outright */ | 529 | /* cancel this outright */ |
536 | lksb->status = DLM_NORMAL; | ||
537 | status = DLM_NORMAL; | 530 | status = DLM_NORMAL; |
538 | *actions = (DLM_UNLOCK_CALL_AST | | 531 | *actions = (DLM_UNLOCK_CALL_AST | |
539 | DLM_UNLOCK_REMOVE_LOCK); | 532 | DLM_UNLOCK_REMOVE_LOCK); |
540 | } else if (dlm_lock_on_list(&res->converting, lock)) { | 533 | } else if (dlm_lock_on_list(&res->converting, lock)) { |
541 | /* cancel the request, put back on granted */ | 534 | /* cancel the request, put back on granted */ |
542 | lksb->status = DLM_NORMAL; | ||
543 | status = DLM_NORMAL; | 535 | status = DLM_NORMAL; |
544 | *actions = (DLM_UNLOCK_CALL_AST | | 536 | *actions = (DLM_UNLOCK_CALL_AST | |
545 | DLM_UNLOCK_REMOVE_LOCK | | 537 | DLM_UNLOCK_REMOVE_LOCK | |
546 | DLM_UNLOCK_REGRANT_LOCK | | 538 | DLM_UNLOCK_REGRANT_LOCK | |
547 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); | 539 | DLM_UNLOCK_CLEAR_CONVERT_TYPE); |
548 | } else if (dlm_lock_on_list(&res->granted, lock)) { | 540 | } else if (dlm_lock_on_list(&res->granted, lock)) { |
549 | /* too late, already granted. DLM_CANCELGRANT */ | 541 | /* too late, already granted. */ |
550 | lksb->status = DLM_CANCELGRANT; | 542 | status = DLM_CANCELGRANT; |
551 | status = DLM_NORMAL; | ||
552 | *actions = DLM_UNLOCK_CALL_AST; | 543 | *actions = DLM_UNLOCK_CALL_AST; |
553 | } else { | 544 | } else { |
554 | mlog(ML_ERROR, "lock to cancel is not on any list!\n"); | 545 | mlog(ML_ERROR, "lock to cancel is not on any list!\n"); |
555 | lksb->status = DLM_IVLOCKID; | ||
556 | status = DLM_IVLOCKID; | 546 | status = DLM_IVLOCKID; |
557 | *actions = 0; | 547 | *actions = 0; |
558 | } | 548 | } |
@@ -569,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm, | |||
569 | 559 | ||
570 | /* unlock request */ | 560 | /* unlock request */ |
571 | if (!dlm_lock_on_list(&res->granted, lock)) { | 561 | if (!dlm_lock_on_list(&res->granted, lock)) { |
572 | lksb->status = DLM_DENIED; | ||
573 | status = DLM_DENIED; | 562 | status = DLM_DENIED; |
574 | dlm_error(status); | 563 | dlm_error(status); |
575 | *actions = 0; | 564 | *actions = 0; |
576 | } else { | 565 | } else { |
577 | /* unlock granted lock */ | 566 | /* unlock granted lock */ |
578 | lksb->status = DLM_NORMAL; | ||
579 | status = DLM_NORMAL; | 567 | status = DLM_NORMAL; |
580 | *actions = (DLM_UNLOCK_FREE_LOCK | | 568 | *actions = (DLM_UNLOCK_FREE_LOCK | |
581 | DLM_UNLOCK_CALL_AST | | 569 | DLM_UNLOCK_CALL_AST | |
@@ -632,6 +620,8 @@ retry: | |||
632 | 620 | ||
633 | spin_lock(&res->spinlock); | 621 | spin_lock(&res->spinlock); |
634 | is_master = (res->owner == dlm->node_num); | 622 | is_master = (res->owner == dlm->node_num); |
623 | if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE) | ||
624 | flags &= ~LKM_VALBLK; | ||
635 | spin_unlock(&res->spinlock); | 625 | spin_unlock(&res->spinlock); |
636 | 626 | ||
637 | if (is_master) { | 627 | if (is_master) { |
@@ -665,7 +655,7 @@ retry: | |||
665 | } | 655 | } |
666 | 656 | ||
667 | if (call_ast) { | 657 | if (call_ast) { |
668 | mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status); | 658 | mlog(0, "calling unlockast(%p, %d)\n", data, status); |
669 | if (is_master) { | 659 | if (is_master) { |
670 | /* it is possible that there is one last bast | 660 | /* it is possible that there is one last bast |
671 | * pending. make sure it is flushed, then | 661 | * pending. make sure it is flushed, then |
@@ -677,9 +667,12 @@ retry: | |||
677 | wait_event(dlm->ast_wq, | 667 | wait_event(dlm->ast_wq, |
678 | dlm_lock_basts_flushed(dlm, lock)); | 668 | dlm_lock_basts_flushed(dlm, lock)); |
679 | } | 669 | } |
680 | (*unlockast)(data, lksb->status); | 670 | (*unlockast)(data, status); |
681 | } | 671 | } |
682 | 672 | ||
673 | if (status == DLM_CANCELGRANT) | ||
674 | status = DLM_NORMAL; | ||
675 | |||
683 | if (status == DLM_NORMAL) { | 676 | if (status == DLM_NORMAL) { |
684 | mlog(0, "kicking the thread\n"); | 677 | mlog(0, "kicking the thread\n"); |
685 | dlm_kick_thread(dlm, res); | 678 | dlm_kick_thread(dlm, res); |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 0d1973ea32b0..1f17a4d08287 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
840 | 840 | ||
841 | mlog(0, "Allocating %u clusters for a new window.\n", | 841 | mlog(0, "Allocating %u clusters for a new window.\n", |
842 | ocfs2_local_alloc_window_bits(osb)); | 842 | ocfs2_local_alloc_window_bits(osb)); |
843 | |||
844 | /* Instruct the allocation code to try the most recently used | ||
845 | * cluster group. We'll re-record the group used this pass | ||
846 | * below. */ | ||
847 | ac->ac_last_group = osb->la_last_gd; | ||
848 | |||
843 | /* we used the generic suballoc reserve function, but we set | 849 | /* we used the generic suballoc reserve function, but we set |
844 | * everything up nicely, so there's no reason why we can't use | 850 | * everything up nicely, so there's no reason why we can't use |
845 | * the more specific cluster api to claim bits. */ | 851 | * the more specific cluster api to claim bits. */ |
@@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
852 | goto bail; | 858 | goto bail; |
853 | } | 859 | } |
854 | 860 | ||
861 | osb->la_last_gd = ac->ac_last_group; | ||
862 | |||
855 | la->la_bm_off = cpu_to_le32(cluster_off); | 863 | la->la_bm_off = cpu_to_le32(cluster_off); |
856 | alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); | 864 | alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); |
857 | /* just in case... In the future when we find space ourselves, | 865 | /* just in case... In the future when we find space ourselves, |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index cd4a6f253d13..0462a7f4e21b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -197,7 +197,6 @@ struct ocfs2_super | |||
197 | struct ocfs2_node_map recovery_map; | 197 | struct ocfs2_node_map recovery_map; |
198 | struct ocfs2_node_map umount_map; | 198 | struct ocfs2_node_map umount_map; |
199 | 199 | ||
200 | u32 num_clusters; | ||
201 | u64 root_blkno; | 200 | u64 root_blkno; |
202 | u64 system_dir_blkno; | 201 | u64 system_dir_blkno; |
203 | u64 bitmap_blkno; | 202 | u64 bitmap_blkno; |
@@ -237,6 +236,7 @@ struct ocfs2_super | |||
237 | 236 | ||
238 | enum ocfs2_local_alloc_state local_alloc_state; | 237 | enum ocfs2_local_alloc_state local_alloc_state; |
239 | struct buffer_head *local_alloc_bh; | 238 | struct buffer_head *local_alloc_bh; |
239 | u64 la_last_gd; | ||
240 | 240 | ||
241 | /* Next two fields are for local node slot recovery during | 241 | /* Next two fields are for local node slot recovery during |
242 | * mount. */ | 242 | * mount. */ |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 195523090c87..9d91e66f51a9 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode, | |||
70 | struct buffer_head *group_bh, | 70 | struct buffer_head *group_bh, |
71 | u32 bits_wanted, u32 min_bits, | 71 | u32 bits_wanted, u32 min_bits, |
72 | u16 *bit_off, u16 *bits_found); | 72 | u16 *bit_off, u16 *bits_found); |
73 | static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | ||
74 | u32 bits_wanted, | ||
75 | u32 min_bits, | ||
76 | u16 *bit_off, | ||
77 | unsigned int *num_bits, | ||
78 | u64 *bg_blkno); | ||
79 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | 73 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, |
80 | struct ocfs2_alloc_context *ac, | 74 | struct ocfs2_alloc_context *ac, |
81 | u32 bits_wanted, | 75 | u32 bits_wanted, |
@@ -85,11 +79,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |||
85 | u64 *bg_blkno); | 79 | u64 *bg_blkno); |
86 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, | 80 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, |
87 | int nr); | 81 | int nr); |
88 | static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, | ||
89 | struct buffer_head *bg_bh, | ||
90 | unsigned int bits_wanted, | ||
91 | u16 *bit_off, | ||
92 | u16 *bits_found); | ||
93 | static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, | 82 | static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, |
94 | struct inode *alloc_inode, | 83 | struct inode *alloc_inode, |
95 | struct ocfs2_group_desc *bg, | 84 | struct ocfs2_group_desc *bg, |
@@ -143,6 +132,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |||
143 | return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); | 132 | return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); |
144 | } | 133 | } |
145 | 134 | ||
135 | /* somewhat more expensive than our other checks, so use sparingly. */ | ||
136 | static int ocfs2_check_group_descriptor(struct super_block *sb, | ||
137 | struct ocfs2_dinode *di, | ||
138 | struct ocfs2_group_desc *gd) | ||
139 | { | ||
140 | unsigned int max_bits; | ||
141 | |||
142 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { | ||
143 | OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); | ||
144 | return -EIO; | ||
145 | } | ||
146 | |||
147 | if (di->i_blkno != gd->bg_parent_dinode) { | ||
148 | ocfs2_error(sb, "Group descriptor # %llu has bad parent " | ||
149 | "pointer (%llu, expected %llu)", | ||
150 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
151 | (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), | ||
152 | (unsigned long long)le64_to_cpu(di->i_blkno)); | ||
153 | return -EIO; | ||
154 | } | ||
155 | |||
156 | max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); | ||
157 | if (le16_to_cpu(gd->bg_bits) > max_bits) { | ||
158 | ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", | ||
159 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
160 | le16_to_cpu(gd->bg_bits)); | ||
161 | return -EIO; | ||
162 | } | ||
163 | |||
164 | if (le16_to_cpu(gd->bg_chain) >= | ||
165 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { | ||
166 | ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", | ||
167 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
168 | le16_to_cpu(gd->bg_chain)); | ||
169 | return -EIO; | ||
170 | } | ||
171 | |||
172 | if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { | ||
173 | ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " | ||
174 | "claims that %u are free", | ||
175 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
176 | le16_to_cpu(gd->bg_bits), | ||
177 | le16_to_cpu(gd->bg_free_bits_count)); | ||
178 | return -EIO; | ||
179 | } | ||
180 | |||
181 | if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { | ||
182 | ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " | ||
183 | "max bitmap bits of %u", | ||
184 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
185 | le16_to_cpu(gd->bg_bits), | ||
186 | 8 * le16_to_cpu(gd->bg_size)); | ||
187 | return -EIO; | ||
188 | } | ||
189 | |||
190 | return 0; | ||
191 | } | ||
192 | |||
146 | static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, | 193 | static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, |
147 | struct inode *alloc_inode, | 194 | struct inode *alloc_inode, |
148 | struct buffer_head *bg_bh, | 195 | struct buffer_head *bg_bh, |
@@ -663,6 +710,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, | |||
663 | static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, | 710 | static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, |
664 | struct buffer_head *bg_bh, | 711 | struct buffer_head *bg_bh, |
665 | unsigned int bits_wanted, | 712 | unsigned int bits_wanted, |
713 | unsigned int total_bits, | ||
666 | u16 *bit_off, | 714 | u16 *bit_off, |
667 | u16 *bits_found) | 715 | u16 *bits_found) |
668 | { | 716 | { |
@@ -679,10 +727,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, | |||
679 | found = start = best_offset = best_size = 0; | 727 | found = start = best_offset = best_size = 0; |
680 | bitmap = bg->bg_bitmap; | 728 | bitmap = bg->bg_bitmap; |
681 | 729 | ||
682 | while((offset = ocfs2_find_next_zero_bit(bitmap, | 730 | while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { |
683 | le16_to_cpu(bg->bg_bits), | 731 | if (offset == total_bits) |
684 | start)) != -1) { | ||
685 | if (offset == le16_to_cpu(bg->bg_bits)) | ||
686 | break; | 732 | break; |
687 | 733 | ||
688 | if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { | 734 | if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { |
@@ -911,14 +957,35 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
911 | { | 957 | { |
912 | int search = -ENOSPC; | 958 | int search = -ENOSPC; |
913 | int ret; | 959 | int ret; |
914 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; | 960 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; |
915 | u16 tmp_off, tmp_found; | 961 | u16 tmp_off, tmp_found; |
962 | unsigned int max_bits, gd_cluster_off; | ||
916 | 963 | ||
917 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); | 964 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); |
918 | 965 | ||
919 | if (bg->bg_free_bits_count) { | 966 | if (gd->bg_free_bits_count) { |
967 | max_bits = le16_to_cpu(gd->bg_bits); | ||
968 | |||
969 | /* Tail groups in cluster bitmaps which aren't cpg | ||
970 | * aligned are prone to partial extension by a failed | ||
971 | * fs resize. If the file system resize never got to | ||
972 | * update the dinode cluster count, then we don't want | ||
973 | * to trust any clusters past it, regardless of what | ||
974 | * the group descriptor says. */ | ||
975 | gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb, | ||
976 | le64_to_cpu(gd->bg_blkno)); | ||
977 | if ((gd_cluster_off + max_bits) > | ||
978 | OCFS2_I(inode)->ip_clusters) { | ||
979 | max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; | ||
980 | mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", | ||
981 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
982 | le16_to_cpu(gd->bg_bits), | ||
983 | OCFS2_I(inode)->ip_clusters, max_bits); | ||
984 | } | ||
985 | |||
920 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | 986 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), |
921 | group_bh, bits_wanted, | 987 | group_bh, bits_wanted, |
988 | max_bits, | ||
922 | &tmp_off, &tmp_found); | 989 | &tmp_off, &tmp_found); |
923 | if (ret) | 990 | if (ret) |
924 | return ret; | 991 | return ret; |
@@ -951,17 +1018,109 @@ static int ocfs2_block_group_search(struct inode *inode, | |||
951 | if (bg->bg_free_bits_count) | 1018 | if (bg->bg_free_bits_count) |
952 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | 1019 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), |
953 | group_bh, bits_wanted, | 1020 | group_bh, bits_wanted, |
1021 | le16_to_cpu(bg->bg_bits), | ||
954 | bit_off, bits_found); | 1022 | bit_off, bits_found); |
955 | 1023 | ||
956 | return ret; | 1024 | return ret; |
957 | } | 1025 | } |
958 | 1026 | ||
1027 | static int ocfs2_alloc_dinode_update_counts(struct inode *inode, | ||
1028 | struct ocfs2_journal_handle *handle, | ||
1029 | struct buffer_head *di_bh, | ||
1030 | u32 num_bits, | ||
1031 | u16 chain) | ||
1032 | { | ||
1033 | int ret; | ||
1034 | u32 tmp_used; | ||
1035 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
1036 | struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; | ||
1037 | |||
1038 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
1039 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1040 | if (ret < 0) { | ||
1041 | mlog_errno(ret); | ||
1042 | goto out; | ||
1043 | } | ||
1044 | |||
1045 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | ||
1046 | di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); | ||
1047 | le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); | ||
1048 | |||
1049 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
1050 | if (ret < 0) | ||
1051 | mlog_errno(ret); | ||
1052 | |||
1053 | out: | ||
1054 | return ret; | ||
1055 | } | ||
1056 | |||
1057 | static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | ||
1058 | u32 bits_wanted, | ||
1059 | u32 min_bits, | ||
1060 | u16 *bit_off, | ||
1061 | unsigned int *num_bits, | ||
1062 | u64 gd_blkno, | ||
1063 | u16 *bits_left) | ||
1064 | { | ||
1065 | int ret; | ||
1066 | u16 found; | ||
1067 | struct buffer_head *group_bh = NULL; | ||
1068 | struct ocfs2_group_desc *gd; | ||
1069 | struct inode *alloc_inode = ac->ac_inode; | ||
1070 | struct ocfs2_journal_handle *handle = ac->ac_handle; | ||
1071 | |||
1072 | ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, | ||
1073 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | ||
1074 | if (ret < 0) { | ||
1075 | mlog_errno(ret); | ||
1076 | return ret; | ||
1077 | } | ||
1078 | |||
1079 | gd = (struct ocfs2_group_desc *) group_bh->b_data; | ||
1080 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { | ||
1081 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); | ||
1082 | ret = -EIO; | ||
1083 | goto out; | ||
1084 | } | ||
1085 | |||
1086 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, | ||
1087 | bit_off, &found); | ||
1088 | if (ret < 0) { | ||
1089 | if (ret != -ENOSPC) | ||
1090 | mlog_errno(ret); | ||
1091 | goto out; | ||
1092 | } | ||
1093 | |||
1094 | *num_bits = found; | ||
1095 | |||
1096 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, | ||
1097 | *num_bits, | ||
1098 | le16_to_cpu(gd->bg_chain)); | ||
1099 | if (ret < 0) { | ||
1100 | mlog_errno(ret); | ||
1101 | goto out; | ||
1102 | } | ||
1103 | |||
1104 | ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, | ||
1105 | *bit_off, *num_bits); | ||
1106 | if (ret < 0) | ||
1107 | mlog_errno(ret); | ||
1108 | |||
1109 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); | ||
1110 | |||
1111 | out: | ||
1112 | brelse(group_bh); | ||
1113 | |||
1114 | return ret; | ||
1115 | } | ||
1116 | |||
959 | static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | 1117 | static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, |
960 | u32 bits_wanted, | 1118 | u32 bits_wanted, |
961 | u32 min_bits, | 1119 | u32 min_bits, |
962 | u16 *bit_off, | 1120 | u16 *bit_off, |
963 | unsigned int *num_bits, | 1121 | unsigned int *num_bits, |
964 | u64 *bg_blkno) | 1122 | u64 *bg_blkno, |
1123 | u16 *bits_left) | ||
965 | { | 1124 | { |
966 | int status; | 1125 | int status; |
967 | u16 chain, tmp_bits; | 1126 | u16 chain, tmp_bits; |
@@ -988,9 +1147,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
988 | goto bail; | 1147 | goto bail; |
989 | } | 1148 | } |
990 | bg = (struct ocfs2_group_desc *) group_bh->b_data; | 1149 | bg = (struct ocfs2_group_desc *) group_bh->b_data; |
991 | if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { | 1150 | status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); |
992 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); | 1151 | if (status) { |
993 | status = -EIO; | 1152 | mlog_errno(status); |
994 | goto bail; | 1153 | goto bail; |
995 | } | 1154 | } |
996 | 1155 | ||
@@ -1018,9 +1177,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1018 | goto bail; | 1177 | goto bail; |
1019 | } | 1178 | } |
1020 | bg = (struct ocfs2_group_desc *) group_bh->b_data; | 1179 | bg = (struct ocfs2_group_desc *) group_bh->b_data; |
1021 | if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { | 1180 | status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); |
1022 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); | 1181 | if (status) { |
1023 | status = -EIO; | 1182 | mlog_errno(status); |
1024 | goto bail; | 1183 | goto bail; |
1025 | } | 1184 | } |
1026 | } | 1185 | } |
@@ -1099,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1099 | (unsigned long long)fe->i_blkno); | 1258 | (unsigned long long)fe->i_blkno); |
1100 | 1259 | ||
1101 | *bg_blkno = le64_to_cpu(bg->bg_blkno); | 1260 | *bg_blkno = le64_to_cpu(bg->bg_blkno); |
1261 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); | ||
1102 | bail: | 1262 | bail: |
1103 | if (group_bh) | 1263 | if (group_bh) |
1104 | brelse(group_bh); | 1264 | brelse(group_bh); |
@@ -1120,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |||
1120 | { | 1280 | { |
1121 | int status; | 1281 | int status; |
1122 | u16 victim, i; | 1282 | u16 victim, i; |
1283 | u16 bits_left = 0; | ||
1284 | u64 hint_blkno = ac->ac_last_group; | ||
1123 | struct ocfs2_chain_list *cl; | 1285 | struct ocfs2_chain_list *cl; |
1124 | struct ocfs2_dinode *fe; | 1286 | struct ocfs2_dinode *fe; |
1125 | 1287 | ||
@@ -1146,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |||
1146 | goto bail; | 1308 | goto bail; |
1147 | } | 1309 | } |
1148 | 1310 | ||
1311 | if (hint_blkno) { | ||
1312 | /* Attempt to short-circuit the usual search mechanism | ||
1313 | * by jumping straight to the most recently used | ||
1314 | * allocation group. This helps us maintain some | |
1315 | * contiguousness across allocations. */ | ||
1316 | status = ocfs2_search_one_group(ac, bits_wanted, min_bits, | ||
1317 | bit_off, num_bits, | ||
1318 | hint_blkno, &bits_left); | ||
1319 | if (!status) { | ||
1320 | /* Be careful to update *bg_blkno here as the | ||
1321 | * caller is expecting it to be filled in, and | ||
1322 | * ocfs2_search_one_group() won't do that for | ||
1323 | * us. */ | ||
1324 | *bg_blkno = hint_blkno; | ||
1325 | goto set_hint; | ||
1326 | } | ||
1327 | if (status < 0 && status != -ENOSPC) { | ||
1328 | mlog_errno(status); | ||
1329 | goto bail; | ||
1330 | } | ||
1331 | } | ||
1332 | |||
1149 | cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; | 1333 | cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; |
1150 | 1334 | ||
1151 | victim = ocfs2_find_victim_chain(cl); | 1335 | victim = ocfs2_find_victim_chain(cl); |
@@ -1153,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |||
1153 | ac->ac_allow_chain_relink = 1; | 1337 | ac->ac_allow_chain_relink = 1; |
1154 | 1338 | ||
1155 | status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, | 1339 | status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, |
1156 | num_bits, bg_blkno); | 1340 | num_bits, bg_blkno, &bits_left); |
1157 | if (!status) | 1341 | if (!status) |
1158 | goto bail; | 1342 | goto set_hint; |
1159 | if (status < 0 && status != -ENOSPC) { | 1343 | if (status < 0 && status != -ENOSPC) { |
1160 | mlog_errno(status); | 1344 | mlog_errno(status); |
1161 | goto bail; | 1345 | goto bail; |
@@ -1177,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |||
1177 | 1361 | ||
1178 | ac->ac_chain = i; | 1362 | ac->ac_chain = i; |
1179 | status = ocfs2_search_chain(ac, bits_wanted, min_bits, | 1363 | status = ocfs2_search_chain(ac, bits_wanted, min_bits, |
1180 | bit_off, num_bits, | 1364 | bit_off, num_bits, bg_blkno, |
1181 | bg_blkno); | 1365 | &bits_left); |
1182 | if (!status) | 1366 | if (!status) |
1183 | break; | 1367 | break; |
1184 | if (status < 0 && status != -ENOSPC) { | 1368 | if (status < 0 && status != -ENOSPC) { |
@@ -1186,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |||
1186 | goto bail; | 1370 | goto bail; |
1187 | } | 1371 | } |
1188 | } | 1372 | } |
1189 | bail: | ||
1190 | 1373 | ||
1374 | set_hint: | ||
1375 | if (status != -ENOSPC) { | ||
1376 | /* If the next search of this group is not likely to | ||
1377 | * yield a suitable extent, then we reset the last | ||
1378 | * group hint so as to not waste a disk read */ | ||
1379 | if (bits_left < min_bits) | ||
1380 | ac->ac_last_group = 0; | ||
1381 | else | ||
1382 | ac->ac_last_group = *bg_blkno; | ||
1383 | } | ||
1384 | |||
1385 | bail: | ||
1191 | mlog_exit(status); | 1386 | mlog_exit(status); |
1192 | return status; | 1387 | return status; |
1193 | } | 1388 | } |
@@ -1341,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, | |||
1341 | { | 1536 | { |
1342 | int status; | 1537 | int status; |
1343 | unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; | 1538 | unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; |
1344 | u64 bg_blkno; | 1539 | u64 bg_blkno = 0; |
1345 | u16 bg_bit_off; | 1540 | u16 bg_bit_off; |
1346 | 1541 | ||
1347 | mlog_entry_void(); | 1542 | mlog_entry_void(); |
@@ -1494,9 +1689,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, | |||
1494 | } | 1689 | } |
1495 | 1690 | ||
1496 | group = (struct ocfs2_group_desc *) group_bh->b_data; | 1691 | group = (struct ocfs2_group_desc *) group_bh->b_data; |
1497 | if (!OCFS2_IS_VALID_GROUP_DESC(group)) { | 1692 | status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); |
1498 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); | 1693 | if (status) { |
1499 | status = -EIO; | 1694 | mlog_errno(status); |
1500 | goto bail; | 1695 | goto bail; |
1501 | } | 1696 | } |
1502 | BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); | 1697 | BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a76c82a7ceac..c787838d1052 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -49,6 +49,8 @@ struct ocfs2_alloc_context { | |||
49 | u16 ac_chain; | 49 | u16 ac_chain; |
50 | int ac_allow_chain_relink; | 50 | int ac_allow_chain_relink; |
51 | group_search_t *ac_group_search; | 51 | group_search_t *ac_group_search; |
52 | |||
53 | u64 ac_last_group; | ||
52 | }; | 54 | }; |
53 | 55 | ||
54 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); | 56 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 382706a67ffd..d17e33e66a1e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1442 | 1442 | ||
1443 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; | 1443 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; |
1444 | 1444 | ||
1445 | /* We don't have a cluster lock on the bitmap here because | ||
1446 | * we're only interested in static information and the extra | ||
1447 | * complexity at mount time isn't worth it. Don't pass the | |
1448 | * inode in to the read function though as we don't want it to | ||
1449 | * be put in the cache. */ | ||
1445 | status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, | 1450 | status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, |
1446 | inode); | 1451 | NULL); |
1447 | iput(inode); | 1452 | iput(inode); |
1448 | if (status < 0) { | 1453 | if (status < 0) { |
1449 | mlog_errno(status); | 1454 | mlog_errno(status); |
@@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1452 | 1457 | ||
1453 | di = (struct ocfs2_dinode *) bitmap_bh->b_data; | 1458 | di = (struct ocfs2_dinode *) bitmap_bh->b_data; |
1454 | osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); | 1459 | osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); |
1455 | osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total); | ||
1456 | brelse(bitmap_bh); | 1460 | brelse(bitmap_bh); |
1457 | mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", | 1461 | mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", |
1458 | (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); | 1462 | (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); |
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index c9a478099281..e478f1941831 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig | |||
@@ -99,7 +99,7 @@ config IBM_PARTITION | |||
99 | 99 | ||
100 | config MAC_PARTITION | 100 | config MAC_PARTITION |
101 | bool "Macintosh partition map support" if PARTITION_ADVANCED | 101 | bool "Macintosh partition map support" if PARTITION_ADVANCED |
102 | default y if MAC | 102 | default y if (MAC || PPC_PMAC) |
103 | help | 103 | help |
104 | Say Y here if you would like to use hard disks under Linux which | 104 | Say Y here if you would like to use hard disks under Linux which |
105 | were partitioned on a Macintosh. | 105 | were partitioned on a Macintosh. |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 839634026eb5..51c6a748df49 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -339,6 +339,7 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len) | |||
339 | p->start_sect = start; | 339 | p->start_sect = start; |
340 | p->nr_sects = len; | 340 | p->nr_sects = len; |
341 | p->partno = part; | 341 | p->partno = part; |
342 | p->policy = disk->policy; | ||
342 | 343 | ||
343 | if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1])) | 344 | if (isdigit(disk->kobj.name[strlen(disk->kobj.name)-1])) |
344 | snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part); | 345 | snprintf(p->kobj.name,KOBJ_NAME_LEN,"%sp%d",disk->kobj.name,part); |
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index abe91ca03edf..0a5927c806ca 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c | |||
@@ -74,7 +74,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) | |||
74 | spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); | 74 | spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); |
75 | for (i = 0; i < 8; i++, p++) { | 75 | for (i = 0; i < 8; i++, p++) { |
76 | unsigned long st_sector; | 76 | unsigned long st_sector; |
77 | int num_sectors; | 77 | unsigned int num_sectors; |
78 | 78 | ||
79 | st_sector = be32_to_cpu(p->start_cylinder) * spc; | 79 | st_sector = be32_to_cpu(p->start_cylinder) * spc; |
80 | num_sectors = be32_to_cpu(p->num_sectors); | 80 | num_sectors = be32_to_cpu(p->num_sectors); |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 7495d3e20775..0b615d62a159 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -74,6 +74,7 @@ | |||
74 | #include <linux/times.h> | 74 | #include <linux/times.h> |
75 | #include <linux/cpuset.h> | 75 | #include <linux/cpuset.h> |
76 | #include <linux/rcupdate.h> | 76 | #include <linux/rcupdate.h> |
77 | #include <linux/delayacct.h> | ||
77 | 78 | ||
78 | #include <asm/uaccess.h> | 79 | #include <asm/uaccess.h> |
79 | #include <asm/pgtable.h> | 80 | #include <asm/pgtable.h> |
@@ -411,7 +412,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
411 | 412 | ||
412 | res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ | 413 | res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ |
413 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ | 414 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ |
414 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", | 415 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu %llu\n", |
415 | task->pid, | 416 | task->pid, |
416 | tcomm, | 417 | tcomm, |
417 | state, | 418 | state, |
@@ -455,7 +456,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) | |||
455 | task->exit_signal, | 456 | task->exit_signal, |
456 | task_cpu(task), | 457 | task_cpu(task), |
457 | task->rt_priority, | 458 | task->rt_priority, |
458 | task->policy); | 459 | task->policy, |
460 | (unsigned long long)delayacct_blkio_ticks(task)); | ||
459 | if(mm) | 461 | if(mm) |
460 | mmput(mm); | 462 | mmput(mm); |
461 | return res; | 463 | return res; |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 243a94af0427..fe8d55fb17cc 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -551,6 +551,27 @@ static int proc_fd_access_allowed(struct inode *inode) | |||
551 | return allowed; | 551 | return allowed; |
552 | } | 552 | } |
553 | 553 | ||
554 | static int proc_setattr(struct dentry *dentry, struct iattr *attr) | ||
555 | { | ||
556 | int error; | ||
557 | struct inode *inode = dentry->d_inode; | ||
558 | |||
559 | if (attr->ia_valid & ATTR_MODE) | ||
560 | return -EPERM; | ||
561 | |||
562 | error = inode_change_ok(inode, attr); | ||
563 | if (!error) { | ||
564 | error = security_inode_setattr(dentry, attr); | ||
565 | if (!error) | ||
566 | error = inode_setattr(inode, attr); | ||
567 | } | ||
568 | return error; | ||
569 | } | ||
570 | |||
571 | static struct inode_operations proc_def_inode_operations = { | ||
572 | .setattr = proc_setattr, | ||
573 | }; | ||
574 | |||
554 | extern struct seq_operations mounts_op; | 575 | extern struct seq_operations mounts_op; |
555 | struct proc_mounts { | 576 | struct proc_mounts { |
556 | struct seq_file m; | 577 | struct seq_file m; |
@@ -1111,7 +1132,8 @@ out: | |||
1111 | 1132 | ||
1112 | static struct inode_operations proc_pid_link_inode_operations = { | 1133 | static struct inode_operations proc_pid_link_inode_operations = { |
1113 | .readlink = proc_pid_readlink, | 1134 | .readlink = proc_pid_readlink, |
1114 | .follow_link = proc_pid_follow_link | 1135 | .follow_link = proc_pid_follow_link, |
1136 | .setattr = proc_setattr, | ||
1115 | }; | 1137 | }; |
1116 | 1138 | ||
1117 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) | 1139 | static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) |
@@ -1285,6 +1307,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st | |||
1285 | ei = PROC_I(inode); | 1307 | ei = PROC_I(inode); |
1286 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1308 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
1287 | inode->i_ino = fake_ino(task->pid, ino); | 1309 | inode->i_ino = fake_ino(task->pid, ino); |
1310 | inode->i_op = &proc_def_inode_operations; | ||
1288 | 1311 | ||
1289 | /* | 1312 | /* |
1290 | * grab the reference to task. | 1313 | * grab the reference to task. |
@@ -1339,6 +1362,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1339 | inode->i_uid = 0; | 1362 | inode->i_uid = 0; |
1340 | inode->i_gid = 0; | 1363 | inode->i_gid = 0; |
1341 | } | 1364 | } |
1365 | inode->i_mode &= ~(S_ISUID | S_ISGID); | ||
1342 | security_task_to_inode(task, inode); | 1366 | security_task_to_inode(task, inode); |
1343 | put_task_struct(task); | 1367 | put_task_struct(task); |
1344 | return 1; | 1368 | return 1; |
@@ -1389,6 +1413,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1389 | inode->i_uid = 0; | 1413 | inode->i_uid = 0; |
1390 | inode->i_gid = 0; | 1414 | inode->i_gid = 0; |
1391 | } | 1415 | } |
1416 | inode->i_mode &= ~(S_ISUID | S_ISGID); | ||
1392 | security_task_to_inode(task, inode); | 1417 | security_task_to_inode(task, inode); |
1393 | put_task_struct(task); | 1418 | put_task_struct(task); |
1394 | return 1; | 1419 | return 1; |
@@ -1527,11 +1552,13 @@ static struct file_operations proc_task_operations = { | |||
1527 | */ | 1552 | */ |
1528 | static struct inode_operations proc_fd_inode_operations = { | 1553 | static struct inode_operations proc_fd_inode_operations = { |
1529 | .lookup = proc_lookupfd, | 1554 | .lookup = proc_lookupfd, |
1555 | .setattr = proc_setattr, | ||
1530 | }; | 1556 | }; |
1531 | 1557 | ||
1532 | static struct inode_operations proc_task_inode_operations = { | 1558 | static struct inode_operations proc_task_inode_operations = { |
1533 | .lookup = proc_task_lookup, | 1559 | .lookup = proc_task_lookup, |
1534 | .getattr = proc_task_getattr, | 1560 | .getattr = proc_task_getattr, |
1561 | .setattr = proc_setattr, | ||
1535 | }; | 1562 | }; |
1536 | 1563 | ||
1537 | #ifdef CONFIG_SECURITY | 1564 | #ifdef CONFIG_SECURITY |
@@ -1845,11 +1872,13 @@ static struct file_operations proc_tid_base_operations = { | |||
1845 | static struct inode_operations proc_tgid_base_inode_operations = { | 1872 | static struct inode_operations proc_tgid_base_inode_operations = { |
1846 | .lookup = proc_tgid_base_lookup, | 1873 | .lookup = proc_tgid_base_lookup, |
1847 | .getattr = pid_getattr, | 1874 | .getattr = pid_getattr, |
1875 | .setattr = proc_setattr, | ||
1848 | }; | 1876 | }; |
1849 | 1877 | ||
1850 | static struct inode_operations proc_tid_base_inode_operations = { | 1878 | static struct inode_operations proc_tid_base_inode_operations = { |
1851 | .lookup = proc_tid_base_lookup, | 1879 | .lookup = proc_tid_base_lookup, |
1852 | .getattr = pid_getattr, | 1880 | .getattr = pid_getattr, |
1881 | .setattr = proc_setattr, | ||
1853 | }; | 1882 | }; |
1854 | 1883 | ||
1855 | #ifdef CONFIG_SECURITY | 1884 | #ifdef CONFIG_SECURITY |
@@ -1892,11 +1921,13 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir, | |||
1892 | static struct inode_operations proc_tgid_attr_inode_operations = { | 1921 | static struct inode_operations proc_tgid_attr_inode_operations = { |
1893 | .lookup = proc_tgid_attr_lookup, | 1922 | .lookup = proc_tgid_attr_lookup, |
1894 | .getattr = pid_getattr, | 1923 | .getattr = pid_getattr, |
1924 | .setattr = proc_setattr, | ||
1895 | }; | 1925 | }; |
1896 | 1926 | ||
1897 | static struct inode_operations proc_tid_attr_inode_operations = { | 1927 | static struct inode_operations proc_tid_attr_inode_operations = { |
1898 | .lookup = proc_tid_attr_lookup, | 1928 | .lookup = proc_tid_attr_lookup, |
1899 | .getattr = pid_getattr, | 1929 | .getattr = pid_getattr, |
1930 | .setattr = proc_setattr, | ||
1900 | }; | 1931 | }; |
1901 | #endif | 1932 | #endif |
1902 | 1933 | ||
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 6dcef089e18e..49dfb2ab783e 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -192,7 +192,7 @@ int proc_fill_super(struct super_block *s, void *data, int silent) | |||
192 | { | 192 | { |
193 | struct inode * root_inode; | 193 | struct inode * root_inode; |
194 | 194 | ||
195 | s->s_flags |= MS_NODIRATIME; | 195 | s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; |
196 | s->s_blocksize = 1024; | 196 | s->s_blocksize = 1024; |
197 | s->s_blocksize_bits = 10; | 197 | s->s_blocksize_bits = 10; |
198 | s->s_magic = PROC_SUPER_MAGIC; | 198 | s->s_magic = PROC_SUPER_MAGIC; |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 036d14d83627..6a984f64edd7 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -42,8 +42,6 @@ const struct file_operations proc_kcore_operations = { | |||
42 | #define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) | 42 | #define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) | ||
46 | |||
47 | /* An ELF note in memory */ | 45 | /* An ELF note in memory */ |
48 | struct memelfnote | 46 | struct memelfnote |
49 | { | 47 | { |
@@ -384,7 +382,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
384 | */ | 382 | */ |
385 | if (n) { | 383 | if (n) { |
386 | if (clear_user(buffer + tsz - n, | 384 | if (clear_user(buffer + tsz - n, |
387 | tsz - n)) | 385 | n)) |
388 | return -EFAULT; | 386 | return -EFAULT; |
389 | } | 387 | } |
390 | } else { | 388 | } else { |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 9f2cfc30f9cf..942156225447 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
169 | "Mapped: %8lu kB\n" | 169 | "Mapped: %8lu kB\n" |
170 | "Slab: %8lu kB\n" | 170 | "Slab: %8lu kB\n" |
171 | "PageTables: %8lu kB\n" | 171 | "PageTables: %8lu kB\n" |
172 | "NFS Unstable: %8lu kB\n" | 172 | "NFS_Unstable: %8lu kB\n" |
173 | "Bounce: %8lu kB\n" | 173 | "Bounce: %8lu kB\n" |
174 | "CommitLimit: %8lu kB\n" | 174 | "CommitLimit: %8lu kB\n" |
175 | "Committed_AS: %8lu kB\n" | 175 | "Committed_AS: %8lu kB\n" |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index af69f28277b6..4616ed50ffcd 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -107,7 +107,7 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * | |||
107 | { | 107 | { |
108 | struct vm_list_struct *vml; | 108 | struct vm_list_struct *vml; |
109 | struct vm_area_struct *vma; | 109 | struct vm_area_struct *vma; |
110 | struct task_struct *task = proc_task(inode); | 110 | struct task_struct *task = get_proc_task(inode); |
111 | struct mm_struct *mm = get_task_mm(task); | 111 | struct mm_struct *mm = get_task_mm(task); |
112 | int result = -ENOENT; | 112 | int result = -ENOENT; |
113 | 113 | ||
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 99fffc9e1bfd..677139b48e00 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -283,9 +283,9 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file, | |||
283 | 283 | ||
284 | /*****************************************************************************/ | 284 | /*****************************************************************************/ |
285 | /* | 285 | /* |
286 | * set up a mapping | 286 | * set up a mapping for shared memory segments |
287 | */ | 287 | */ |
288 | int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) | 288 | int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) |
289 | { | 289 | { |
290 | return 0; | 290 | return vma->vm_flags & VM_SHARED ? 0 : -ENOSYS; |
291 | } | 291 | } |
diff --git a/fs/read_write.c b/fs/read_write.c index 5bc0e9234f9d..d4cb3183c99c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -436,7 +436,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
436 | return seg; | 436 | return seg; |
437 | } | 437 | } |
438 | 438 | ||
439 | EXPORT_SYMBOL(iov_shorten); | 439 | EXPORT_UNUSED_SYMBOL(iov_shorten); /* June 2006 */ |
440 | 440 | ||
441 | /* A write operation does a read from user space and vice versa */ | 441 | /* A write operation does a read from user space and vice versa */ |
442 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) | 442 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 752cea12e30f..1627edd50810 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -48,8 +48,8 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
48 | return 0; | 48 | return 0; |
49 | } | 49 | } |
50 | 50 | ||
51 | reiserfs_write_lock(inode->i_sb); | ||
52 | mutex_lock(&inode->i_mutex); | 51 | mutex_lock(&inode->i_mutex); |
52 | reiserfs_write_lock(inode->i_sb); | ||
53 | /* freeing preallocation only involves relogging blocks that | 53 | /* freeing preallocation only involves relogging blocks that |
54 | * are already in the current transaction. preallocation gets | 54 | * are already in the current transaction. preallocation gets |
55 | * freed at the end of each transaction, so it is impossible for | 55 | * freed at the end of each transaction, so it is impossible for |
@@ -860,8 +860,12 @@ static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_han | |||
860 | // this sets the proper flags for O_SYNC to trigger a commit | 860 | // this sets the proper flags for O_SYNC to trigger a commit |
861 | mark_inode_dirty(inode); | 861 | mark_inode_dirty(inode); |
862 | reiserfs_write_unlock(inode->i_sb); | 862 | reiserfs_write_unlock(inode->i_sb); |
863 | } else | 863 | } else { |
864 | reiserfs_write_lock(inode->i_sb); | ||
865 | reiserfs_update_inode_transaction(inode); | ||
864 | mark_inode_dirty(inode); | 866 | mark_inode_dirty(inode); |
867 | reiserfs_write_unlock(inode->i_sb); | ||
868 | } | ||
865 | 869 | ||
866 | sd_update = 1; | 870 | sd_update = 1; |
867 | } | 871 | } |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 12dfdcfbee3d..52f1e2136546 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -39,14 +39,10 @@ void reiserfs_delete_inode(struct inode *inode) | |||
39 | 39 | ||
40 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ | 40 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ |
41 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ | 41 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ |
42 | mutex_lock(&inode->i_mutex); | ||
43 | |||
44 | reiserfs_delete_xattrs(inode); | 42 | reiserfs_delete_xattrs(inode); |
45 | 43 | ||
46 | if (journal_begin(&th, inode->i_sb, jbegin_count)) { | 44 | if (journal_begin(&th, inode->i_sb, jbegin_count)) |
47 | mutex_unlock(&inode->i_mutex); | ||
48 | goto out; | 45 | goto out; |
49 | } | ||
50 | reiserfs_update_inode_transaction(inode); | 46 | reiserfs_update_inode_transaction(inode); |
51 | 47 | ||
52 | err = reiserfs_delete_object(&th, inode); | 48 | err = reiserfs_delete_object(&th, inode); |
@@ -57,12 +53,8 @@ void reiserfs_delete_inode(struct inode *inode) | |||
57 | if (!err) | 53 | if (!err) |
58 | DQUOT_FREE_INODE(inode); | 54 | DQUOT_FREE_INODE(inode); |
59 | 55 | ||
60 | if (journal_end(&th, inode->i_sb, jbegin_count)) { | 56 | if (journal_end(&th, inode->i_sb, jbegin_count)) |
61 | mutex_unlock(&inode->i_mutex); | ||
62 | goto out; | 57 | goto out; |
63 | } | ||
64 | |||
65 | mutex_unlock(&inode->i_mutex); | ||
66 | 58 | ||
67 | /* check return value from reiserfs_delete_object after | 59 | /* check return value from reiserfs_delete_object after |
68 | * ending the transaction | 60 | * ending the transaction |
@@ -2348,6 +2340,7 @@ static int reiserfs_write_full_page(struct page *page, | |||
2348 | unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; | 2340 | unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; |
2349 | int error = 0; | 2341 | int error = 0; |
2350 | unsigned long block; | 2342 | unsigned long block; |
2343 | sector_t last_block; | ||
2351 | struct buffer_head *head, *bh; | 2344 | struct buffer_head *head, *bh; |
2352 | int partial = 0; | 2345 | int partial = 0; |
2353 | int nr = 0; | 2346 | int nr = 0; |
@@ -2395,10 +2388,19 @@ static int reiserfs_write_full_page(struct page *page, | |||
2395 | } | 2388 | } |
2396 | bh = head; | 2389 | bh = head; |
2397 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); | 2390 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); |
2391 | last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; | ||
2398 | /* first map all the buffers, logging any direct items we find */ | 2392 | /* first map all the buffers, logging any direct items we find */ |
2399 | do { | 2393 | do { |
2400 | if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) || | 2394 | if (block > last_block) { |
2401 | (buffer_mapped(bh) | 2395 | /* |
2396 | * This can happen when the block size is less than | ||
2397 | * the page size. The corresponding bytes in the page | ||
2398 | * were zero filled above | ||
2399 | */ | ||
2400 | clear_buffer_dirty(bh); | ||
2401 | set_buffer_uptodate(bh); | ||
2402 | } else if ((checked || buffer_dirty(bh)) && | ||
2403 | (!buffer_mapped(bh) || (buffer_mapped(bh) | ||
2402 | && bh->b_blocknr == | 2404 | && bh->b_blocknr == |
2403 | 0))) { | 2405 | 0))) { |
2404 | /* not mapped yet, or it points to a direct item, search | 2406 | /* not mapped yet, or it points to a direct item, search |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 745c88100895..a986b5e1e288 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -116,12 +116,12 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
116 | if (REISERFS_I(inode)->i_flags & i_nopack_mask) { | 116 | if (REISERFS_I(inode)->i_flags & i_nopack_mask) { |
117 | return 0; | 117 | return 0; |
118 | } | 118 | } |
119 | reiserfs_write_lock(inode->i_sb); | ||
120 | 119 | ||
121 | /* we need to make sure nobody is changing the file size beneath | 120 | /* we need to make sure nobody is changing the file size beneath |
122 | ** us | 121 | ** us |
123 | */ | 122 | */ |
124 | mutex_lock(&inode->i_mutex); | 123 | mutex_lock(&inode->i_mutex); |
124 | reiserfs_write_lock(inode->i_sb); | ||
125 | 125 | ||
126 | write_from = inode->i_size & (blocksize - 1); | 126 | write_from = inode->i_size & (blocksize - 1); |
127 | /* if we are on a block boundary, we are already unpacked. */ | 127 | /* if we are on a block boundary, we are already unpacked. */ |
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 5d8a8cfebc70..c533ec1bcaec 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c | |||
@@ -492,9 +492,17 @@ static void add_file(struct super_block *sb, char *name, | |||
492 | 492 | ||
493 | int reiserfs_proc_info_init(struct super_block *sb) | 493 | int reiserfs_proc_info_init(struct super_block *sb) |
494 | { | 494 | { |
495 | char b[BDEVNAME_SIZE]; | ||
496 | char *s; | ||
497 | |||
498 | /* Some block devices use /'s */ | ||
499 | strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); | ||
500 | s = strchr(b, '/'); | ||
501 | if (s) | ||
502 | *s = '!'; | ||
503 | |||
495 | spin_lock_init(&__PINFO(sb).lock); | 504 | spin_lock_init(&__PINFO(sb).lock); |
496 | REISERFS_SB(sb)->procdir = | 505 | REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root); |
497 | proc_mkdir(reiserfs_bdevname(sb), proc_info_root); | ||
498 | if (REISERFS_SB(sb)->procdir) { | 506 | if (REISERFS_SB(sb)->procdir) { |
499 | REISERFS_SB(sb)->procdir->owner = THIS_MODULE; | 507 | REISERFS_SB(sb)->procdir->owner = THIS_MODULE; |
500 | REISERFS_SB(sb)->procdir->data = sb; | 508 | REISERFS_SB(sb)->procdir->data = sb; |
@@ -508,13 +516,22 @@ int reiserfs_proc_info_init(struct super_block *sb) | |||
508 | return 0; | 516 | return 0; |
509 | } | 517 | } |
510 | reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", | 518 | reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", |
511 | proc_info_root_name, reiserfs_bdevname(sb)); | 519 | proc_info_root_name, b); |
512 | return 1; | 520 | return 1; |
513 | } | 521 | } |
514 | 522 | ||
515 | int reiserfs_proc_info_done(struct super_block *sb) | 523 | int reiserfs_proc_info_done(struct super_block *sb) |
516 | { | 524 | { |
517 | struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; | 525 | struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; |
526 | char b[BDEVNAME_SIZE]; | ||
527 | char *s; | ||
528 | |||
529 | /* Some block devices use /'s */ | ||
530 | strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); | ||
531 | s = strchr(b, '/'); | ||
532 | if (s) | ||
533 | *s = '!'; | ||
534 | |||
518 | if (de) { | 535 | if (de) { |
519 | remove_proc_entry("journal", de); | 536 | remove_proc_entry("journal", de); |
520 | remove_proc_entry("oidmap", de); | 537 | remove_proc_entry("oidmap", de); |
@@ -528,7 +545,7 @@ int reiserfs_proc_info_done(struct super_block *sb) | |||
528 | __PINFO(sb).exiting = 1; | 545 | __PINFO(sb).exiting = 1; |
529 | spin_unlock(&__PINFO(sb).lock); | 546 | spin_unlock(&__PINFO(sb).lock); |
530 | if (proc_info_root) { | 547 | if (proc_info_root) { |
531 | remove_proc_entry(reiserfs_bdevname(sb), proc_info_root); | 548 | remove_proc_entry(b, proc_info_root); |
532 | REISERFS_SB(sb)->procdir = NULL; | 549 | REISERFS_SB(sb)->procdir = NULL; |
533 | } | 550 | } |
534 | return 0; | 551 | return 0; |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 28eb3c886034..5567328f1041 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | |||
2203 | size_t towrite = len; | 2203 | size_t towrite = len; |
2204 | struct buffer_head tmp_bh, *bh; | 2204 | struct buffer_head tmp_bh, *bh; |
2205 | 2205 | ||
2206 | mutex_lock(&inode->i_mutex); | 2206 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); |
2207 | while (towrite > 0) { | 2207 | while (towrite > 0) { |
2208 | tocopy = sb->s_blocksize - offset < towrite ? | 2208 | tocopy = sb->s_blocksize - offset < towrite ? |
2209 | sb->s_blocksize - offset : towrite; | 2209 | sb->s_blocksize - offset : towrite; |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 39fedaa88a0c..d935fb9394e3 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) | |||
424 | int res = -ENOTDIR; | 424 | int res = -ENOTDIR; |
425 | if (!file->f_op || !file->f_op->readdir) | 425 | if (!file->f_op || !file->f_op->readdir) |
426 | goto out; | 426 | goto out; |
427 | mutex_lock(&inode->i_mutex); | 427 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); |
428 | // down(&inode->i_zombie); | 428 | // down(&inode->i_zombie); |
429 | res = -ENOENT; | 429 | res = -ENOENT; |
430 | if (!IS_DEADDIR(inode)) { | 430 | if (!IS_DEADDIR(inode)) { |
diff --git a/fs/splice.c b/fs/splice.c index 05fd2787be98..684bca3d3a10 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1307,6 +1307,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, | |||
1307 | } | 1307 | } |
1308 | 1308 | ||
1309 | /* | 1309 | /* |
1310 | * Make sure there's data to read. Wait for input if we can, otherwise | ||
1311 | * return an appropriate error. | ||
1312 | */ | ||
1313 | static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | ||
1314 | { | ||
1315 | int ret; | ||
1316 | |||
1317 | /* | ||
1318 | * Check ->nrbufs without the inode lock first. This function | ||
1319 | * is speculative anyways, so missing one is ok. | ||
1320 | */ | ||
1321 | if (pipe->nrbufs) | ||
1322 | return 0; | ||
1323 | |||
1324 | ret = 0; | ||
1325 | mutex_lock(&pipe->inode->i_mutex); | ||
1326 | |||
1327 | while (!pipe->nrbufs) { | ||
1328 | if (signal_pending(current)) { | ||
1329 | ret = -ERESTARTSYS; | ||
1330 | break; | ||
1331 | } | ||
1332 | if (!pipe->writers) | ||
1333 | break; | ||
1334 | if (!pipe->waiting_writers) { | ||
1335 | if (flags & SPLICE_F_NONBLOCK) { | ||
1336 | ret = -EAGAIN; | ||
1337 | break; | ||
1338 | } | ||
1339 | } | ||
1340 | pipe_wait(pipe); | ||
1341 | } | ||
1342 | |||
1343 | mutex_unlock(&pipe->inode->i_mutex); | ||
1344 | return ret; | ||
1345 | } | ||
1346 | |||
1347 | /* | ||
1348 | * Make sure there's writeable room. Wait for room if we can, otherwise | ||
1349 | * return an appropriate error. | ||
1350 | */ | ||
1351 | static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | ||
1352 | { | ||
1353 | int ret; | ||
1354 | |||
1355 | /* | ||
1356 | * Check ->nrbufs without the inode lock first. This function | ||
1357 | * is speculative anyways, so missing one is ok. | ||
1358 | */ | ||
1359 | if (pipe->nrbufs < PIPE_BUFFERS) | ||
1360 | return 0; | ||
1361 | |||
1362 | ret = 0; | ||
1363 | mutex_lock(&pipe->inode->i_mutex); | ||
1364 | |||
1365 | while (pipe->nrbufs >= PIPE_BUFFERS) { | ||
1366 | if (!pipe->readers) { | ||
1367 | send_sig(SIGPIPE, current, 0); | ||
1368 | ret = -EPIPE; | ||
1369 | break; | ||
1370 | } | ||
1371 | if (flags & SPLICE_F_NONBLOCK) { | ||
1372 | ret = -EAGAIN; | ||
1373 | break; | ||
1374 | } | ||
1375 | if (signal_pending(current)) { | ||
1376 | ret = -ERESTARTSYS; | ||
1377 | break; | ||
1378 | } | ||
1379 | pipe->waiting_writers++; | ||
1380 | pipe_wait(pipe); | ||
1381 | pipe->waiting_writers--; | ||
1382 | } | ||
1383 | |||
1384 | mutex_unlock(&pipe->inode->i_mutex); | ||
1385 | return ret; | ||
1386 | } | ||
1387 | |||
1388 | /* | ||
1310 | * Link contents of ipipe to opipe. | 1389 | * Link contents of ipipe to opipe. |
1311 | */ | 1390 | */ |
1312 | static int link_pipe(struct pipe_inode_info *ipipe, | 1391 | static int link_pipe(struct pipe_inode_info *ipipe, |
@@ -1314,9 +1393,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1314 | size_t len, unsigned int flags) | 1393 | size_t len, unsigned int flags) |
1315 | { | 1394 | { |
1316 | struct pipe_buffer *ibuf, *obuf; | 1395 | struct pipe_buffer *ibuf, *obuf; |
1317 | int ret, do_wakeup, i, ipipe_first; | 1396 | int ret = 0, i = 0, nbuf; |
1318 | |||
1319 | ret = do_wakeup = ipipe_first = 0; | ||
1320 | 1397 | ||
1321 | /* | 1398 | /* |
1322 | * Potential ABBA deadlock, work around it by ordering lock | 1399 | * Potential ABBA deadlock, work around it by ordering lock |
@@ -1324,126 +1401,62 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1324 | * could deadlock (one doing tee from A -> B, the other from B -> A). | 1401 | * could deadlock (one doing tee from A -> B, the other from B -> A). |
1325 | */ | 1402 | */ |
1326 | if (ipipe->inode < opipe->inode) { | 1403 | if (ipipe->inode < opipe->inode) { |
1327 | ipipe_first = 1; | 1404 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); |
1328 | mutex_lock(&ipipe->inode->i_mutex); | 1405 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); |
1329 | mutex_lock(&opipe->inode->i_mutex); | ||
1330 | } else { | 1406 | } else { |
1331 | mutex_lock(&opipe->inode->i_mutex); | 1407 | mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); |
1332 | mutex_lock(&ipipe->inode->i_mutex); | 1408 | mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); |
1333 | } | 1409 | } |
1334 | 1410 | ||
1335 | for (i = 0;; i++) { | 1411 | do { |
1336 | if (!opipe->readers) { | 1412 | if (!opipe->readers) { |
1337 | send_sig(SIGPIPE, current, 0); | 1413 | send_sig(SIGPIPE, current, 0); |
1338 | if (!ret) | 1414 | if (!ret) |
1339 | ret = -EPIPE; | 1415 | ret = -EPIPE; |
1340 | break; | 1416 | break; |
1341 | } | 1417 | } |
1342 | if (ipipe->nrbufs - i) { | ||
1343 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); | ||
1344 | 1418 | ||
1345 | /* | 1419 | /* |
1346 | * If we have room, fill this buffer | 1420 | * If we have iterated all input buffers or ran out of |
1347 | */ | 1421 | * output room, break. |
1348 | if (opipe->nrbufs < PIPE_BUFFERS) { | 1422 | */ |
1349 | int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); | 1423 | if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) |
1350 | 1424 | break; | |
1351 | /* | ||
1352 | * Get a reference to this pipe buffer, | ||
1353 | * so we can copy the contents over. | ||
1354 | */ | ||
1355 | ibuf->ops->get(ipipe, ibuf); | ||
1356 | |||
1357 | obuf = opipe->bufs + nbuf; | ||
1358 | *obuf = *ibuf; | ||
1359 | |||
1360 | /* | ||
1361 | * Don't inherit the gift flag, we need to | ||
1362 | * prevent multiple steals of this page. | ||
1363 | */ | ||
1364 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; | ||
1365 | |||
1366 | if (obuf->len > len) | ||
1367 | obuf->len = len; | ||
1368 | |||
1369 | opipe->nrbufs++; | ||
1370 | do_wakeup = 1; | ||
1371 | ret += obuf->len; | ||
1372 | len -= obuf->len; | ||
1373 | |||
1374 | if (!len) | ||
1375 | break; | ||
1376 | if (opipe->nrbufs < PIPE_BUFFERS) | ||
1377 | continue; | ||
1378 | } | ||
1379 | |||
1380 | /* | ||
1381 | * We have input available, but no output room. | ||
1382 | * If we already copied data, return that. If we | ||
1383 | * need to drop the opipe lock, it must be ordered | ||
1384 | * last to avoid deadlocks. | ||
1385 | */ | ||
1386 | if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) { | ||
1387 | if (!ret) | ||
1388 | ret = -EAGAIN; | ||
1389 | break; | ||
1390 | } | ||
1391 | if (signal_pending(current)) { | ||
1392 | if (!ret) | ||
1393 | ret = -ERESTARTSYS; | ||
1394 | break; | ||
1395 | } | ||
1396 | if (do_wakeup) { | ||
1397 | smp_mb(); | ||
1398 | if (waitqueue_active(&opipe->wait)) | ||
1399 | wake_up_interruptible(&opipe->wait); | ||
1400 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
1401 | do_wakeup = 0; | ||
1402 | } | ||
1403 | 1425 | ||
1404 | opipe->waiting_writers++; | 1426 | ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1)); |
1405 | pipe_wait(opipe); | 1427 | nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1); |
1406 | opipe->waiting_writers--; | ||
1407 | continue; | ||
1408 | } | ||
1409 | 1428 | ||
1410 | /* | 1429 | /* |
1411 | * No input buffers, do the usual checks for available | 1430 | * Get a reference to this pipe buffer, |
1412 | * writers and blocking and wait if necessary | 1431 | * so we can copy the contents over. |
1413 | */ | 1432 | */ |
1414 | if (!ipipe->writers) | 1433 | ibuf->ops->get(ipipe, ibuf); |
1415 | break; | 1434 | |
1416 | if (!ipipe->waiting_writers) { | 1435 | obuf = opipe->bufs + nbuf; |
1417 | if (ret) | 1436 | *obuf = *ibuf; |
1418 | break; | 1437 | |
1419 | } | ||
1420 | /* | 1438 | /* |
1421 | * pipe_wait() drops the ipipe mutex. To avoid deadlocks | 1439 | * Don't inherit the gift flag, we need to |
1422 | * with another process, we can only safely do that if | 1440 | * prevent multiple steals of this page. |
1423 | * the ipipe lock is ordered last. | ||
1424 | */ | 1441 | */ |
1425 | if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) { | 1442 | obuf->flags &= ~PIPE_BUF_FLAG_GIFT; |
1426 | if (!ret) | ||
1427 | ret = -EAGAIN; | ||
1428 | break; | ||
1429 | } | ||
1430 | if (signal_pending(current)) { | ||
1431 | if (!ret) | ||
1432 | ret = -ERESTARTSYS; | ||
1433 | break; | ||
1434 | } | ||
1435 | 1443 | ||
1436 | if (waitqueue_active(&ipipe->wait)) | 1444 | if (obuf->len > len) |
1437 | wake_up_interruptible_sync(&ipipe->wait); | 1445 | obuf->len = len; |
1438 | kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT); | ||
1439 | 1446 | ||
1440 | pipe_wait(ipipe); | 1447 | opipe->nrbufs++; |
1441 | } | 1448 | ret += obuf->len; |
1449 | len -= obuf->len; | ||
1450 | i++; | ||
1451 | } while (len); | ||
1442 | 1452 | ||
1443 | mutex_unlock(&ipipe->inode->i_mutex); | 1453 | mutex_unlock(&ipipe->inode->i_mutex); |
1444 | mutex_unlock(&opipe->inode->i_mutex); | 1454 | mutex_unlock(&opipe->inode->i_mutex); |
1445 | 1455 | ||
1446 | if (do_wakeup) { | 1456 | /* |
1457 | * If we put data in the output pipe, wakeup any potential readers. | ||
1458 | */ | ||
1459 | if (ret > 0) { | ||
1447 | smp_mb(); | 1460 | smp_mb(); |
1448 | if (waitqueue_active(&opipe->wait)) | 1461 | if (waitqueue_active(&opipe->wait)) |
1449 | wake_up_interruptible(&opipe->wait); | 1462 | wake_up_interruptible(&opipe->wait); |
@@ -1464,14 +1477,29 @@ static long do_tee(struct file *in, struct file *out, size_t len, | |||
1464 | { | 1477 | { |
1465 | struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; | 1478 | struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe; |
1466 | struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; | 1479 | struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe; |
1480 | int ret = -EINVAL; | ||
1467 | 1481 | ||
1468 | /* | 1482 | /* |
1469 | * Link ipipe to the two output pipes, consuming as we go along. | 1483 | * Duplicate the contents of ipipe to opipe without actually |
1484 | * copying the data. | ||
1470 | */ | 1485 | */ |
1471 | if (ipipe && opipe) | 1486 | if (ipipe && opipe && ipipe != opipe) { |
1472 | return link_pipe(ipipe, opipe, len, flags); | 1487 | /* |
1488 | * Keep going, unless we encounter an error. The ipipe/opipe | ||
1489 | * ordering doesn't really matter. | ||
1490 | */ | ||
1491 | ret = link_ipipe_prep(ipipe, flags); | ||
1492 | if (!ret) { | ||
1493 | ret = link_opipe_prep(opipe, flags); | ||
1494 | if (!ret) { | ||
1495 | ret = link_pipe(ipipe, opipe, len, flags); | ||
1496 | if (!ret && (flags & SPLICE_F_NONBLOCK)) | ||
1497 | ret = -EAGAIN; | ||
1498 | } | ||
1499 | } | ||
1500 | } | ||
1473 | 1501 | ||
1474 | return -EINVAL; | 1502 | return ret; |
1475 | } | 1503 | } |
1476 | 1504 | ||
1477 | asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) | 1505 | asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) |
diff --git a/fs/super.c b/fs/super.c index 9b780c42d845..5c4c94d5495e 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -49,11 +49,12 @@ DEFINE_SPINLOCK(sb_lock); | |||
49 | 49 | ||
50 | /** | 50 | /** |
51 | * alloc_super - create new superblock | 51 | * alloc_super - create new superblock |
52 | * @type: filesystem type superblock should belong to | ||
52 | * | 53 | * |
53 | * Allocates and initializes a new &struct super_block. alloc_super() | 54 | * Allocates and initializes a new &struct super_block. alloc_super() |
54 | * returns a pointer to the new superblock, or %NULL if allocation failed. | 55 | * returns a pointer to the new superblock, or %NULL if allocation failed. |
55 | */ | 56 | */ |
56 | static struct super_block *alloc_super(void) | 57 | static struct super_block *alloc_super(struct file_system_type *type) |
57 | { | 58 | { |
58 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); | 59 | struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); |
59 | static struct super_operations default_op; | 60 | static struct super_operations default_op; |
@@ -72,6 +73,13 @@ static struct super_block *alloc_super(void) | |||
72 | INIT_LIST_HEAD(&s->s_inodes); | 73 | INIT_LIST_HEAD(&s->s_inodes); |
73 | init_rwsem(&s->s_umount); | 74 | init_rwsem(&s->s_umount); |
74 | mutex_init(&s->s_lock); | 75 | mutex_init(&s->s_lock); |
76 | lockdep_set_class(&s->s_umount, &type->s_umount_key); | ||
77 | /* | ||
78 | * The locking rules for s_lock are up to the | ||
79 | * filesystem. For example ext3fs has different | ||
80 | * lock ordering than usbfs: | ||
81 | */ | ||
82 | lockdep_set_class(&s->s_lock, &type->s_lock_key); | ||
75 | down_write(&s->s_umount); | 83 | down_write(&s->s_umount); |
76 | s->s_count = S_BIAS; | 84 | s->s_count = S_BIAS; |
77 | atomic_set(&s->s_active, 1); | 85 | atomic_set(&s->s_active, 1); |
@@ -295,7 +303,7 @@ retry: | |||
295 | } | 303 | } |
296 | if (!s) { | 304 | if (!s) { |
297 | spin_unlock(&sb_lock); | 305 | spin_unlock(&sb_lock); |
298 | s = alloc_super(); | 306 | s = alloc_super(type); |
299 | if (!s) | 307 | if (!s) |
300 | return ERR_PTR(-ENOMEM); | 308 | return ERR_PTR(-ENOMEM); |
301 | goto retry; | 309 | goto retry; |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 5e0e31cc46f5..9889e54e1f13 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -109,6 +109,17 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) | |||
109 | inode->i_ctime = iattr->ia_ctime; | 109 | inode->i_ctime = iattr->ia_ctime; |
110 | } | 110 | } |
111 | 111 | ||
112 | |||
113 | /* | ||
114 | * sysfs has different lock ordering behavior for i_mutex than other | ||
115 | * filesystems; sysfs i_mutex is taken in many places with subsystem locks | ||
116 | * held. At the same time, many of the VFS locking rules do not apply to | ||
117 | * sysfs at all (cross directory rename for example). To untangle this mess | ||
118 | * (which gives false positives in lockdep), we're giving sysfs inodes their | ||
119 | * own class for i_mutex. | ||
120 | */ | ||
121 | static struct lock_class_key sysfs_inode_imutex_key; | ||
122 | |||
112 | struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) | 123 | struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) |
113 | { | 124 | { |
114 | struct inode * inode = new_inode(sysfs_sb); | 125 | struct inode * inode = new_inode(sysfs_sb); |
@@ -118,6 +129,7 @@ struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) | |||
118 | inode->i_mapping->a_ops = &sysfs_aops; | 129 | inode->i_mapping->a_ops = &sysfs_aops; |
119 | inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; | 130 | inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; |
120 | inode->i_op = &sysfs_inode_operations; | 131 | inode->i_op = &sysfs_inode_operations; |
132 | lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); | ||
121 | 133 | ||
122 | if (sd->s_iattr) { | 134 | if (sd->s_iattr) { |
123 | /* sysfs_dirent has non-default attributes | 135 | /* sysfs_dirent has non-default attributes |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 3873c672cb4c..33323473e3c4 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -75,6 +75,12 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) | |||
75 | } | 75 | } |
76 | *err = -ENOSPC; | 76 | *err = -ENOSPC; |
77 | 77 | ||
78 | UDF_I_UNIQUE(inode) = 0; | ||
79 | UDF_I_LENEXTENTS(inode) = 0; | ||
80 | UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; | ||
81 | UDF_I_NEXT_ALLOC_GOAL(inode) = 0; | ||
82 | UDF_I_STRAT4096(inode) = 0; | ||
83 | |||
78 | block = udf_new_block(dir->i_sb, NULL, UDF_I_LOCATION(dir).partitionReferenceNum, | 84 | block = udf_new_block(dir->i_sb, NULL, UDF_I_LOCATION(dir).partitionReferenceNum, |
79 | start, err); | 85 | start, err); |
80 | if (*err) | 86 | if (*err) |
@@ -84,11 +90,6 @@ struct inode * udf_new_inode (struct inode *dir, int mode, int * err) | |||
84 | } | 90 | } |
85 | 91 | ||
86 | mutex_lock(&sbi->s_alloc_mutex); | 92 | mutex_lock(&sbi->s_alloc_mutex); |
87 | UDF_I_UNIQUE(inode) = 0; | ||
88 | UDF_I_LENEXTENTS(inode) = 0; | ||
89 | UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; | ||
90 | UDF_I_NEXT_ALLOC_GOAL(inode) = 0; | ||
91 | UDF_I_STRAT4096(inode) = 0; | ||
92 | if (UDF_SB_LVIDBH(sb)) | 93 | if (UDF_SB_LVIDBH(sb)) |
93 | { | 94 | { |
94 | struct logicalVolHeaderDesc *lvhd; | 95 | struct logicalVolHeaderDesc *lvhd; |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 4df822c881b6..fcce1a21a51b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -115,6 +115,13 @@ static struct inode *udf_alloc_inode(struct super_block *sb) | |||
115 | ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL); | 115 | ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL); |
116 | if (!ei) | 116 | if (!ei) |
117 | return NULL; | 117 | return NULL; |
118 | |||
119 | ei->i_unique = 0; | ||
120 | ei->i_lenExtents = 0; | ||
121 | ei->i_next_alloc_block = 0; | ||
122 | ei->i_next_alloc_goal = 0; | ||
123 | ei->i_strat4096 = 0; | ||
124 | |||
118 | return &ei->vfs_inode; | 125 | return &ei->vfs_inode; |
119 | } | 126 | } |
120 | 127 | ||
@@ -1652,7 +1659,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1652 | iput(inode); | 1659 | iput(inode); |
1653 | goto error_out; | 1660 | goto error_out; |
1654 | } | 1661 | } |
1655 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 1662 | sb->s_maxbytes = 1<<30; |
1656 | return 0; | 1663 | return 0; |
1657 | 1664 | ||
1658 | error_out: | 1665 | error_out: |
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index e1b0e8cfecb4..0abd66ce36ea 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c | |||
@@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode) | |||
239 | { | 239 | { |
240 | if (offset) | 240 | if (offset) |
241 | { | 241 | { |
242 | extoffset -= adsize; | 242 | /* |
243 | etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); | 243 | * OK, there is not extent covering inode->i_size and |
244 | if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) | 244 | * OK, there is no extent covering inode->i_size and |
245 | { | 245 | * extending the file by 'offset'. |
246 | extoffset -= adsize; | 246 | */ |
247 | elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); | 247 | if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) || |
248 | udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); | 248 | (bh && extoffset == sizeof(struct allocExtDesc))) { |
249 | /* File has no extents at all! */ | ||
250 | memset(&eloc, 0x00, sizeof(kernel_lb_addr)); | ||
251 | elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; | ||
252 | udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); | ||
249 | } | 253 | } |
250 | else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) | 254 | else { |
251 | { | ||
252 | kernel_lb_addr neloc = { 0, 0 }; | ||
253 | extoffset -= adsize; | 255 | extoffset -= adsize; |
254 | nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | | 256 | etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); |
255 | ((elen + offset + inode->i_sb->s_blocksize - 1) & | 257 | if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) |
256 | ~(inode->i_sb->s_blocksize - 1)); | 258 | { |
257 | udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); | 259 | extoffset -= adsize; |
258 | udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); | 260 | elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); |
259 | } | 261 | udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); |
260 | else | 262 | } |
261 | { | 263 | else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) |
262 | if (elen & (inode->i_sb->s_blocksize - 1)) | ||
263 | { | 264 | { |
265 | kernel_lb_addr neloc = { 0, 0 }; | ||
264 | extoffset -= adsize; | 266 | extoffset -= adsize; |
265 | elen = EXT_RECORDED_ALLOCATED | | 267 | nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | |
266 | ((elen + inode->i_sb->s_blocksize - 1) & | 268 | ((elen + offset + inode->i_sb->s_blocksize - 1) & |
267 | ~(inode->i_sb->s_blocksize - 1)); | 269 | ~(inode->i_sb->s_blocksize - 1)); |
268 | udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); | 270 | udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); |
271 | udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); | ||
272 | } | ||
273 | else | ||
274 | { | ||
275 | if (elen & (inode->i_sb->s_blocksize - 1)) | ||
276 | { | ||
277 | extoffset -= adsize; | ||
278 | elen = EXT_RECORDED_ALLOCATED | | ||
279 | ((elen + inode->i_sb->s_blocksize - 1) & | ||
280 | ~(inode->i_sb->s_blocksize - 1)); | ||
281 | udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); | ||
282 | } | ||
283 | memset(&eloc, 0x00, sizeof(kernel_lb_addr)); | ||
284 | elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; | ||
285 | udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); | ||
269 | } | 286 | } |
270 | memset(&eloc, 0x00, sizeof(kernel_lb_addr)); | ||
271 | elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; | ||
272 | udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); | ||
273 | } | 287 | } |
274 | } | 288 | } |
275 | } | 289 | } |
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b01804baa120..b82381475779 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c | |||
@@ -248,7 +248,7 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk, | |||
248 | 248 | ||
249 | if (likely(cur_index != index)) { | 249 | if (likely(cur_index != index)) { |
250 | page = ufs_get_locked_page(mapping, index); | 250 | page = ufs_get_locked_page(mapping, index); |
251 | if (IS_ERR(page)) | 251 | if (!page || IS_ERR(page)) /* it was truncated or EIO */ |
252 | continue; | 252 | continue; |
253 | } else | 253 | } else |
254 | page = locked_page; | 254 | page = locked_page; |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index e7c8615beb65..30c6e8a9446c 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh) | |||
169 | 169 | ||
170 | static struct buffer_head * | 170 | static struct buffer_head * |
171 | ufs_clear_frags(struct inode *inode, sector_t beg, | 171 | ufs_clear_frags(struct inode *inode, sector_t beg, |
172 | unsigned int n) | 172 | unsigned int n, sector_t want) |
173 | { | 173 | { |
174 | struct buffer_head *res, *bh; | 174 | struct buffer_head *res = NULL, *bh; |
175 | sector_t end = beg + n; | 175 | sector_t end = beg + n; |
176 | 176 | ||
177 | res = sb_getblk(inode->i_sb, beg); | 177 | for (; beg < end; ++beg) { |
178 | ufs_clear_frag(inode, res); | ||
179 | for (++beg; beg < end; ++beg) { | ||
180 | bh = sb_getblk(inode->i_sb, beg); | 178 | bh = sb_getblk(inode->i_sb, beg); |
181 | ufs_clear_frag(inode, bh); | 179 | ufs_clear_frag(inode, bh); |
182 | brelse(bh); | 180 | if (want != beg) |
181 | brelse(bh); | ||
182 | else | ||
183 | res = bh; | ||
183 | } | 184 | } |
185 | BUG_ON(!res); | ||
184 | return res; | 186 | return res; |
185 | } | 187 | } |
186 | 188 | ||
@@ -265,7 +267,9 @@ repeat: | |||
265 | lastfrag = ufsi->i_lastfrag; | 267 | lastfrag = ufsi->i_lastfrag; |
266 | 268 | ||
267 | } | 269 | } |
268 | goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb; | 270 | tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]); |
271 | if (tmp) | ||
272 | goal = tmp + uspi->s_fpb; | ||
269 | tmp = ufs_new_fragments (inode, p, fragment - blockoff, | 273 | tmp = ufs_new_fragments (inode, p, fragment - blockoff, |
270 | goal, required + blockoff, | 274 | goal, required + blockoff, |
271 | err, locked_page); | 275 | err, locked_page); |
@@ -277,13 +281,15 @@ repeat: | |||
277 | tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), | 281 | tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), |
278 | fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), | 282 | fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), |
279 | err, locked_page); | 283 | err, locked_page); |
280 | } | 284 | } else /* (lastblock > block) */ { |
281 | /* | 285 | /* |
282 | * We will allocate new block before last allocated block | 286 | * We will allocate new block before last allocated block |
283 | */ | 287 | */ |
284 | else /* (lastblock > block) */ { | 288 | if (block) { |
285 | if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1]))) | 289 | tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]); |
286 | goal = tmp + uspi->s_fpb; | 290 | if (tmp) |
291 | goal = tmp + uspi->s_fpb; | ||
292 | } | ||
287 | tmp = ufs_new_fragments(inode, p, fragment - blockoff, | 293 | tmp = ufs_new_fragments(inode, p, fragment - blockoff, |
288 | goal, uspi->s_fpb, err, locked_page); | 294 | goal, uspi->s_fpb, err, locked_page); |
289 | } | 295 | } |
@@ -296,7 +302,7 @@ repeat: | |||
296 | } | 302 | } |
297 | 303 | ||
298 | if (!phys) { | 304 | if (!phys) { |
299 | result = ufs_clear_frags(inode, tmp + blockoff, required); | 305 | result = ufs_clear_frags(inode, tmp, required, tmp + blockoff); |
300 | } else { | 306 | } else { |
301 | *phys = tmp + blockoff; | 307 | *phys = tmp + blockoff; |
302 | result = NULL; | 308 | result = NULL; |
@@ -383,7 +389,7 @@ repeat: | |||
383 | } | 389 | } |
384 | } | 390 | } |
385 | 391 | ||
386 | if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb)) | 392 | if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]))) |
387 | goal = tmp + uspi->s_fpb; | 393 | goal = tmp + uspi->s_fpb; |
388 | else | 394 | else |
389 | goal = bh->b_blocknr + uspi->s_fpb; | 395 | goal = bh->b_blocknr + uspi->s_fpb; |
@@ -397,7 +403,8 @@ repeat: | |||
397 | 403 | ||
398 | 404 | ||
399 | if (!phys) { | 405 | if (!phys) { |
400 | result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb); | 406 | result = ufs_clear_frags(inode, tmp, uspi->s_fpb, |
407 | tmp + blockoff); | ||
401 | } else { | 408 | } else { |
402 | *phys = tmp + blockoff; | 409 | *phys = tmp + blockoff; |
403 | *new = 1; | 410 | *new = 1; |
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index abd5f23a426d..d344b411e261 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c | |||
@@ -129,7 +129,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, | |||
129 | struct inode * inode; | 129 | struct inode * inode; |
130 | 130 | ||
131 | if (l > sb->s_blocksize) | 131 | if (l > sb->s_blocksize) |
132 | goto out; | 132 | goto out_notlocked; |
133 | 133 | ||
134 | lock_kernel(); | 134 | lock_kernel(); |
135 | inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); | 135 | inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); |
@@ -155,6 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, | |||
155 | err = ufs_add_nondir(dentry, inode); | 155 | err = ufs_add_nondir(dentry, inode); |
156 | out: | 156 | out: |
157 | unlock_kernel(); | 157 | unlock_kernel(); |
158 | out_notlocked: | ||
158 | return err; | 159 | return err; |
159 | 160 | ||
160 | out_fail: | 161 | out_fail: |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 19a99726e58d..992ee0b87cc3 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, | |||
1326 | size_t towrite = len; | 1326 | size_t towrite = len; |
1327 | struct buffer_head *bh; | 1327 | struct buffer_head *bh; |
1328 | 1328 | ||
1329 | mutex_lock(&inode->i_mutex); | 1329 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); |
1330 | while (towrite > 0) { | 1330 | while (towrite > 0) { |
1331 | tocopy = sb->s_blocksize - offset < towrite ? | 1331 | tocopy = sb->s_blocksize - offset < towrite ? |
1332 | sb->s_blocksize - offset : towrite; | 1332 | sb->s_blocksize - offset : towrite; |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index c9b55872079b..ea11d04c41a0 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
@@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode) | |||
375 | int err = 0; | 375 | int err = 0; |
376 | struct address_space *mapping = inode->i_mapping; | 376 | struct address_space *mapping = inode->i_mapping; |
377 | struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; | 377 | struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; |
378 | struct ufs_inode_info *ufsi = UFS_I(inode); | ||
379 | unsigned lastfrag, i, end; | 378 | unsigned lastfrag, i, end; |
380 | struct page *lastpage; | 379 | struct page *lastpage; |
381 | struct buffer_head *bh; | 380 | struct buffer_head *bh; |
382 | 381 | ||
383 | lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; | 382 | lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; |
384 | 383 | ||
385 | if (!lastfrag) { | 384 | if (!lastfrag) |
386 | ufsi->i_lastfrag = 0; | ||
387 | goto out; | 385 | goto out; |
388 | } | 386 | |
389 | lastfrag--; | 387 | lastfrag--; |
390 | 388 | ||
391 | lastpage = ufs_get_locked_page(mapping, lastfrag >> | 389 | lastpage = ufs_get_locked_page(mapping, lastfrag >> |
@@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode) | |||
400 | for (i = 0; i < end; ++i) | 398 | for (i = 0; i < end; ++i) |
401 | bh = bh->b_this_page; | 399 | bh = bh->b_this_page; |
402 | 400 | ||
403 | if (!buffer_mapped(bh)) { | 401 | |
404 | err = ufs_getfrag_block(inode, lastfrag, bh, 1); | 402 | err = ufs_getfrag_block(inode, lastfrag, bh, 1); |
405 | 403 | ||
406 | if (unlikely(err)) | 404 | if (unlikely(err)) |
407 | goto out_unlock; | 405 | goto out_unlock; |
408 | 406 | ||
409 | if (buffer_new(bh)) { | 407 | if (buffer_new(bh)) { |
410 | clear_buffer_new(bh); | 408 | clear_buffer_new(bh); |
411 | unmap_underlying_metadata(bh->b_bdev, | 409 | unmap_underlying_metadata(bh->b_bdev, |
412 | bh->b_blocknr); | 410 | bh->b_blocknr); |
413 | /* | 411 | /* |
414 | * we do not zeroize fragment, because of | 412 | * we do not zeroize fragment, because of |
415 | * if it maped to hole, it already contains zeroes | 413 | * if it maped to hole, it already contains zeroes |
416 | */ | 414 | */ |
417 | set_buffer_uptodate(bh); | 415 | set_buffer_uptodate(bh); |
418 | mark_buffer_dirty(bh); | 416 | mark_buffer_dirty(bh); |
419 | set_page_dirty(lastpage); | 417 | set_page_dirty(lastpage); |
420 | } | ||
421 | } | 418 | } |
419 | |||
422 | out_unlock: | 420 | out_unlock: |
423 | ufs_put_locked_page(lastpage); | 421 | ufs_put_locked_page(lastpage); |
424 | out: | 422 | out: |
@@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) | |||
440 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 438 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
441 | return -EPERM; | 439 | return -EPERM; |
442 | 440 | ||
443 | if (inode->i_size > old_i_size) { | 441 | err = ufs_alloc_lastblock(inode); |
444 | /* | ||
445 | * if we expand file we should care about | ||
446 | * allocation of block for last byte first of all | ||
447 | */ | ||
448 | err = ufs_alloc_lastblock(inode); | ||
449 | 442 | ||
450 | if (err) { | 443 | if (err) { |
451 | i_size_write(inode, old_i_size); | 444 | i_size_write(inode, old_i_size); |
452 | goto out; | 445 | goto out; |
453 | } | ||
454 | /* | ||
455 | * go away, because of we expand file, and we do not | ||
456 | * need free blocks, and zeroizes page | ||
457 | */ | ||
458 | lock_kernel(); | ||
459 | goto almost_end; | ||
460 | } | 446 | } |
461 | 447 | ||
462 | block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); | 448 | block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); |
@@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) | |||
477 | yield(); | 463 | yield(); |
478 | } | 464 | } |
479 | 465 | ||
480 | if (inode->i_size < old_i_size) { | ||
481 | /* | ||
482 | * now we should have enough space | ||
483 | * to allocate block for last byte | ||
484 | */ | ||
485 | err = ufs_alloc_lastblock(inode); | ||
486 | if (err) | ||
487 | /* | ||
488 | * looks like all the same - we have no space, | ||
489 | * but we truncate file already | ||
490 | */ | ||
491 | inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize; | ||
492 | } | ||
493 | almost_end: | ||
494 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 466 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
467 | ufsi->i_lastfrag = DIRECT_FRAGMENT; | ||
495 | unlock_kernel(); | 468 | unlock_kernel(); |
496 | mark_inode_dirty(inode); | 469 | mark_inode_dirty(inode); |
497 | out: | 470 | out: |
diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 337cf2c46d10..22f820a9b15c 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c | |||
@@ -251,12 +251,12 @@ struct page *ufs_get_locked_page(struct address_space *mapping, | |||
251 | { | 251 | { |
252 | struct page *page; | 252 | struct page *page; |
253 | 253 | ||
254 | try_again: | ||
255 | page = find_lock_page(mapping, index); | 254 | page = find_lock_page(mapping, index); |
256 | if (!page) { | 255 | if (!page) { |
257 | page = read_cache_page(mapping, index, | 256 | page = read_cache_page(mapping, index, |
258 | (filler_t*)mapping->a_ops->readpage, | 257 | (filler_t*)mapping->a_ops->readpage, |
259 | NULL); | 258 | NULL); |
259 | |||
260 | if (IS_ERR(page)) { | 260 | if (IS_ERR(page)) { |
261 | printk(KERN_ERR "ufs_change_blocknr: " | 261 | printk(KERN_ERR "ufs_change_blocknr: " |
262 | "read_cache_page error: ino %lu, index: %lu\n", | 262 | "read_cache_page error: ino %lu, index: %lu\n", |
@@ -266,6 +266,14 @@ try_again: | |||
266 | 266 | ||
267 | lock_page(page); | 267 | lock_page(page); |
268 | 268 | ||
269 | if (unlikely(page->mapping == NULL)) { | ||
270 | /* Truncate got there first */ | ||
271 | unlock_page(page); | ||
272 | page_cache_release(page); | ||
273 | page = NULL; | ||
274 | goto out; | ||
275 | } | ||
276 | |||
269 | if (!PageUptodate(page) || PageError(page)) { | 277 | if (!PageUptodate(page) || PageError(page)) { |
270 | unlock_page(page); | 278 | unlock_page(page); |
271 | page_cache_release(page); | 279 | page_cache_release(page); |
@@ -275,15 +283,8 @@ try_again: | |||
275 | mapping->host->i_ino, index); | 283 | mapping->host->i_ino, index); |
276 | 284 | ||
277 | page = ERR_PTR(-EIO); | 285 | page = ERR_PTR(-EIO); |
278 | goto out; | ||
279 | } | 286 | } |
280 | } | 287 | } |
281 | |||
282 | if (unlikely(!page->mapping || !page_has_buffers(page))) { | ||
283 | unlock_page(page); | ||
284 | page_cache_release(page); | ||
285 | goto try_again;/*we really need these buffers*/ | ||
286 | } | ||
287 | out: | 288 | out: |
288 | return page; | 289 | return page; |
289 | } | 290 | } |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c40f81ba9b13..34dcb43a7837 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1390,11 +1390,19 @@ xfs_vm_direct_IO( | |||
1390 | 1390 | ||
1391 | iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); | 1391 | iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); |
1392 | 1392 | ||
1393 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, | 1393 | if (rw == WRITE) { |
1394 | iomap.iomap_target->bt_bdev, | 1394 | ret = blockdev_direct_IO_own_locking(rw, iocb, inode, |
1395 | iov, offset, nr_segs, | 1395 | iomap.iomap_target->bt_bdev, |
1396 | xfs_get_blocks_direct, | 1396 | iov, offset, nr_segs, |
1397 | xfs_end_io_direct); | 1397 | xfs_get_blocks_direct, |
1398 | xfs_end_io_direct); | ||
1399 | } else { | ||
1400 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | ||
1401 | iomap.iomap_target->bt_bdev, | ||
1402 | iov, offset, nr_segs, | ||
1403 | xfs_get_blocks_direct, | ||
1404 | xfs_end_io_direct); | ||
1405 | } | ||
1398 | 1406 | ||
1399 | if (unlikely(ret <= 0 && iocb->private)) | 1407 | if (unlikely(ret <= 0 && iocb->private)) |
1400 | xfs_destroy_ioend(iocb->private); | 1408 | xfs_destroy_ioend(iocb->private); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index ceda3a2859d2..7858703ed84c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -246,8 +246,8 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); | |||
246 | #define BUF_BUSY XBF_DONT_BLOCK | 246 | #define BUF_BUSY XBF_DONT_BLOCK |
247 | 247 | ||
248 | #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) | 248 | #define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) |
249 | #define XFS_BUF_ZEROFLAGS(bp) \ | 249 | #define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ |
250 | ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI)) | 250 | ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) |
251 | 251 | ||
252 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) | 252 | #define XFS_BUF_STALE(bp) ((bp)->b_flags |= XFS_B_STALE) |
253 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) | 253 | #define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XFS_B_STALE) |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 8c021dc57d1f..a13f75c1a936 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -215,7 +215,6 @@ BUFFER_FNS(PrivateStart, unwritten); | |||
215 | #define MIN(a,b) (min(a,b)) | 215 | #define MIN(a,b) (min(a,b)) |
216 | #define MAX(a,b) (max(a,b)) | 216 | #define MAX(a,b) (max(a,b)) |
217 | #define howmany(x, y) (((x)+((y)-1))/(y)) | 217 | #define howmany(x, y) (((x)+((y)-1))/(y)) |
218 | #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) | ||
219 | 218 | ||
220 | /* | 219 | /* |
221 | * Various platform dependent calls that don't fit anywhere else | 220 | * Various platform dependent calls that don't fit anywhere else |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 5d9cfd91ad08..ee788b1cb364 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -264,7 +264,9 @@ xfs_read( | |||
264 | dmflags, &locktype); | 264 | dmflags, &locktype); |
265 | if (ret) { | 265 | if (ret) { |
266 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 266 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
267 | goto unlock_mutex; | 267 | if (unlikely(ioflags & IO_ISDIRECT)) |
268 | mutex_unlock(&inode->i_mutex); | ||
269 | return ret; | ||
268 | } | 270 | } |
269 | } | 271 | } |
270 | 272 | ||
@@ -272,6 +274,9 @@ xfs_read( | |||
272 | bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), | 274 | bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), |
273 | -1, FI_REMAPF_LOCKED); | 275 | -1, FI_REMAPF_LOCKED); |
274 | 276 | ||
277 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
278 | mutex_unlock(&inode->i_mutex); | ||
279 | |||
275 | xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, | 280 | xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, |
276 | (void *)iovp, segs, *offset, ioflags); | 281 | (void *)iovp, segs, *offset, ioflags); |
277 | ret = __generic_file_aio_read(iocb, iovp, segs, offset); | 282 | ret = __generic_file_aio_read(iocb, iovp, segs, offset); |
@@ -281,10 +286,6 @@ xfs_read( | |||
281 | XFS_STATS_ADD(xs_read_bytes, ret); | 286 | XFS_STATS_ADD(xs_read_bytes, ret); |
282 | 287 | ||
283 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
284 | |||
285 | unlock_mutex: | ||
286 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
287 | mutex_unlock(&inode->i_mutex); | ||
288 | return ret; | 289 | return ret; |
289 | } | 290 | } |
290 | 291 | ||
@@ -390,6 +391,8 @@ xfs_splice_write( | |||
390 | xfs_inode_t *ip = XFS_BHVTOI(bdp); | 391 | xfs_inode_t *ip = XFS_BHVTOI(bdp); |
391 | xfs_mount_t *mp = ip->i_mount; | 392 | xfs_mount_t *mp = ip->i_mount; |
392 | ssize_t ret; | 393 | ssize_t ret; |
394 | struct inode *inode = outfilp->f_mapping->host; | ||
395 | xfs_fsize_t isize; | ||
393 | 396 | ||
394 | XFS_STATS_INC(xs_write_calls); | 397 | XFS_STATS_INC(xs_write_calls); |
395 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 398 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
@@ -416,6 +419,20 @@ xfs_splice_write( | |||
416 | if (ret > 0) | 419 | if (ret > 0) |
417 | XFS_STATS_ADD(xs_write_bytes, ret); | 420 | XFS_STATS_ADD(xs_write_bytes, ret); |
418 | 421 | ||
422 | isize = i_size_read(inode); | ||
423 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | ||
424 | *ppos = isize; | ||
425 | |||
426 | if (*ppos > ip->i_d.di_size) { | ||
427 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
428 | if (*ppos > ip->i_d.di_size) { | ||
429 | ip->i_d.di_size = *ppos; | ||
430 | i_size_write(inode, *ppos); | ||
431 | ip->i_update_core = 1; | ||
432 | ip->i_update_size = 1; | ||
433 | } | ||
434 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
435 | } | ||
419 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 436 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
420 | return ret; | 437 | return ret; |
421 | } | 438 | } |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9bdef9d51900..4754f342a5d3 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -314,6 +314,13 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp) | |||
314 | return; | 314 | return; |
315 | } | 315 | } |
316 | 316 | ||
317 | if (xfs_readonly_buftarg(mp->m_ddev_targp)) { | ||
318 | xfs_fs_cmn_err(CE_NOTE, mp, | ||
319 | "Disabling barriers, underlying device is readonly"); | ||
320 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
321 | return; | ||
322 | } | ||
323 | |||
317 | error = xfs_barrier_test(mp); | 324 | error = xfs_barrier_test(mp); |
318 | if (error) { | 325 | if (error) { |
319 | xfs_fs_cmn_err(CE_NOTE, mp, | 326 | xfs_fs_cmn_err(CE_NOTE, mp, |
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index e95e99f7168f..db8872be8c87 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -203,7 +203,7 @@ xfs_qm_statvfs( | |||
203 | if (error || !vnode) | 203 | if (error || !vnode) |
204 | return error; | 204 | return error; |
205 | 205 | ||
206 | mp = XFS_BHVTOM(bhv); | 206 | mp = xfs_vfstom(bhvtovfs(bhv)); |
207 | ip = xfs_vtoi(vnode); | 207 | ip = xfs_vtoi(vnode); |
208 | 208 | ||
209 | if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) | 209 | if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) |
@@ -217,17 +217,24 @@ xfs_qm_statvfs( | |||
217 | return 0; | 217 | return 0; |
218 | dp = &dqp->q_core; | 218 | dp = &dqp->q_core; |
219 | 219 | ||
220 | limit = dp->d_blk_softlimit ? dp->d_blk_softlimit : dp->d_blk_hardlimit; | 220 | limit = dp->d_blk_softlimit ? |
221 | be64_to_cpu(dp->d_blk_softlimit) : | ||
222 | be64_to_cpu(dp->d_blk_hardlimit); | ||
221 | if (limit && statp->f_blocks > limit) { | 223 | if (limit && statp->f_blocks > limit) { |
222 | statp->f_blocks = limit; | 224 | statp->f_blocks = limit; |
223 | statp->f_bfree = (statp->f_blocks > dp->d_bcount) ? | 225 | statp->f_bfree = |
224 | (statp->f_blocks - dp->d_bcount) : 0; | 226 | (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? |
227 | (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; | ||
225 | } | 228 | } |
226 | limit = dp->d_ino_softlimit ? dp->d_ino_softlimit : dp->d_ino_hardlimit; | 229 | |
230 | limit = dp->d_ino_softlimit ? | ||
231 | be64_to_cpu(dp->d_ino_softlimit) : | ||
232 | be64_to_cpu(dp->d_ino_hardlimit); | ||
227 | if (limit && statp->f_files > limit) { | 233 | if (limit && statp->f_files > limit) { |
228 | statp->f_files = limit; | 234 | statp->f_files = limit; |
229 | statp->f_ffree = (statp->f_files > dp->d_icount) ? | 235 | statp->f_ffree = |
230 | (statp->f_ffree - dp->d_icount) : 0; | 236 | (statp->f_files > be64_to_cpu(dp->d_icount)) ? |
237 | (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; | ||
231 | } | 238 | } |
232 | 239 | ||
233 | xfs_qm_dqput(dqp); | 240 | xfs_qm_dqput(dqp); |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index eef6763f3a67..d2bbcd882a69 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -1835,40 +1835,47 @@ xfs_alloc_fix_freelist( | |||
1835 | &agbp))) | 1835 | &agbp))) |
1836 | return error; | 1836 | return error; |
1837 | if (!pag->pagf_init) { | 1837 | if (!pag->pagf_init) { |
1838 | ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); | ||
1839 | ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); | ||
1838 | args->agbp = NULL; | 1840 | args->agbp = NULL; |
1839 | return 0; | 1841 | return 0; |
1840 | } | 1842 | } |
1841 | } else | 1843 | } else |
1842 | agbp = NULL; | 1844 | agbp = NULL; |
1843 | 1845 | ||
1844 | /* If this is a metadata preferred pag and we are user data | 1846 | /* |
1847 | * If this is a metadata preferred pag and we are user data | ||
1845 | * then try somewhere else if we are not being asked to | 1848 | * then try somewhere else if we are not being asked to |
1846 | * try harder at this point | 1849 | * try harder at this point |
1847 | */ | 1850 | */ |
1848 | if (pag->pagf_metadata && args->userdata && flags) { | 1851 | if (pag->pagf_metadata && args->userdata && |
1852 | (flags & XFS_ALLOC_FLAG_TRYLOCK)) { | ||
1853 | ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); | ||
1849 | args->agbp = NULL; | 1854 | args->agbp = NULL; |
1850 | return 0; | 1855 | return 0; |
1851 | } | 1856 | } |
1852 | 1857 | ||
1853 | need = XFS_MIN_FREELIST_PAG(pag, mp); | 1858 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { |
1854 | delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; | 1859 | need = XFS_MIN_FREELIST_PAG(pag, mp); |
1855 | /* | 1860 | delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; |
1856 | * If it looks like there isn't a long enough extent, or enough | 1861 | /* |
1857 | * total blocks, reject it. | 1862 | * If it looks like there isn't a long enough extent, or enough |
1858 | */ | 1863 | * total blocks, reject it. |
1859 | longest = (pag->pagf_longest > delta) ? | 1864 | */ |
1860 | (pag->pagf_longest - delta) : | 1865 | longest = (pag->pagf_longest > delta) ? |
1861 | (pag->pagf_flcount > 0 || pag->pagf_longest > 0); | 1866 | (pag->pagf_longest - delta) : |
1862 | if (args->minlen + args->alignment + args->minalignslop - 1 > longest || | 1867 | (pag->pagf_flcount > 0 || pag->pagf_longest > 0); |
1863 | (!(flags & XFS_ALLOC_FLAG_FREEING) && | 1868 | if ((args->minlen + args->alignment + args->minalignslop - 1) > |
1864 | (int)(pag->pagf_freeblks + pag->pagf_flcount - | 1869 | longest || |
1865 | need - args->total) < | 1870 | ((int)(pag->pagf_freeblks + pag->pagf_flcount - |
1866 | (int)args->minleft)) { | 1871 | need - args->total) < (int)args->minleft)) { |
1867 | if (agbp) | 1872 | if (agbp) |
1868 | xfs_trans_brelse(tp, agbp); | 1873 | xfs_trans_brelse(tp, agbp); |
1869 | args->agbp = NULL; | 1874 | args->agbp = NULL; |
1870 | return 0; | 1875 | return 0; |
1876 | } | ||
1871 | } | 1877 | } |
1878 | |||
1872 | /* | 1879 | /* |
1873 | * Get the a.g. freespace buffer. | 1880 | * Get the a.g. freespace buffer. |
1874 | * Can fail if we're not blocking on locks, and it's held. | 1881 | * Can fail if we're not blocking on locks, and it's held. |
@@ -1878,6 +1885,8 @@ xfs_alloc_fix_freelist( | |||
1878 | &agbp))) | 1885 | &agbp))) |
1879 | return error; | 1886 | return error; |
1880 | if (agbp == NULL) { | 1887 | if (agbp == NULL) { |
1888 | ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); | ||
1889 | ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); | ||
1881 | args->agbp = NULL; | 1890 | args->agbp = NULL; |
1882 | return 0; | 1891 | return 0; |
1883 | } | 1892 | } |
@@ -1887,22 +1896,24 @@ xfs_alloc_fix_freelist( | |||
1887 | */ | 1896 | */ |
1888 | agf = XFS_BUF_TO_AGF(agbp); | 1897 | agf = XFS_BUF_TO_AGF(agbp); |
1889 | need = XFS_MIN_FREELIST(agf, mp); | 1898 | need = XFS_MIN_FREELIST(agf, mp); |
1890 | delta = need > be32_to_cpu(agf->agf_flcount) ? | ||
1891 | (need - be32_to_cpu(agf->agf_flcount)) : 0; | ||
1892 | /* | 1899 | /* |
1893 | * If there isn't enough total or single-extent, reject it. | 1900 | * If there isn't enough total or single-extent, reject it. |
1894 | */ | 1901 | */ |
1895 | longest = be32_to_cpu(agf->agf_longest); | 1902 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { |
1896 | longest = (longest > delta) ? (longest - delta) : | 1903 | delta = need > be32_to_cpu(agf->agf_flcount) ? |
1897 | (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); | 1904 | (need - be32_to_cpu(agf->agf_flcount)) : 0; |
1898 | if (args->minlen + args->alignment + args->minalignslop - 1 > longest || | 1905 | longest = be32_to_cpu(agf->agf_longest); |
1899 | (!(flags & XFS_ALLOC_FLAG_FREEING) && | 1906 | longest = (longest > delta) ? (longest - delta) : |
1900 | (int)(be32_to_cpu(agf->agf_freeblks) + | 1907 | (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); |
1901 | be32_to_cpu(agf->agf_flcount) - need - args->total) < | 1908 | if ((args->minlen + args->alignment + args->minalignslop - 1) > |
1902 | (int)args->minleft)) { | 1909 | longest || |
1903 | xfs_trans_brelse(tp, agbp); | 1910 | ((int)(be32_to_cpu(agf->agf_freeblks) + |
1904 | args->agbp = NULL; | 1911 | be32_to_cpu(agf->agf_flcount) - need - args->total) < |
1905 | return 0; | 1912 | (int)args->minleft)) { |
1913 | xfs_trans_brelse(tp, agbp); | ||
1914 | args->agbp = NULL; | ||
1915 | return 0; | ||
1916 | } | ||
1906 | } | 1917 | } |
1907 | /* | 1918 | /* |
1908 | * Make the freelist shorter if it's too long. | 1919 | * Make the freelist shorter if it's too long. |
@@ -1950,12 +1961,11 @@ xfs_alloc_fix_freelist( | |||
1950 | * on a completely full ag. | 1961 | * on a completely full ag. |
1951 | */ | 1962 | */ |
1952 | if (targs.agbno == NULLAGBLOCK) { | 1963 | if (targs.agbno == NULLAGBLOCK) { |
1953 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { | 1964 | if (flags & XFS_ALLOC_FLAG_FREEING) |
1954 | xfs_trans_brelse(tp, agflbp); | 1965 | break; |
1955 | args->agbp = NULL; | 1966 | xfs_trans_brelse(tp, agflbp); |
1956 | return 0; | 1967 | args->agbp = NULL; |
1957 | } | 1968 | return 0; |
1958 | break; | ||
1959 | } | 1969 | } |
1960 | /* | 1970 | /* |
1961 | * Put each allocated block on the list. | 1971 | * Put each allocated block on the list. |
@@ -2442,31 +2452,26 @@ xfs_free_extent( | |||
2442 | xfs_fsblock_t bno, /* starting block number of extent */ | 2452 | xfs_fsblock_t bno, /* starting block number of extent */ |
2443 | xfs_extlen_t len) /* length of extent */ | 2453 | xfs_extlen_t len) /* length of extent */ |
2444 | { | 2454 | { |
2445 | #ifdef DEBUG | 2455 | xfs_alloc_arg_t args; |
2446 | xfs_agf_t *agf; /* a.g. freespace header */ | ||
2447 | #endif | ||
2448 | xfs_alloc_arg_t args; /* allocation argument structure */ | ||
2449 | int error; | 2456 | int error; |
2450 | 2457 | ||
2451 | ASSERT(len != 0); | 2458 | ASSERT(len != 0); |
2459 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); | ||
2452 | args.tp = tp; | 2460 | args.tp = tp; |
2453 | args.mp = tp->t_mountp; | 2461 | args.mp = tp->t_mountp; |
2454 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); | 2462 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); |
2455 | ASSERT(args.agno < args.mp->m_sb.sb_agcount); | 2463 | ASSERT(args.agno < args.mp->m_sb.sb_agcount); |
2456 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); | 2464 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); |
2457 | args.alignment = 1; | ||
2458 | args.minlen = args.minleft = args.minalignslop = 0; | ||
2459 | down_read(&args.mp->m_peraglock); | 2465 | down_read(&args.mp->m_peraglock); |
2460 | args.pag = &args.mp->m_perag[args.agno]; | 2466 | args.pag = &args.mp->m_perag[args.agno]; |
2461 | if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) | 2467 | if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) |
2462 | goto error0; | 2468 | goto error0; |
2463 | #ifdef DEBUG | 2469 | #ifdef DEBUG |
2464 | ASSERT(args.agbp != NULL); | 2470 | ASSERT(args.agbp != NULL); |
2465 | agf = XFS_BUF_TO_AGF(args.agbp); | 2471 | ASSERT((args.agbno + len) <= |
2466 | ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length)); | 2472 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); |
2467 | #endif | 2473 | #endif |
2468 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, | 2474 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
2469 | len, 0); | ||
2470 | error0: | 2475 | error0: |
2471 | up_read(&args.mp->m_peraglock); | 2476 | up_read(&args.mp->m_peraglock); |
2472 | return error; | 2477 | return error; |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 650591f999ae..5a4256120ccc 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -44,6 +44,26 @@ typedef enum xfs_alloctype | |||
44 | #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ | 44 | #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ |
45 | 45 | ||
46 | /* | 46 | /* |
47 | * In order to avoid ENOSPC-related deadlock caused by | ||
48 | * out-of-order locking of AGF buffer (PV 947395), we place | ||
49 | * constraints on the relationship among actual allocations for | ||
50 | * data blocks, freelist blocks, and potential file data bmap | ||
51 | * btree blocks. However, these restrictions may result in no | ||
52 | * actual space allocated for a delayed extent, for example, a data | ||
53 | * block in a certain AG is allocated but there is no additional | ||
54 | * block for the additional bmap btree block due to a split of the | ||
55 | * bmap btree of the file. The result of this may lead to an | ||
56 | * infinite loop in xfssyncd when the file gets flushed to disk and | ||
57 | * all delayed extents need to be actually allocated. To get around | ||
58 | * this, we explicitly set aside a few blocks which will not be | ||
59 | * reserved in delayed allocation. Considering the minimum number of | ||
60 | * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap | ||
61 | * btree requires 1 fsb, so we set the number of set-aside blocks | ||
62 | * to 4 + 4*agcount. | ||
63 | */ | ||
64 | #define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) | ||
65 | |||
66 | /* | ||
47 | * Argument structure for xfs_alloc routines. | 67 | * Argument structure for xfs_alloc routines. |
48 | * This is turned into a structure to avoid having 20 arguments passed | 68 | * This is turned into a structure to avoid having 20 arguments passed |
49 | * down several levels of the stack. | 69 | * down several levels of the stack. |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3a6137539064..bf46fae303af 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -4993,7 +4993,7 @@ xfs_bmapi( | |||
4993 | bma.firstblock = *firstblock; | 4993 | bma.firstblock = *firstblock; |
4994 | bma.alen = alen; | 4994 | bma.alen = alen; |
4995 | bma.off = aoff; | 4995 | bma.off = aoff; |
4996 | bma.conv = (flags & XFS_BMAPI_CONVERT); | 4996 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); |
4997 | bma.wasdel = wasdelay; | 4997 | bma.wasdel = wasdelay; |
4998 | bma.minlen = minlen; | 4998 | bma.minlen = minlen; |
4999 | bma.low = flist->xbf_low; | 4999 | bma.low = flist->xbf_low; |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 077629bab532..c064e72ada9e 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -462,7 +462,7 @@ xfs_fs_counts( | |||
462 | 462 | ||
463 | xfs_icsb_sync_counters_lazy(mp); | 463 | xfs_icsb_sync_counters_lazy(mp); |
464 | s = XFS_SB_LOCK(mp); | 464 | s = XFS_SB_LOCK(mp); |
465 | cnt->freedata = mp->m_sb.sb_fdblocks; | 465 | cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); |
466 | cnt->freertx = mp->m_sb.sb_frextents; | 466 | cnt->freertx = mp->m_sb.sb_frextents; |
467 | cnt->freeino = mp->m_sb.sb_ifree; | 467 | cnt->freeino = mp->m_sb.sb_ifree; |
468 | cnt->allocino = mp->m_sb.sb_icount; | 468 | cnt->allocino = mp->m_sb.sb_icount; |
@@ -519,15 +519,19 @@ xfs_reserve_blocks( | |||
519 | } | 519 | } |
520 | mp->m_resblks = request; | 520 | mp->m_resblks = request; |
521 | } else { | 521 | } else { |
522 | __int64_t free; | ||
523 | |||
524 | free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | ||
522 | delta = request - mp->m_resblks; | 525 | delta = request - mp->m_resblks; |
523 | lcounter = mp->m_sb.sb_fdblocks - delta; | 526 | lcounter = free - delta; |
524 | if (lcounter < 0) { | 527 | if (lcounter < 0) { |
525 | /* We can't satisfy the request, just get what we can */ | 528 | /* We can't satisfy the request, just get what we can */ |
526 | mp->m_resblks += mp->m_sb.sb_fdblocks; | 529 | mp->m_resblks += free; |
527 | mp->m_resblks_avail += mp->m_sb.sb_fdblocks; | 530 | mp->m_resblks_avail += free; |
528 | mp->m_sb.sb_fdblocks = 0; | 531 | mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp); |
529 | } else { | 532 | } else { |
530 | mp->m_sb.sb_fdblocks = lcounter; | 533 | mp->m_sb.sb_fdblocks = |
534 | lcounter + XFS_ALLOC_SET_ASIDE(mp); | ||
531 | mp->m_resblks = request; | 535 | mp->m_resblks = request; |
532 | mp->m_resblks_avail += delta; | 536 | mp->m_resblks_avail += delta; |
533 | } | 537 | } |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 86c1bf0bba9e..1f8ecff8553a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -334,10 +334,9 @@ xfs_itobp( | |||
334 | #if !defined(__KERNEL__) | 334 | #if !defined(__KERNEL__) |
335 | ni = 0; | 335 | ni = 0; |
336 | #elif defined(DEBUG) | 336 | #elif defined(DEBUG) |
337 | ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : | 337 | ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; |
338 | (BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog); | ||
339 | #else /* usual case */ | 338 | #else /* usual case */ |
340 | ni = (imap_flags & XFS_IMAP_BULKSTAT) ? 0 : 1; | 339 | ni = 1; |
341 | #endif | 340 | #endif |
342 | 341 | ||
343 | for (i = 0; i < ni; i++) { | 342 | for (i = 0; i < ni; i++) { |
@@ -348,11 +347,15 @@ xfs_itobp( | |||
348 | (i << mp->m_sb.sb_inodelog)); | 347 | (i << mp->m_sb.sb_inodelog)); |
349 | di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && | 348 | di_ok = INT_GET(dip->di_core.di_magic, ARCH_CONVERT) == XFS_DINODE_MAGIC && |
350 | XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); | 349 | XFS_DINODE_GOOD_VERSION(INT_GET(dip->di_core.di_version, ARCH_CONVERT)); |
351 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, | 350 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, |
352 | XFS_RANDOM_ITOBP_INOTOBP))) { | 351 | XFS_ERRTAG_ITOBP_INOTOBP, |
352 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
353 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
354 | xfs_trans_brelse(tp, bp); | ||
355 | return XFS_ERROR(EINVAL); | ||
356 | } | ||
353 | #ifdef DEBUG | 357 | #ifdef DEBUG |
354 | if (!(imap_flags & XFS_IMAP_BULKSTAT)) | 358 | cmn_err(CE_ALERT, |
355 | cmn_err(CE_ALERT, | ||
356 | "Device %s - bad inode magic/vsn " | 359 | "Device %s - bad inode magic/vsn " |
357 | "daddr %lld #%d (magic=%x)", | 360 | "daddr %lld #%d (magic=%x)", |
358 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | 361 | XFS_BUFTARG_NAME(mp->m_ddev_targp), |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e730328636c3..21ac1a67e3e0 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -1413,7 +1413,7 @@ xlog_sync(xlog_t *log, | |||
1413 | ops = iclog->ic_header.h_num_logops; | 1413 | ops = iclog->ic_header.h_num_logops; |
1414 | INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops); | 1414 | INT_SET(iclog->ic_header.h_num_logops, ARCH_CONVERT, ops); |
1415 | 1415 | ||
1416 | bp = iclog->ic_bp; | 1416 | bp = iclog->ic_bp; |
1417 | ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); | 1417 | ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1); |
1418 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); | 1418 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); |
1419 | XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT))); | 1419 | XFS_BUF_SET_ADDR(bp, BLOCK_LSN(INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT))); |
@@ -1430,15 +1430,14 @@ xlog_sync(xlog_t *log, | |||
1430 | } | 1430 | } |
1431 | XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count); | 1431 | XFS_BUF_SET_PTR(bp, (xfs_caddr_t) &(iclog->ic_header), count); |
1432 | XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ | 1432 | XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ |
1433 | XFS_BUF_ZEROFLAGS(bp); | ||
1433 | XFS_BUF_BUSY(bp); | 1434 | XFS_BUF_BUSY(bp); |
1434 | XFS_BUF_ASYNC(bp); | 1435 | XFS_BUF_ASYNC(bp); |
1435 | /* | 1436 | /* |
1436 | * Do an ordered write for the log block. | 1437 | * Do an ordered write for the log block. |
1437 | * | 1438 | * Its unnecessary to flush the first split block in the log wrap case. |
1438 | * It may not be needed to flush the first split block in the log wrap | ||
1439 | * case, but do it anyways to be safe -AK | ||
1440 | */ | 1439 | */ |
1441 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1440 | if (!split && (log->l_mp->m_flags & XFS_MOUNT_BARRIER)) |
1442 | XFS_BUF_ORDERED(bp); | 1441 | XFS_BUF_ORDERED(bp); |
1443 | 1442 | ||
1444 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); | 1443 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
@@ -1460,7 +1459,7 @@ xlog_sync(xlog_t *log, | |||
1460 | return error; | 1459 | return error; |
1461 | } | 1460 | } |
1462 | if (split) { | 1461 | if (split) { |
1463 | bp = iclog->ic_log->l_xbuf; | 1462 | bp = iclog->ic_log->l_xbuf; |
1464 | ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == | 1463 | ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == |
1465 | (unsigned long)1); | 1464 | (unsigned long)1); |
1466 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); | 1465 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2); |
@@ -1468,6 +1467,7 @@ xlog_sync(xlog_t *log, | |||
1468 | XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ | 1467 | XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ |
1469 | (__psint_t)count), split); | 1468 | (__psint_t)count), split); |
1470 | XFS_BUF_SET_FSPRIVATE(bp, iclog); | 1469 | XFS_BUF_SET_FSPRIVATE(bp, iclog); |
1470 | XFS_BUF_ZEROFLAGS(bp); | ||
1471 | XFS_BUF_BUSY(bp); | 1471 | XFS_BUF_BUSY(bp); |
1472 | XFS_BUF_ASYNC(bp); | 1472 | XFS_BUF_ASYNC(bp); |
1473 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1473 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 4be5c0b2d296..9dfae18d995f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) | |||
1243 | xfs_trans_log_buf(tp, bp, first, last); | 1243 | xfs_trans_log_buf(tp, bp, first, last); |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | /* | ||
1247 | * In order to avoid ENOSPC-related deadlock caused by | ||
1248 | * out-of-order locking of AGF buffer (PV 947395), we place | ||
1249 | * constraints on the relationship among actual allocations for | ||
1250 | * data blocks, freelist blocks, and potential file data bmap | ||
1251 | * btree blocks. However, these restrictions may result in no | ||
1252 | * actual space allocated for a delayed extent, for example, a data | ||
1253 | * block in a certain AG is allocated but there is no additional | ||
1254 | * block for the additional bmap btree block due to a split of the | ||
1255 | * bmap btree of the file. The result of this may lead to an | ||
1256 | * infinite loop in xfssyncd when the file gets flushed to disk and | ||
1257 | * all delayed extents need to be actually allocated. To get around | ||
1258 | * this, we explicitly set aside a few blocks which will not be | ||
1259 | * reserved in delayed allocation. Considering the minimum number of | ||
1260 | * needed freelist blocks is 4 fsbs, a potential split of file's bmap | ||
1261 | * btree requires 1 fsb, so we set the number of set-aside blocks to 8. | ||
1262 | */ | ||
1263 | #define SET_ASIDE_BLOCKS 8 | ||
1264 | 1246 | ||
1265 | /* | 1247 | /* |
1266 | * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply | 1248 | * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply |
@@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, | |||
1306 | return 0; | 1288 | return 0; |
1307 | case XFS_SBS_FDBLOCKS: | 1289 | case XFS_SBS_FDBLOCKS: |
1308 | 1290 | ||
1309 | lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; | 1291 | lcounter = (long long) |
1292 | mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | ||
1310 | res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); | 1293 | res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); |
1311 | 1294 | ||
1312 | if (delta > 0) { /* Putting blocks back */ | 1295 | if (delta > 0) { /* Putting blocks back */ |
@@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, | |||
1340 | } | 1323 | } |
1341 | } | 1324 | } |
1342 | 1325 | ||
1343 | mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; | 1326 | mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); |
1344 | return 0; | 1327 | return 0; |
1345 | case XFS_SBS_FREXTENTS: | 1328 | case XFS_SBS_FREXTENTS: |
1346 | lcounter = (long long)mp->m_sb.sb_frextents; | 1329 | lcounter = (long long)mp->m_sb.sb_frextents; |
@@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy( | |||
2021 | * when we get near ENOSPC. | 2004 | * when we get near ENOSPC. |
2022 | */ | 2005 | */ |
2023 | #define XFS_ICSB_INO_CNTR_REENABLE 64 | 2006 | #define XFS_ICSB_INO_CNTR_REENABLE 64 |
2024 | #define XFS_ICSB_FDBLK_CNTR_REENABLE 512 | 2007 | #define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ |
2008 | (512 + XFS_ALLOC_SET_ASIDE(mp)) | ||
2025 | STATIC void | 2009 | STATIC void |
2026 | xfs_icsb_balance_counter( | 2010 | xfs_icsb_balance_counter( |
2027 | xfs_mount_t *mp, | 2011 | xfs_mount_t *mp, |
@@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter( | |||
2055 | case XFS_SBS_FDBLOCKS: | 2039 | case XFS_SBS_FDBLOCKS: |
2056 | count = mp->m_sb.sb_fdblocks; | 2040 | count = mp->m_sb.sb_fdblocks; |
2057 | resid = do_div(count, weight); | 2041 | resid = do_div(count, weight); |
2058 | if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) | 2042 | if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp)) |
2059 | goto out; | 2043 | goto out; |
2060 | break; | 2044 | break; |
2061 | default: | 2045 | default: |
@@ -2110,11 +2094,11 @@ again: | |||
2110 | case XFS_SBS_FDBLOCKS: | 2094 | case XFS_SBS_FDBLOCKS: |
2111 | BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); | 2095 | BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); |
2112 | 2096 | ||
2113 | lcounter = icsbp->icsb_fdblocks; | 2097 | lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); |
2114 | lcounter += delta; | 2098 | lcounter += delta; |
2115 | if (unlikely(lcounter < 0)) | 2099 | if (unlikely(lcounter < 0)) |
2116 | goto slow_path; | 2100 | goto slow_path; |
2117 | icsbp->icsb_fdblocks = lcounter; | 2101 | icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); |
2118 | break; | 2102 | break; |
2119 | default: | 2103 | default: |
2120 | BUG(); | 2104 | BUG(); |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 6c96391f3f1a..a34796e57afb 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -515,7 +515,7 @@ xfs_mount( | |||
515 | if (error) | 515 | if (error) |
516 | goto error2; | 516 | goto error2; |
517 | 517 | ||
518 | if ((mp->m_flags & XFS_MOUNT_BARRIER) && !(vfsp->vfs_flag & VFS_RDONLY)) | 518 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
519 | xfs_mountfs_check_barriers(mp); | 519 | xfs_mountfs_check_barriers(mp); |
520 | 520 | ||
521 | error = XFS_IOINIT(vfsp, args, flags); | 521 | error = XFS_IOINIT(vfsp, args, flags); |
@@ -811,7 +811,8 @@ xfs_statvfs( | |||
811 | statp->f_bsize = sbp->sb_blocksize; | 811 | statp->f_bsize = sbp->sb_blocksize; |
812 | lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; | 812 | lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; |
813 | statp->f_blocks = sbp->sb_dblocks - lsize; | 813 | statp->f_blocks = sbp->sb_dblocks - lsize; |
814 | statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks; | 814 | statp->f_bfree = statp->f_bavail = |
815 | sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); | ||
815 | fakeinos = statp->f_bfree << sbp->sb_inopblog; | 816 | fakeinos = statp->f_bfree << sbp->sb_inopblog; |
816 | #if XFS_BIG_INUMS | 817 | #if XFS_BIG_INUMS |
817 | fakeinos += mp->m_inoadd; | 818 | fakeinos += mp->m_inoadd; |