Diffstat (limited to 'fs')
174 files changed, 5659 insertions, 4008 deletions
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 842d00048a65..01443ce43ee7 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -548,15 +548,6 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 
 	spin_lock(&sbi->fs_lock);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
-	spin_lock(&dentry->d_lock);
-	if (!ret) {
-		if ((IS_ROOT(dentry) ||
-		    (autofs_type_indirect(sbi->type) &&
-		     IS_ROOT(dentry->d_parent))) &&
-		    !(dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
-			__managed_dentry_set_automount(dentry);
-	}
-	spin_unlock(&dentry->d_lock);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 	dput(dentry);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 91b11650722e..c93447604da8 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -124,13 +124,10 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * it.
 	 */
 	spin_lock(&sbi->lookup_lock);
-	spin_lock(&dentry->d_lock);
-	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
-		spin_unlock(&dentry->d_lock);
+	if (!d_mountpoint(dentry) && simple_empty(dentry)) {
 		spin_unlock(&sbi->lookup_lock);
 		return -ENOENT;
 	}
-	spin_unlock(&dentry->d_lock);
 	spin_unlock(&sbi->lookup_lock);
 
 out:
@@ -355,7 +352,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 		status = autofs4_mount_wait(dentry);
 		if (status)
 			return ERR_PTR(status);
-		spin_lock(&sbi->fs_lock);
 		goto done;
 	}
 
@@ -364,8 +360,11 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 	 * having d_mountpoint() true, so there's no need to call back
 	 * to the daemon.
 	 */
-	if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
+	if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) {
+		spin_unlock(&sbi->fs_lock);
 		goto done;
+	}
+
 	if (!d_mountpoint(dentry)) {
 		/*
 		 * It's possible that user space hasn't removed directories
@@ -379,15 +378,13 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 		 * require user space behave.
 		 */
 		if (sbi->version > 4) {
-			if (have_submounts(dentry))
+			if (have_submounts(dentry)) {
+				spin_unlock(&sbi->fs_lock);
 				goto done;
+			}
 		} else {
-			spin_lock(&dentry->d_lock);
-			if (!list_empty(&dentry->d_subdirs)) {
-				spin_unlock(&dentry->d_lock);
+			if (!simple_empty(dentry))
 				goto done;
-			}
-			spin_unlock(&dentry->d_lock);
 		}
 		ino->flags |= AUTOFS_INF_PENDING;
 		spin_unlock(&sbi->fs_lock);
@@ -399,28 +396,8 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 			return ERR_PTR(status);
 		}
 	}
-done:
-	if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
-		/*
-		 * Any needed mounting has been completed and the path
-		 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
-		 * call ->d_automount() on rootless multi-mounts since
-		 * it can lead to an incorrect ELOOP error return.
-		 *
-		 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
-		 * symlinks as in all other cases the dentry will be covered by
-		 * an actual mount so ->d_automount() won't be called during
-		 * the follow.
-		 */
-		spin_lock(&dentry->d_lock);
-		if ((!d_mountpoint(dentry) &&
-		    !list_empty(&dentry->d_subdirs)) ||
-		    (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
-			__managed_dentry_clear_automount(dentry);
-		spin_unlock(&dentry->d_lock);
-	}
 	spin_unlock(&sbi->fs_lock);
-
+done:
 	/* Mount succeeded, check if we ended up with a new dentry */
 	dentry = autofs4_mountpoint_changed(path);
 	if (!dentry)
@@ -432,6 +409,8 @@ done:
 int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	int status;
 
 	DPRINTK("dentry=%p %.*s",
 		dentry, dentry->d_name.len, dentry->d_name.name);
@@ -456,7 +435,32 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 	 * This dentry may be under construction so wait on mount
 	 * completion.
 	 */
-	return autofs4_mount_wait(dentry);
+	status = autofs4_mount_wait(dentry);
+	if (status)
+		return status;
+
+	spin_lock(&sbi->fs_lock);
+	/*
+	 * If the dentry has been selected for expire while we slept
+	 * on the lock then it might go away. We'll deal with that in
+	 * ->d_automount() and wait on a new mount if the expire
+	 * succeeds or return here if it doesn't (since there's no
+	 * mount to follow with a rootless multi-mount).
+	 */
+	if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
+		/*
+		 * Any needed mounting has been completed and the path
+		 * updated so check if this is a rootless multi-mount so
+		 * we can avoid needless calls ->d_automount() and avoid
+		 * an incorrect ELOOP error return.
+		 */
+		if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
+		    (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
+			status = -EISDIR;
+	}
+	spin_unlock(&sbi->fs_lock);
+
+	return status;
 }
 
 /* Lookups in the root directory */
@@ -599,9 +603,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	spin_lock(&sbi->lookup_lock);
 	__autofs4_add_expiring(dentry);
-	spin_lock(&dentry->d_lock);
-	__d_drop(dentry);
-	spin_unlock(&dentry->d_lock);
+	d_drop(dentry);
 	spin_unlock(&sbi->lookup_lock);
 
 	return 0;
@@ -672,15 +674,12 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 		return -EACCES;
 
 	spin_lock(&sbi->lookup_lock);
-	spin_lock(&dentry->d_lock);
-	if (!list_empty(&dentry->d_subdirs)) {
-		spin_unlock(&dentry->d_lock);
+	if (!simple_empty(dentry)) {
 		spin_unlock(&sbi->lookup_lock);
 		return -ENOTEMPTY;
 	}
 	__autofs4_add_expiring(dentry);
-	__d_drop(dentry);
-	spin_unlock(&dentry->d_lock);
+	d_drop(dentry);
 	spin_unlock(&sbi->lookup_lock);
 
 	if (sbi->version < 5)
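A note on the autofs4 hunks above: every open-coded sequence of spin_lock(&dentry->d_lock), list_empty(&dentry->d_subdirs), spin_unlock(&dentry->d_lock) is replaced by a call to simple_empty(), which takes d_lock itself. Below is a minimal userspace sketch of that refactor shape using pthreads; the struct and helper names here are made up for illustration and are not kernel API.

    /*
     * Hedged userspace sketch of the pattern above: an open-coded
     * "lock, check emptiness, unlock" sequence is replaced by one
     * helper that hides the locking, the way simple_empty() hides
     * dentry->d_lock. Illustrative names only.
     */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct node {
            struct node *next;
    };

    struct dir {
            pthread_mutex_t lock;   /* stands in for dentry->d_lock */
            struct node *children;  /* stands in for d_subdirs */
    };

    /* One place owns the locking; callers just ask the question. */
    static bool dir_empty(struct dir *d)
    {
            bool empty;

            pthread_mutex_lock(&d->lock);
            empty = (d->children == NULL);
            pthread_mutex_unlock(&d->lock);
            return empty;
    }

    int main(void)
    {
            struct dir d = { PTHREAD_MUTEX_INITIALIZER, NULL };

            /* Before: every caller re-implemented lock/check/unlock.
             * After: the check reads as a single predicate. */
            printf("empty: %d\n", dir_empty(&d));
            return 0;
    }

The win is the same in both settings: the lock and the predicate it protects live in one place, so call sites cannot get the pairing wrong.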
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 0e7a6f81ae36..6043567b95c2 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -30,7 +30,7 @@
 #include <asm/cacheflush.h>
 #include <asm/a.out-core.h>
 
-static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_aout_binary(struct linux_binprm *);
 static int load_aout_library(struct file*);
 
 #ifdef CONFIG_COREDUMP
@@ -201,8 +201,9 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_aout_binary(struct linux_binprm * bprm)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct exec ex;
 	unsigned long error;
 	unsigned long fd_offset;
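The same mechanical change repeats in every binfmt loader below (binfmt_elf, binfmt_elf_fdpic, binfmt_em86, binfmt_flat, binfmt_misc, binfmt_script, binfmt_som): the struct pt_regs * argument disappears from the loader's signature and from search_binary_handler(), and each loader re-derives the register state with current_pt_regs(). A hedged userspace sketch of that refactor shape, using a thread-local variable as a stand-in for per-task state:

    /*
     * Hedged sketch of the refactor applied to every loader below:
     * instead of threading a per-task pointer (pt_regs) through the
     * call chain, each function re-derives it from the current
     * thread's context. The thread-local here is a stand-in, not the
     * kernel mechanism.
     */
    #include <stdio.h>

    static __thread int current_regs;  /* stands in for the task's pt_regs */

    static int *current_pt_regs_demo(void)
    {
            return &current_regs;
    }

    /* Before: static int load_binary(struct binprm *b, int *regs); */
    static int load_binary(const char *name)
    {
            int *regs = current_pt_regs_demo();  /* derived, not passed */

            printf("loading %s, regs at %p\n", name, (void *)regs);
            return 0;
    }

    int main(void)
    {
            return load_binary("a.out");
    }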
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fbd9f60bd763..6d7d1647a68c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -44,7 +44,7 @@
 #define user_siginfo_t siginfo_t
 #endif
 
-static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
+static int load_elf_binary(struct linux_binprm *bprm);
 static int load_elf_library(struct file *);
 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
 				int, int, unsigned long);
@@ -558,7 +558,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
 #endif
 }
 
-static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_elf_binary(struct linux_binprm *bprm)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
 	unsigned long load_addr = 0, load_bias = 0;
@@ -575,6 +575,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	unsigned long reloc_func_desc __maybe_unused = 0;
 	int executable_stack = EXSTACK_DEFAULT;
 	unsigned long def_flags = 0;
+	struct pt_regs *regs = current_pt_regs();
 	struct {
 		struct elfhdr elf_ex;
 		struct elfhdr interp_elf_ex;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a46049154107..dc84732e554f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -56,7 +56,7 @@ typedef char *elf_caddr_t;
 
 MODULE_LICENSE("GPL");
 
-static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *);
+static int load_elf_fdpic_binary(struct linux_binprm *);
 static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *);
 static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *,
 			      struct mm_struct *, const char *);
@@ -164,10 +164,10 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
 /*
  * load an fdpic binary into various bits of memory
  */
-static int load_elf_fdpic_binary(struct linux_binprm *bprm,
-				 struct pt_regs *regs)
+static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 {
 	struct elf_fdpic_params exec_params, interp_params;
+	struct pt_regs *regs = current_pt_regs();
 	struct elf_phdr *phdr;
 	unsigned long stack_size, entryaddr;
 #ifdef ELF_FDPIC_PLAT_INIT
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 2790c7e1912e..4e6cce57d113 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -22,7 +22,7 @@
 #define EM86_INTERP	"/usr/bin/em86"
 #define EM86_I_NAME	"em86"
 
-static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
+static int load_em86(struct linux_binprm *bprm)
 {
 	char *interp, *i_name, *i_arg;
 	struct file * file;
@@ -90,7 +90,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
 	if (retval < 0)
 		return retval;
 
-	return search_binary_handler(bprm, regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt em86_format = {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e280352b28f9..b56371981d16 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -88,7 +88,7 @@ struct lib_info {
 static int load_flat_shared_library(int id, struct lib_info *p);
 #endif
 
-static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_flat_binary(struct linux_binprm *);
 static int flat_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt flat_format = {
@@ -858,9 +858,10 @@ out:
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_flat_binary(struct linux_binprm * bprm)
 {
 	struct lib_info libinfo;
+	struct pt_regs *regs = current_pt_regs();
 	unsigned long p = bprm->p;
 	unsigned long stack_len;
 	unsigned long start_addr;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 790b3cddca67..b0b70fbea06c 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -104,7 +104,7 @@ static Node *check_file(struct linux_binprm *bprm)
 /*
  * the loader itself
  */
-static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_misc_binary(struct linux_binprm *bprm)
 {
 	Node *fmt;
 	struct file * interp_file = NULL;
@@ -199,7 +199,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 
 	bprm->recursion_depth++;
 
-	retval = search_binary_handler (bprm, regs);
+	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto _error;
 
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index d3b8c1f63155..8c954997e7f7 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -14,7 +14,7 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 
-static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
+static int load_script(struct linux_binprm *bprm)
 {
 	const char *i_arg, *i_name;
 	char *cp;
@@ -95,7 +95,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
 	retval = prepare_binprm(bprm);
 	if (retval < 0)
 		return retval;
-	return search_binary_handler(bprm,regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt script_format = {
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 4517aaff61b4..4e00ed68d4a6 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -35,7 +35,7 @@
 
 #include <linux/elf.h>
 
-static int load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs);
+static int load_som_binary(struct linux_binprm * bprm);
 static int load_som_library(struct file *);
 
 /*
@@ -180,13 +180,14 @@ out:
  */
 
 static int
-load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+load_som_binary(struct linux_binprm * bprm)
 {
 	int retval;
 	unsigned int size;
 	unsigned long som_entry;
 	struct som_hdr *som_ex;
 	struct som_exec_auxhdr *hpuxhdr;
+	struct pt_regs *regs = current_pt_regs();
 
 	/* Get the exec-header */
 	som_ex = (struct som_hdr *) bprm->buf;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1a1e5e3b1eaf..ab3a456f6650 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -70,19 +70,6 @@ static void bdev_inode_switch_bdi(struct inode *inode,
 	spin_unlock(&dst->wb.list_lock);
 }
 
-sector_t blkdev_max_block(struct block_device *bdev)
-{
-	sector_t retval = ~((sector_t)0);
-	loff_t sz = i_size_read(bdev->bd_inode);
-
-	if (sz) {
-		unsigned int size = block_size(bdev);
-		unsigned int sizebits = blksize_bits(size);
-		retval = (sz >> sizebits);
-	}
-	return retval;
-}
-
 /* Kill _all_ buffers and pagecache , dirty or not.. */
 void kill_bdev(struct block_device *bdev)
 {
@@ -116,8 +103,6 @@ EXPORT_SYMBOL(invalidate_bdev);
 
 int set_blocksize(struct block_device *bdev, int size)
 {
-	struct address_space *mapping;
-
 	/* Size must be a power of two, and between 512 and PAGE_SIZE */
 	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
 		return -EINVAL;
@@ -126,19 +111,6 @@ int set_blocksize(struct block_device *bdev, int size)
 	if (size < bdev_logical_block_size(bdev))
 		return -EINVAL;
 
-	/* Prevent starting I/O or mapping the device */
-	percpu_down_write(&bdev->bd_block_size_semaphore);
-
-	/* Check that the block device is not memory mapped */
-	mapping = bdev->bd_inode->i_mapping;
-	mutex_lock(&mapping->i_mmap_mutex);
-	if (mapping_mapped(mapping)) {
-		mutex_unlock(&mapping->i_mmap_mutex);
-		percpu_up_write(&bdev->bd_block_size_semaphore);
-		return -EBUSY;
-	}
-	mutex_unlock(&mapping->i_mmap_mutex);
-
 	/* Don't change the size if it is same as current */
 	if (bdev->bd_block_size != size) {
 		sync_blockdev(bdev);
@@ -146,9 +118,6 @@ int set_blocksize(struct block_device *bdev, int size)
 		bdev->bd_inode->i_blkbits = blksize_bits(size);
 		kill_bdev(bdev);
 	}
-
-	percpu_up_write(&bdev->bd_block_size_semaphore);
-
 	return 0;
 }
 
@@ -181,52 +150,12 @@ static int
 blkdev_get_block(struct inode *inode, sector_t iblock,
 		struct buffer_head *bh, int create)
 {
-	if (iblock >= blkdev_max_block(I_BDEV(inode))) {
-		if (create)
-			return -EIO;
-
-		/*
-		 * for reads, we're just trying to fill a partial page.
-		 * return a hole, they will have to call get_block again
-		 * before they can fill it, and they will get -EIO at that
-		 * time
-		 */
-		return 0;
-	}
 	bh->b_bdev = I_BDEV(inode);
 	bh->b_blocknr = iblock;
 	set_buffer_mapped(bh);
 	return 0;
 }
 
-static int
-blkdev_get_blocks(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh, int create)
-{
-	sector_t end_block = blkdev_max_block(I_BDEV(inode));
-	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
-
-	if ((iblock + max_blocks) > end_block) {
-		max_blocks = end_block - iblock;
-		if ((long)max_blocks <= 0) {
-			if (create)
-				return -EIO;	/* write fully beyond EOF */
-			/*
-			 * It is a read which is fully beyond EOF. We return
-			 * a !buffer_mapped buffer
-			 */
-			max_blocks = 0;
-		}
-	}
-
-	bh->b_bdev = I_BDEV(inode);
-	bh->b_blocknr = iblock;
-	bh->b_size = max_blocks << inode->i_blkbits;
-	if (max_blocks)
-		set_buffer_mapped(bh);
-	return 0;
-}
-
 static ssize_t
 blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs)
@@ -235,7 +164,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct inode *inode = file->f_mapping->host;
 
 	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
-				    nr_segs, blkdev_get_blocks, NULL, NULL, 0);
+				    nr_segs, blkdev_get_block, NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -459,12 +388,6 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
 	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-
-	if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
-		kmem_cache_free(bdev_cachep, ei);
-		return NULL;
-	}
-
 	return &ei->vfs_inode;
 }
 
@@ -473,8 +396,6 @@ static void bdev_i_callback(struct rcu_head *head)
 	struct inode *inode = container_of(head, struct inode, i_rcu);
 	struct bdev_inode *bdi = BDEV_I(inode);
 
-	percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
-
 	kmem_cache_free(bdev_cachep, bdi);
 }
 
@@ -1593,22 +1514,6 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	return blkdev_ioctl(bdev, mode, cmd, arg);
 }
 
-ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			unsigned long nr_segs, loff_t pos)
-{
-	ssize_t ret;
-	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(blkdev_aio_read);
-
 /*
  * Write data to the block device. Only intended for the block device itself
  * and the raw driver which basically is a fake block device.
@@ -1620,16 +1525,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
 	struct blk_plug plug;
 	ssize_t ret;
 
 	BUG_ON(iocb->ki_pos != pos);
 
 	blk_start_plug(&plug);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
 	if (ret > 0 || ret == -EIOCBQUEUED) {
 		ssize_t err;
@@ -1638,62 +1539,27 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		if (err < 0 && ret > 0)
 			ret = err;
 	}
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
 	blk_finish_plug(&plug);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(blkdev_aio_write);
 
-static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	int ret;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_mmap(file, vma);
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
-	return ret;
-}
-
-static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos,
-				  struct pipe_inode_info *pipe, size_t len,
-				  unsigned int flags)
-{
-	ssize_t ret;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_splice_read(file, ppos, pipe, len, flags);
-
-	percpu_up_read(&bdev->bd_block_size_semaphore);
-
-	return ret;
-}
-
-static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe,
-				   struct file *file, loff_t *ppos, size_t len,
-				   unsigned int flags)
+static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+			unsigned long nr_segs, loff_t pos)
 {
-	ssize_t ret;
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
-
-	percpu_down_read(&bdev->bd_block_size_semaphore);
-
-	ret = generic_file_splice_write(pipe, file, ppos, len, flags);
+	struct file *file = iocb->ki_filp;
+	struct inode *bd_inode = file->f_mapping->host;
+	loff_t size = i_size_read(bd_inode);
 
-	percpu_up_read(&bdev->bd_block_size_semaphore);
+	if (pos >= size)
+		return 0;
 
-	return ret;
+	size -= pos;
+	if (size < INT_MAX)
+		nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
+	return generic_file_aio_read(iocb, iov, nr_segs, pos);
 }
 
-
 /*
  * Try to release a page associated with block device when the system
  * is under memory pressure.
@@ -1724,16 +1590,16 @@ const struct file_operations def_blk_fops = {
 	.llseek		= block_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= blkdev_aio_read,
 	.aio_write	= blkdev_aio_write,
-	.mmap		= blkdev_mmap,
+	.mmap		= generic_file_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
-	.splice_read	= blkdev_splice_read,
-	.splice_write	= blkdev_splice_write,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
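With the per-bdev block_size_semaphore gone, the new blkdev_aio_read() above bounds the read itself: a read starting at or past the device size returns 0, and anything else is shortened with iov_shorten() so it cannot run past end-of-device. Below is a hedged userspace sketch of that clamping arithmetic; mini_iov_shorten() is an illustrative stand-in for the kernel helper, written against the POSIX struct iovec.

    /*
     * Hedged sketch of the read-clamping logic in blkdev_aio_read()
     * above: stop at end-of-device by shrinking the iovec array to a
     * byte budget. mini_iov_shorten() is an illustrative stand-in for
     * the kernel's iov_shorten(); the 700-byte "device" is made up.
     */
    #include <stdio.h>
    #include <sys/uio.h>

    static unsigned long mini_iov_shorten(struct iovec *iov,
                                          unsigned long nr_segs, size_t to)
    {
            unsigned long seg = 0;
            size_t len = 0;

            while (seg < nr_segs) {
                    seg++;
                    if (len + iov->iov_len >= to) {
                            iov->iov_len = to - len;  /* trim last segment */
                            break;
                    }
                    len += iov->iov_len;
                    iov++;
            }
            return seg;  /* number of segments still in use */
    }

    int main(void)
    {
            char a[512], b[512];
            struct iovec iov[2] = {
                    { .iov_base = a, .iov_len = sizeof(a) },
                    { .iov_base = b, .iov_len = sizeof(b) },
            };
            long long dev_size = 700, pos = 0;  /* 700-byte "device" */
            unsigned long nr = 2;

            if (pos < dev_size)
                    nr = mini_iov_shorten(iov, nr, dev_size - pos);
            printf("segs=%lu last_len=%zu\n", nr, iov[nr - 1].iov_len);
            return 0;
    }

Run as-is this prints segs=2 last_len=188: the second segment is trimmed so the total equals the 700 bytes remaining on the made-up device.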
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c72ead869507..596617ecd329 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -413,7 +413,7 @@ struct btrfs_root_backup {
 	__le64 bytes_used;
 	__le64 num_devices;
 	/* future */
-	__le64 unsed_64[4];
+	__le64 unused_64[4];
 
 	u8 tree_root_level;
 	u8 chunk_root_level;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7cda51995c1e..22a0439e5a86 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3416,8 +3416,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	num_dirty = root->fs_info->dirty_metadata_bytes;
 
 	if (num_dirty > thresh) {
-		balance_dirty_pages_ratelimited_nr(
-				   root->fs_info->btree_inode->i_mapping, 1);
+		balance_dirty_pages_ratelimited(
+				   root->fs_info->btree_inode->i_mapping);
 	}
 	return;
 }
@@ -3437,8 +3437,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	num_dirty = root->fs_info->dirty_metadata_bytes;
 
 	if (num_dirty > thresh) {
-		balance_dirty_pages_ratelimited_nr(
-				   root->fs_info->btree_inode->i_mapping, 1);
+		balance_dirty_pages_ratelimited(
+				   root->fs_info->btree_inode->i_mapping);
 	}
 	return;
 }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d3e2c17d8d1..06b2635073f3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3888,7 +3888,7 @@ static int flush_space(struct btrfs_root *root,
  * @root - the root we're allocating for
  * @block_rsv - the block_rsv we're allocating for
  * @orig_bytes - the number of bytes we want
- * @flush - wether or not we can flush to make our reservation
+ * @flush - whether or not we can flush to make our reservation
  *
  * This will reserve orgi_bytes number of bytes from the space info associated
  * with the block_rsv.  If there is not enough space it will make an attempt to
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b8cbc8d5c7f7..ce9f79216723 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -234,12 +234,11 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 }
 
 /**
- * unpint_extent_cache - unpin an extent from the cache
+ * unpin_extent_cache - unpin an extent from the cache
  * @tree:	tree to unpin the extent in
  * @start:	logical offset in the file
  * @len:	length of the extent
  * @gen:	generation that this extent has been modified in
- * @prealloc:	if this is set we need to clear the prealloc flag
  *
  * Called after an extent has been written to disk properly.  Set the generation
  * to the generation that actually added the file item to the inode so we know
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9ab1bed88116..a8ee75cb96ee 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1346,8 +1346,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
 		cond_resched();
 
-		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-						   dirty_pages);
+		balance_dirty_pages_ratelimited(inode->i_mapping);
 		if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
 			btrfs_btree_balance_dirty(root, 1);
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8fcf9a59c28d..5b3429ab8ec1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1225,7 +1225,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 		}
 
 		defrag_count += ret;
-		balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
+		balance_dirty_pages_ratelimited(inode->i_mapping);
 		mutex_unlock(&inode->i_mutex);
 
 		if (newer_than) {
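The disk-io.c, file.c and ioctl.c hunks above are call-site fallout from a writeback API change: balance_dirty_pages_ratelimited_nr(mapping, nr) goes away and callers use balance_dirty_pages_ratelimited(mapping), with the count of newly dirtied pages accounted per task rather than passed in by each caller. A hedged sketch of that interface shape, using a thread-local counter as a stand-in for the per-task bookkeeping; the names and threshold are invented for illustration, this is not the mm implementation.

    /*
     * Hedged sketch of the API shape behind the btrfs hunks above:
     * callers stop passing a page count and the rate limiter
     * accumulates per-task state itself. Illustrative names only.
     */
    #include <stdio.h>

    #define RATELIMIT_PAGES 32

    static __thread unsigned long nr_dirtied;  /* per-task accumulator */

    static void note_page_dirtied(void)
    {
            nr_dirtied++;
    }

    /* Old shape: balance_dirty_pages_ratelimited_nr(mapping, nr); */
    static void balance_dirty_pages_ratelimited_demo(void)
    {
            if (nr_dirtied < RATELIMIT_PAGES)
                    return;  /* cheap fast path most of the time */
            printf("throttling writer after %lu dirtied pages\n", nr_dirtied);
            nr_dirtied = 0;
    }

    int main(void)
    {
            for (int i = 0; i < 100; i++) {
                    note_page_dirtied();
                    balance_dirty_pages_ratelimited_demo();
            }
            return 0;
    }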
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index dd27a0b46a37..853fc7beedfa 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -76,7 +76,7 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
 
-#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent
+#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
 				       * has done its due diligence in updating
 				       * the isize. */
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0f5ebb72a5ea..e3c6ee3cc2ba 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4294,7 +4294,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 
 	rcu_read_lock();
 	name = rcu_dereference(dev->name);
-	pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
+	pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
 		 "(%s id %llu), size=%u\n", rw,
 		 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
 		 name->str, dev->devid, bio->bi_size);
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..c017a2dfb909 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -46,8 +46,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
-inline void
-init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
+void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 {
 	bh->b_end_io = handler;
 	bh->b_private = private;
@@ -555,7 +554,7 @@ void emergency_thaw_all(void)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-	struct address_space *buffer_mapping = mapping->assoc_mapping;
+	struct address_space *buffer_mapping = mapping->private_data;
 
 	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
 		return 0;
@@ -588,10 +587,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 		struct address_space *buffer_mapping = bh->b_page->mapping;
 
 		mark_buffer_dirty(bh);
-		if (!mapping->assoc_mapping) {
-			mapping->assoc_mapping = buffer_mapping;
+		if (!mapping->private_data) {
+			mapping->private_data = buffer_mapping;
 		} else {
-			BUG_ON(mapping->assoc_mapping != buffer_mapping);
+			BUG_ON(mapping->private_data != buffer_mapping);
 		}
 		if (!bh->b_assoc_map) {
 			spin_lock(&buffer_mapping->private_lock);
@@ -788,7 +787,7 @@ void invalidate_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list))
@@ -811,7 +810,7 @@ int remove_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->assoc_mapping;
+		struct address_space *buffer_mapping = mapping->private_data;
 
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list)) {
@@ -850,13 +849,10 @@ try_again:
 		if (!bh)
 			goto no_grow;
 
-		bh->b_bdev = NULL;
 		bh->b_this_page = head;
 		bh->b_blocknr = -1;
 		head = bh;
 
-		bh->b_state = 0;
-		atomic_set(&bh->b_count, 0);
 		bh->b_size = size;
 
 		/* Link the buffer to its page */
@@ -911,6 +907,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
 	attach_page_buffers(page, head);
 }
 
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+	sector_t retval = ~((sector_t)0);
+	loff_t sz = i_size_read(bdev->bd_inode);
+
+	if (sz) {
+		unsigned int sizebits = blksize_bits(size);
+		retval = (sz >> sizebits);
+	}
+	return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
@@ -921,7 +929,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 	struct buffer_head *head = page_buffers(page);
 	struct buffer_head *bh = head;
 	int uptodate = PageUptodate(page);
-	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
 
 	do {
 		if (!buffer_mapped(bh)) {
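blkdev_max_block() reappears above as a static helper in buffer.c that takes the block size as an argument instead of reading it from the device, so init_page_buffers() measures end-of-device in the block size it is actually using. The computation itself is just the device byte size shifted down by log2(block size). A hedged userspace check of that arithmetic follows, with a made-up 10 GiB device; blksize_bits_demo() mirrors the loop the kernel's blksize_bits() uses.

    /*
     * Hedged userspace check of the blkdev_max_block() arithmetic
     * above: the number of addressable blocks on a device is
     * i_size >> log2(blocksize). The device size is made up.
     */
    #include <stdio.h>

    static unsigned int blksize_bits_demo(unsigned int size)
    {
            unsigned int bits = 8;

            do {
                    bits++;
                    size >>= 1;
            } while (size > 256);
            return bits;
    }

    int main(void)
    {
            unsigned long long sz = 10737418240ULL;  /* 10 GiB "device" */
            unsigned int size = 4096;

            printf("bits=%u max_block=%llu\n", blksize_bits_demo(size),
                   sz >> blksize_bits_demo(size));
            return 0;
    }

For 4096-byte blocks this prints bits=12 max_block=2621440.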
@@ -1553,6 +1561,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+	return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+	return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  *	Mapped	Uptodate	Meaning
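The comment on block_size_bits() above notes that for a power-of-two block size the log is a single bit-scan. A userspace equivalent using a compiler builtin, illustrative only and not the kernel macro:

    /*
     * Hedged aside on block_size_bits() above: for a power of two,
     * log2 is a count of trailing zero bits. __builtin_ctz() is a
     * GCC/Clang builtin standing in for the kernel's ilog2().
     */
    #include <stdio.h>

    static inline int block_size_bits_demo(unsigned int blocksize)
    {
            return __builtin_ctz(blocksize);  /* log2 of a power of two */
    }

    int main(void)
    {
            for (unsigned int bs = 512; bs <= 4096; bs <<= 1)
                    printf("%u -> %d\n", bs, block_size_bits_demo(bs));
            return 0;
    }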
@@ -1589,19 +1619,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
-	const unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned int blocksize, bbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
 			WRITE_SYNC : WRITE);
 
-	BUG_ON(!PageLocked(page));
-
-	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, blocksize,
+	head = create_page_buffers(page, inode,
 			(1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
 
 	/*
 	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1637,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * handle that here by just cleaning them.
 	 */
 
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	head = page_buffers(page);
 	bh = head;
+	blocksize = bh->b_size;
+	bbits = block_size_bits(blocksize);
+
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	last_block = (i_size_read(inode) - 1) >> bbits;
 
 	/*
 	 * Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1833,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 	BUG_ON(to > PAGE_CACHE_SIZE);
 	BUG_ON(from > to);
 
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	bbits = inode->i_blkbits;
 	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1906,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	unsigned blocksize;
 	struct buffer_head *bh, *head;
 
-	blocksize = 1 << inode->i_blkbits;
+	bh = head = page_buffers(page);
+	blocksize = bh->b_size;
 
-	for(bh = head = page_buffers(page), block_start = 0;
-	    bh != head || !block_start;
-	    block_start=block_end, bh = bh->b_this_page) {
+	block_start = 0;
+	do {
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
@@ -1895,7 +1920,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 			mark_buffer_dirty(bh);
 		}
 		clear_buffer_new(bh);
-	}
+
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
 
 	/*
 	 * If this is a partial write which happened to make all buffers
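__block_commit_write() above trades its three-clause for loop for a do/while, the natural traversal for the circular ring that page buffers form: start at the head, follow b_this_page, and stop when the walk returns to the head, which guarantees the head itself is visited exactly once. A small self-contained sketch of that traversal over a hand-built ring:

    /*
     * Hedged sketch of the loop reshaping above: page buffers form a
     * circular singly linked ring, so a do/while that stops when the
     * walk returns to the head visits every node exactly once. The
     * three-node ring here is built by hand for illustration.
     */
    #include <stdio.h>

    struct bh {
            int id;
            struct bh *b_this_page;  /* next buffer in the ring */
    };

    int main(void)
    {
            struct bh b2 = { 2, NULL }, b1 = { 1, &b2 }, b0 = { 0, &b1 };
            struct bh *head = &b0, *bh = head;

            b2.b_this_page = &b0;  /* close the ring */

            do {
                    printf("buffer %d\n", bh->id);
                    bh = bh->b_this_page;
            } while (bh != head);
            return 0;
    }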
@@ -2020,7 +2048,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 					unsigned long from)
 {
-	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end, blocksize;
 	unsigned to;
 	struct buffer_head *bh, *head;
@@ -2029,13 +2056,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 	if (!page_has_buffers(page))
 		return 0;
 
-	blocksize = 1 << inode->i_blkbits;
+	head = page_buffers(page);
+	blocksize = head->b_size;
 	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
 	to = from + to;
 	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
 		return 0;
 
-	head = page_buffers(page);
 	bh = head;
 	block_start = 0;
 	do {
@@ -2068,18 +2095,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-	unsigned int blocksize;
+	unsigned int blocksize, bbits;
 	int nr, i;
 	int fully_mapped = 1;
 
-	BUG_ON(!PageLocked(page));
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
 	bh = head;
 	nr = 0;
 	i = 0;
@@ -2864,6 +2889,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 	bio_put(bio);
 }
 
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+	sector_t maxsector;
+	unsigned bytes;
+
+	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+	if (!maxsector)
+		return;
+
+	/*
+	 * If the *whole* IO is past the end of the device,
+	 * let it through, and the IO layer will turn it into
+	 * an EIO.
+	 */
+	if (unlikely(bio->bi_sector >= maxsector))
+		return;
+
+	maxsector -= bio->bi_sector;
+	bytes = bio->bi_size;
+	if (likely((bytes >> 9) <= maxsector))
+		return;
+
+	/* Uhhuh. We've got a bh that straddles the device size! */
+	bytes = maxsector << 9;
+
+	/* Truncate the bio.. */
+	bio->bi_size = bytes;
+	bio->bi_io_vec[0].bv_len = bytes;
+
+	/* ..and clear the end of the buffer for reads */
+	if ((rw & RW_MASK) == READ) {
+		void *kaddr = kmap_atomic(bh->b_page);
+		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+		kunmap_atomic(kaddr);
+	}
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
 	struct bio *bio;
@@ -2900,6 +2974,9 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
 
+	/* Take care of bh's that straddle the end of the device */
+	guard_bh_eod(rw, bio, bh);
+
 	bio_get(bio);
 	submit_bio(rw, bio);
 
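submit_bh() now runs every bio through guard_bh_eod(), which truncates I/O that straddles the end of the device and zeroes the tail of the buffer on reads; paired with the simplified blkdev_get_block() earlier in this series, this is what lets the last partial block of an odd-sized device do I/O instead of failing with -EIO. A hedged userspace walk-through of the truncation arithmetic, with made-up numbers:

    /*
     * Hedged walk-through of the guard_bh_eod() arithmetic above: a
     * 4096-byte buffer starting at sector 8 on a 4608-byte (9-sector)
     * device gets truncated to the 512 bytes that actually exist.
     * All sizes here are invented for the example.
     */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long dev_bytes = 4608;   /* 9 sectors */
            unsigned long long bi_sector = 8;      /* bio start sector */
            unsigned int bi_size = 4096;           /* bio length in bytes */
            unsigned long long maxsector = dev_bytes >> 9;

            if (bi_sector >= maxsector) {
                    puts("whole IO past EOD: let the IO layer fail it");
                    return 0;
            }

            maxsector -= bi_sector;                /* sectors remaining */
            if ((bi_size >> 9) > maxsector) {
                    unsigned int bytes = (unsigned int)(maxsector << 9);

                    printf("truncate bio from %u to %u bytes, "
                           "zero the last %u on reads\n",
                           bi_size, bytes, bi_size - bytes);
            }
            return 0;
    }

Here the buffer is cut down to 512 bytes, and a read would then zero the remaining 3584 bytes of the buffer head.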
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 2075ddfffa73..21ff76c22a17 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -122,9 +122,17 @@ config CIFS_ACL
 	  Allows fetching CIFS/NTFS ACL from the server. The DACL blob
 	  is handed over to the application/caller.
 
+config CIFS_DEBUG
+	bool "Enable CIFS debugging routines"
+	default y
+	depends on CIFS
+	help
+	   Enabling this option adds helpful debugging messages to
+	   the cifs code which increases the size of the cifs module.
+	   If unsure, say Y.
 config CIFS_DEBUG2
 	bool "Enable additional CIFS debugging routines"
-	depends on CIFS
+	depends on CIFS_DEBUG
 	help
 	  Enabling this option adds a few more debugging routines
 	  to the cifs code which slightly increases the size of
diff --git a/fs/cifs/README b/fs/cifs/README index 22ab7b5b8da7..2d5622f60e11 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -480,7 +480,7 @@ A partial list of the supported mount options follows: | |||
480 | Unicode on the wire. | 480 | Unicode on the wire. |
481 | nomapchars Do not translate any of these seven characters (default). | 481 | nomapchars Do not translate any of these seven characters (default). |
482 | nocase Request case insensitive path name matching (case | 482 | nocase Request case insensitive path name matching (case |
483 | sensitive is the default if the server suports it). | 483 | sensitive is the default if the server supports it). |
484 | (mount option "ignorecase" is identical to "nocase") | 484 | (mount option "ignorecase" is identical to "nocase") |
485 | posixpaths If CIFS Unix extensions are supported, attempt to | 485 | posixpaths If CIFS Unix extensions are supported, attempt to |
486 | negotiate posix path name support which allows certain | 486 | negotiate posix path name support which allows certain |
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index c0c68bb492d7..86e92ef2abc1 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h | |||
@@ -18,7 +18,6 @@ | |||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 | * | 19 | * |
20 | */ | 20 | */ |
21 | #define CIFS_DEBUG /* BB temporary */ | ||
22 | 21 | ||
23 | #ifndef _H_CIFS_DEBUG | 22 | #ifndef _H_CIFS_DEBUG |
24 | #define _H_CIFS_DEBUG | 23 | #define _H_CIFS_DEBUG |
@@ -37,49 +36,43 @@ void dump_smb(void *, int); | |||
37 | #define CIFS_RC 0x02 | 36 | #define CIFS_RC 0x02 |
38 | #define CIFS_TIMER 0x04 | 37 | #define CIFS_TIMER 0x04 |
39 | 38 | ||
39 | extern int cifsFYI; | ||
40 | extern int cifsERROR; | ||
41 | |||
40 | /* | 42 | /* |
41 | * debug ON | 43 | * debug ON |
42 | * -------- | 44 | * -------- |
43 | */ | 45 | */ |
44 | #ifdef CIFS_DEBUG | 46 | #ifdef CONFIG_CIFS_DEBUG |
45 | 47 | ||
46 | /* information message: e.g., configuration, major event */ | 48 | /* information message: e.g., configuration, major event */ |
47 | extern int cifsFYI; | 49 | #define cifsfyi(fmt, ...) \ |
48 | #define cifsfyi(fmt, arg...) \ | ||
49 | do { \ | 50 | do { \ |
50 | if (cifsFYI & CIFS_INFO) \ | 51 | if (cifsFYI & CIFS_INFO) \ |
51 | printk(KERN_DEBUG "%s: " fmt "\n", __FILE__, ##arg); \ | 52 | printk(KERN_DEBUG "%s: " fmt "\n", \ |
53 | __FILE__, ##__VA_ARGS__); \ | ||
52 | } while (0) | 54 | } while (0) |
53 | 55 | ||
54 | #define cFYI(set, fmt, arg...) \ | 56 | #define cFYI(set, fmt, ...) \ |
55 | do { \ | 57 | do { \ |
56 | if (set) \ | 58 | if (set) \ |
57 | cifsfyi(fmt, ##arg); \ | 59 | cifsfyi(fmt, ##__VA_ARGS__); \ |
58 | } while (0) | 60 | } while (0) |
59 | 61 | ||
60 | #define cifswarn(fmt, arg...) \ | 62 | #define cifswarn(fmt, ...) \ |
61 | printk(KERN_WARNING fmt "\n", ##arg) | 63 | printk(KERN_WARNING fmt "\n", ##__VA_ARGS__) |
62 | 64 | ||
63 | /* debug event message: */ | 65 | /* error event message: e.g., i/o error */ |
64 | extern int cifsERROR; | 66 | #define cifserror(fmt, ...) \ |
65 | |||
66 | #define cEVENT(fmt, arg...) \ | ||
67 | do { \ | 67 | do { \ |
68 | if (cifsERROR) \ | 68 | if (cifsERROR) \ |
69 | printk(KERN_EVENT "%s: " fmt "\n", __FILE__, ##arg); \ | 69 | printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ |
70 | } while (0) | ||
71 | |||
72 | /* error event message: e.g., i/o error */ | ||
73 | #define cifserror(fmt, arg...) \ | ||
74 | do { \ | ||
75 | if (cifsERROR) \ | ||
76 | printk(KERN_ERR "CIFS VFS: " fmt "\n", ##arg); \ | ||
77 | } while (0) | 70 | } while (0) |
78 | 71 | ||
79 | #define cERROR(set, fmt, arg...) \ | 72 | #define cERROR(set, fmt, ...) \ |
80 | do { \ | 73 | do { \ |
81 | if (set) \ | 74 | if (set) \ |
82 | cifserror(fmt, ##arg); \ | 75 | cifserror(fmt, ##__VA_ARGS__); \ |
83 | } while (0) | 76 | } while (0) |
84 | 77 | ||
85 | /* | 78 | /* |
@@ -87,10 +80,27 @@ do { \ | |||
87 | * --------- | 80 | * --------- |
88 | */ | 81 | */ |
89 | #else /* _CIFS_DEBUG */ | 82 | #else /* _CIFS_DEBUG */ |
90 | #define cERROR(set, fmt, arg...) | 83 | #define cifsfyi(fmt, ...) \ |
91 | #define cEVENT(fmt, arg...) | 84 | do { \ |
92 | #define cFYI(set, fmt, arg...) | 85 | if (0) \ |
93 | #define cifserror(fmt, arg...) | 86 | printk(KERN_DEBUG "%s: " fmt "\n", \ |
87 | __FILE__, ##__VA_ARGS__); \ | ||
88 | } while (0) | ||
89 | #define cFYI(set, fmt, ...) \ | ||
90 | do { \ | ||
91 | if (0 && set) \ | ||
92 | cifsfyi(fmt, ##__VA_ARGS__); \ | ||
93 | } while (0) | ||
94 | #define cifserror(fmt, ...) \ | ||
95 | do { \ | ||
96 | if (0) \ | ||
97 | printk(KERN_ERR "CIFS VFS: " fmt "\n", ##__VA_ARGS__); \ | ||
98 | } while (0) | ||
99 | #define cERROR(set, fmt, ...) \ | ||
100 | do { \ | ||
101 | if (0 && set) \ | ||
102 | cifserror(fmt, ##__VA_ARGS__); \ | ||
103 | } while (0) | ||
94 | #endif /* _CIFS_DEBUG */ | 104 | #endif /* _CIFS_DEBUG */ |
95 | 105 | ||
96 | #endif /* _H_CIFS_DEBUG */ | 106 | #endif /* _H_CIFS_DEBUG */ |
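The new debug-OFF branch above replaces empty macro bodies with if (0) wrappers. A minimal sketch of why, using a hypothetical my_dbg() over stdio: the compiler type-checks the format string and arguments in both configurations and then discards the dead call, so a build without the debug option catches the same format-string mistakes as one with it:

#include <stdio.h>

#ifdef MY_DEBUG
#define my_dbg(fmt, ...) \
do { \
        fprintf(stderr, "%s: " fmt "\n", __FILE__, ##__VA_ARGS__); \
} while (0)
#else
#define my_dbg(fmt, ...) \
do { \
        if (0) \
                fprintf(stderr, "%s: " fmt "\n", __FILE__, ##__VA_ARGS__); \
} while (0)
#endif

int main(void)
{
        int rc = -5;

        my_dbg("rc=%d", rc);    /* format checked whether or not MY_DEBUG is set */
        return 0;
}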
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 0fb15bbbe43c..75c1ee699143 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -42,135 +42,27 @@ static const struct cifs_sid sid_authusers = { | |||
42 | /* group users */ | 42 | /* group users */ |
43 | static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; | 43 | static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; |
44 | 44 | ||
45 | const struct cred *root_cred; | 45 | static const struct cred *root_cred; |
46 | |||
47 | static void | ||
48 | shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem, | ||
49 | int *nr_del) | ||
50 | { | ||
51 | struct rb_node *node; | ||
52 | struct rb_node *tmp; | ||
53 | struct cifs_sid_id *psidid; | ||
54 | |||
55 | node = rb_first(root); | ||
56 | while (node) { | ||
57 | tmp = node; | ||
58 | node = rb_next(tmp); | ||
59 | psidid = rb_entry(tmp, struct cifs_sid_id, rbnode); | ||
60 | if (nr_to_scan == 0 || *nr_del == nr_to_scan) | ||
61 | ++(*nr_rem); | ||
62 | else { | ||
63 | if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE) | ||
64 | && psidid->refcount == 0) { | ||
65 | rb_erase(tmp, root); | ||
66 | ++(*nr_del); | ||
67 | } else | ||
68 | ++(*nr_rem); | ||
69 | } | ||
70 | } | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * Run idmap cache shrinker. | ||
75 | */ | ||
76 | static int | ||
77 | cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc) | ||
78 | { | ||
79 | int nr_to_scan = sc->nr_to_scan; | ||
80 | int nr_del = 0; | ||
81 | int nr_rem = 0; | ||
82 | struct rb_root *root; | ||
83 | |||
84 | root = &uidtree; | ||
85 | spin_lock(&siduidlock); | ||
86 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); | ||
87 | spin_unlock(&siduidlock); | ||
88 | |||
89 | root = &gidtree; | ||
90 | spin_lock(&sidgidlock); | ||
91 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); | ||
92 | spin_unlock(&sidgidlock); | ||
93 | |||
94 | root = &siduidtree; | ||
95 | spin_lock(&uidsidlock); | ||
96 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); | ||
97 | spin_unlock(&uidsidlock); | ||
98 | |||
99 | root = &sidgidtree; | ||
100 | spin_lock(&gidsidlock); | ||
101 | shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del); | ||
102 | spin_unlock(&gidsidlock); | ||
103 | |||
104 | return nr_rem; | ||
105 | } | ||
106 | |||
107 | static void | ||
108 | sid_rb_insert(struct rb_root *root, unsigned long cid, | ||
109 | struct cifs_sid_id **psidid, char *typestr) | ||
110 | { | ||
111 | char *strptr; | ||
112 | struct rb_node *node = root->rb_node; | ||
113 | struct rb_node *parent = NULL; | ||
114 | struct rb_node **linkto = &(root->rb_node); | ||
115 | struct cifs_sid_id *lsidid; | ||
116 | |||
117 | while (node) { | ||
118 | lsidid = rb_entry(node, struct cifs_sid_id, rbnode); | ||
119 | parent = node; | ||
120 | if (cid > lsidid->id) { | ||
121 | linkto = &(node->rb_left); | ||
122 | node = node->rb_left; | ||
123 | } | ||
124 | if (cid < lsidid->id) { | ||
125 | linkto = &(node->rb_right); | ||
126 | node = node->rb_right; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | (*psidid)->id = cid; | ||
131 | (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); | ||
132 | (*psidid)->refcount = 0; | ||
133 | |||
134 | sprintf((*psidid)->sidstr, "%s", typestr); | ||
135 | strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr); | ||
136 | sprintf(strptr, "%ld", cid); | ||
137 | |||
138 | clear_bit(SID_ID_PENDING, &(*psidid)->state); | ||
139 | clear_bit(SID_ID_MAPPED, &(*psidid)->state); | ||
140 | |||
141 | rb_link_node(&(*psidid)->rbnode, parent, linkto); | ||
142 | rb_insert_color(&(*psidid)->rbnode, root); | ||
143 | } | ||
144 | |||
145 | static struct cifs_sid_id * | ||
146 | sid_rb_search(struct rb_root *root, unsigned long cid) | ||
147 | { | ||
148 | struct rb_node *node = root->rb_node; | ||
149 | struct cifs_sid_id *lsidid; | ||
150 | |||
151 | while (node) { | ||
152 | lsidid = rb_entry(node, struct cifs_sid_id, rbnode); | ||
153 | if (cid > lsidid->id) | ||
154 | node = node->rb_left; | ||
155 | else if (cid < lsidid->id) | ||
156 | node = node->rb_right; | ||
157 | else /* node found */ | ||
158 | return lsidid; | ||
159 | } | ||
160 | |||
161 | return NULL; | ||
162 | } | ||
163 | |||
164 | static struct shrinker cifs_shrinker = { | ||
165 | .shrink = cifs_idmap_shrinker, | ||
166 | .seeks = DEFAULT_SEEKS, | ||
167 | }; | ||
168 | 46 | ||
169 | static int | 47 | static int |
170 | cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) | 48 | cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) |
171 | { | 49 | { |
172 | char *payload; | 50 | char *payload; |
173 | 51 | ||
52 | /* | ||
53 | * If the payload is less than or equal to the size of a pointer, then | ||
54 | * an allocation here is wasteful. Just copy the data directly to the | ||
55 | * payload.value union member instead. | ||
56 | * | ||
57 | * With this however, you must check the datalen before trying to | ||
58 | * dereference payload.data! | ||
59 | */ | ||
60 | if (prep->datalen <= sizeof(key->payload)) { | ||
61 | key->payload.value = 0; | ||
62 | memcpy(&key->payload.value, prep->data, prep->datalen); | ||
63 | key->datalen = prep->datalen; | ||
64 | return 0; | ||
65 | } | ||
174 | payload = kmalloc(prep->datalen, GFP_KERNEL); | 66 | payload = kmalloc(prep->datalen, GFP_KERNEL); |
175 | if (!payload) | 67 | if (!payload) |
176 | return -ENOMEM; | 68 | return -ENOMEM; |
@@ -184,10 +76,11 @@ cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) | |||
184 | static inline void | 76 | static inline void |
185 | cifs_idmap_key_destroy(struct key *key) | 77 | cifs_idmap_key_destroy(struct key *key) |
186 | { | 78 | { |
187 | kfree(key->payload.data); | 79 | if (key->datalen > sizeof(key->payload)) |
80 | kfree(key->payload.data); | ||
188 | } | 81 | } |
189 | 82 | ||
190 | struct key_type cifs_idmap_key_type = { | 83 | static struct key_type cifs_idmap_key_type = { |
191 | .name = "cifs.idmap", | 84 | .name = "cifs.idmap", |
192 | .instantiate = cifs_idmap_key_instantiate, | 85 | .instantiate = cifs_idmap_key_instantiate, |
193 | .destroy = cifs_idmap_key_destroy, | 86 | .destroy = cifs_idmap_key_destroy, |
@@ -195,221 +88,174 @@ struct key_type cifs_idmap_key_type = { | |||
195 | .match = user_match, | 88 | .match = user_match, |
196 | }; | 89 | }; |
197 | 90 | ||
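cifs_idmap_key_instantiate() above now embeds payloads that fit within the payload union instead of allocating. A user-space sketch of that trick with illustrative names (not the kernel key API); note the destructor must check datalen before treating the slot as a heap pointer:

#include <stdlib.h>
#include <string.h>

struct small_key {
        unsigned short datalen;
        union {
                void *data;             /* heap pointer for large payloads */
                unsigned long value;    /* inline storage for small ones */
        } payload;
};

static int key_set(struct small_key *k, const void *data, unsigned short len)
{
        if (len <= sizeof(k->payload)) {
                k->payload.value = 0;
                memcpy(&k->payload.value, data, len);   /* no allocation */
        } else {
                k->payload.data = malloc(len);
                if (!k->payload.data)
                        return -1;
                memcpy(k->payload.data, data, len);
        }
        k->datalen = len;
        return 0;
}

static void key_destroy(struct small_key *k)
{
        /* only free when the payload really is a heap pointer */
        if (k->datalen > sizeof(k->payload))
                free(k->payload.data);
}

int main(void)
{
        struct small_key k = { 0 };
        unsigned int uid = 1000;

        if (key_set(&k, &uid, sizeof(uid)))     /* 4 bytes: stored inline */
                return 1;
        key_destroy(&k);                        /* no free needed */
        return 0;
}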
198 | static void | 91 | static char * |
199 | sid_to_str(struct cifs_sid *sidptr, char *sidstr) | 92 | sid_to_key_str(struct cifs_sid *sidptr, unsigned int type) |
200 | { | 93 | { |
201 | int i; | 94 | int i, len; |
202 | unsigned long saval; | 95 | unsigned int saval; |
203 | char *strptr; | 96 | char *sidstr, *strptr; |
97 | unsigned long long id_auth_val; | ||
98 | |||
99 | /* 3 bytes for prefix */ | ||
100 | sidstr = kmalloc(3 + SID_STRING_BASE_SIZE + | ||
101 | (SID_STRING_SUBAUTH_SIZE * sidptr->num_subauth), | ||
102 | GFP_KERNEL); | ||
103 | if (!sidstr) | ||
104 | return sidstr; | ||
204 | 105 | ||
205 | strptr = sidstr; | 106 | strptr = sidstr; |
107 | len = sprintf(strptr, "%cs:S-%hhu", type == SIDOWNER ? 'o' : 'g', | ||
108 | sidptr->revision); | ||
109 | strptr += len; | ||
110 | |||
111 | /* The authority field is a single 48-bit number */ | ||
112 | id_auth_val = (unsigned long long)sidptr->authority[5]; | ||
113 | id_auth_val |= (unsigned long long)sidptr->authority[4] << 8; | ||
114 | id_auth_val |= (unsigned long long)sidptr->authority[3] << 16; | ||
115 | id_auth_val |= (unsigned long long)sidptr->authority[2] << 24; | ||
116 | id_auth_val |= (unsigned long long)sidptr->authority[1] << 32; | ||
117 | id_auth_val |= (unsigned long long)sidptr->authority[0] << 48; | ||
206 | 118 | ||
207 | sprintf(strptr, "%s", "S"); | 119 | /* |
208 | strptr = sidstr + strlen(sidstr); | 120 | * MS-DTYP states that if the authority is >= 2^32, then it should be |
209 | 121 | * expressed as a hex value. | |
210 | sprintf(strptr, "-%d", sidptr->revision); | 122 | */ |
211 | strptr = sidstr + strlen(sidstr); | 123 | if (id_auth_val <= UINT_MAX) |
124 | len = sprintf(strptr, "-%llu", id_auth_val); | ||
125 | else | ||
126 | len = sprintf(strptr, "-0x%llx", id_auth_val); | ||
212 | 127 | ||
213 | for (i = 0; i < 6; ++i) { | 128 | strptr += len; |
214 | if (sidptr->authority[i]) { | ||
215 | sprintf(strptr, "-%d", sidptr->authority[i]); | ||
216 | strptr = sidstr + strlen(sidstr); | ||
217 | } | ||
218 | } | ||
219 | 129 | ||
220 | for (i = 0; i < sidptr->num_subauth; ++i) { | 130 | for (i = 0; i < sidptr->num_subauth; ++i) { |
221 | saval = le32_to_cpu(sidptr->sub_auth[i]); | 131 | saval = le32_to_cpu(sidptr->sub_auth[i]); |
222 | sprintf(strptr, "-%ld", saval); | 132 | len = sprintf(strptr, "-%u", saval); |
223 | strptr = sidstr + strlen(sidstr); | 133 | strptr += len; |
224 | } | 134 | } |
225 | } | ||
226 | 135 | ||
227 | static void | 136 | return sidstr; |
228 | cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) | ||
229 | { | ||
230 | memcpy(dst, src, sizeof(*dst)); | ||
231 | dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS); | ||
232 | } | 137 | } |
233 | 138 | ||
234 | static void | 139 | /* |
235 | id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, | 140 | * if the two SIDs (roughly equivalent to a UUID for a user or group) are |
236 | struct cifs_sid_id **psidid, char *typestr) | 141 | * the same returns zero, if they do not match returns non-zero. |
142 | */ | ||
143 | static int | ||
144 | compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) | ||
237 | { | 145 | { |
238 | int rc; | 146 | int i; |
239 | char *strptr; | 147 | int num_subauth, num_sat, num_saw; |
240 | struct rb_node *node = root->rb_node; | ||
241 | struct rb_node *parent = NULL; | ||
242 | struct rb_node **linkto = &(root->rb_node); | ||
243 | struct cifs_sid_id *lsidid; | ||
244 | |||
245 | while (node) { | ||
246 | lsidid = rb_entry(node, struct cifs_sid_id, rbnode); | ||
247 | parent = node; | ||
248 | rc = compare_sids(sidptr, &((lsidid)->sid)); | ||
249 | if (rc > 0) { | ||
250 | linkto = &(node->rb_left); | ||
251 | node = node->rb_left; | ||
252 | } else if (rc < 0) { | ||
253 | linkto = &(node->rb_right); | ||
254 | node = node->rb_right; | ||
255 | } | ||
256 | } | ||
257 | |||
258 | cifs_copy_sid(&(*psidid)->sid, sidptr); | ||
259 | (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); | ||
260 | (*psidid)->refcount = 0; | ||
261 | 148 | ||
262 | sprintf((*psidid)->sidstr, "%s", typestr); | 149 | if ((!ctsid) || (!cwsid)) |
263 | strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr); | 150 | return 1; |
264 | sid_to_str(&(*psidid)->sid, strptr); | ||
265 | 151 | ||
266 | clear_bit(SID_ID_PENDING, &(*psidid)->state); | 152 | /* compare the revision */ |
267 | clear_bit(SID_ID_MAPPED, &(*psidid)->state); | 153 | if (ctsid->revision != cwsid->revision) { |
154 | if (ctsid->revision > cwsid->revision) | ||
155 | return 1; | ||
156 | else | ||
157 | return -1; | ||
158 | } | ||
268 | 159 | ||
269 | rb_link_node(&(*psidid)->rbnode, parent, linkto); | 160 | /* compare all of the six auth values */ |
270 | rb_insert_color(&(*psidid)->rbnode, root); | 161 | for (i = 0; i < NUM_AUTHS; ++i) { |
271 | } | 162 | if (ctsid->authority[i] != cwsid->authority[i]) { |
163 | if (ctsid->authority[i] > cwsid->authority[i]) | ||
164 | return 1; | ||
165 | else | ||
166 | return -1; | ||
167 | } | ||
168 | } | ||
272 | 169 | ||
273 | static struct cifs_sid_id * | 170 | /* compare all of the subauth values if any */ |
274 | id_rb_search(struct rb_root *root, struct cifs_sid *sidptr) | 171 | num_sat = ctsid->num_subauth; |
275 | { | 172 | num_saw = cwsid->num_subauth; |
276 | int rc; | 173 | num_subauth = num_sat < num_saw ? num_sat : num_saw; |
277 | struct rb_node *node = root->rb_node; | 174 | if (num_subauth) { |
278 | struct cifs_sid_id *lsidid; | 175 | for (i = 0; i < num_subauth; ++i) { |
279 | 176 | if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) { | |
280 | while (node) { | 177 | if (le32_to_cpu(ctsid->sub_auth[i]) > |
281 | lsidid = rb_entry(node, struct cifs_sid_id, rbnode); | 178 | le32_to_cpu(cwsid->sub_auth[i])) |
282 | rc = compare_sids(sidptr, &((lsidid)->sid)); | 179 | return 1; |
283 | if (rc > 0) { | 180 | else |
284 | node = node->rb_left; | 181 | return -1; |
285 | } else if (rc < 0) { | 182 | } |
286 | node = node->rb_right; | 183 | } |
287 | } else /* node found */ | ||
288 | return lsidid; | ||
289 | } | 184 | } |
290 | 185 | ||
291 | return NULL; | 186 | return 0; /* sids compare/match */ |
292 | } | 187 | } |
293 | 188 | ||
294 | static int | 189 | static void |
295 | sidid_pending_wait(void *unused) | 190 | cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) |
296 | { | 191 | { |
297 | schedule(); | 192 | int i; |
298 | return signal_pending(current) ? -ERESTARTSYS : 0; | 193 | |
194 | dst->revision = src->revision; | ||
195 | dst->num_subauth = min_t(u8, src->num_subauth, SID_MAX_SUB_AUTHORITIES); | ||
196 | for (i = 0; i < NUM_AUTHS; ++i) | ||
197 | dst->authority[i] = src->authority[i]; | ||
198 | for (i = 0; i < dst->num_subauth; ++i) | ||
199 | dst->sub_auth[i] = src->sub_auth[i]; | ||
299 | } | 200 | } |
300 | 201 | ||
301 | static int | 202 | static int |
302 | id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) | 203 | id_to_sid(unsigned int cid, uint sidtype, struct cifs_sid *ssid) |
303 | { | 204 | { |
304 | int rc = 0; | 205 | int rc; |
305 | struct key *sidkey; | 206 | struct key *sidkey; |
207 | struct cifs_sid *ksid; | ||
208 | unsigned int ksid_size; | ||
209 | char desc[3 + 10 + 1]; /* 3 byte prefix + 10 bytes for value + NULL */ | ||
306 | const struct cred *saved_cred; | 210 | const struct cred *saved_cred; |
307 | struct cifs_sid *lsid; | ||
308 | struct cifs_sid_id *psidid, *npsidid; | ||
309 | struct rb_root *cidtree; | ||
310 | spinlock_t *cidlock; | ||
311 | |||
312 | if (sidtype == SIDOWNER) { | ||
313 | cidlock = &siduidlock; | ||
314 | cidtree = &uidtree; | ||
315 | } else if (sidtype == SIDGROUP) { | ||
316 | cidlock = &sidgidlock; | ||
317 | cidtree = &gidtree; | ||
318 | } else | ||
319 | return -EINVAL; | ||
320 | |||
321 | spin_lock(cidlock); | ||
322 | psidid = sid_rb_search(cidtree, cid); | ||
323 | 211 | ||
324 | if (!psidid) { /* node does not exist, allocate one & attempt adding */ | 212 | rc = snprintf(desc, sizeof(desc), "%ci:%u", |
325 | spin_unlock(cidlock); | 213 | sidtype == SIDOWNER ? 'o' : 'g', cid); |
326 | npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL); | 214 | if (rc >= sizeof(desc)) |
327 | if (!npsidid) | 215 | return -EINVAL; |
328 | return -ENOMEM; | ||
329 | |||
330 | npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL); | ||
331 | if (!npsidid->sidstr) { | ||
332 | kfree(npsidid); | ||
333 | return -ENOMEM; | ||
334 | } | ||
335 | 216 | ||
336 | spin_lock(cidlock); | 217 | rc = 0; |
337 | psidid = sid_rb_search(cidtree, cid); | 218 | saved_cred = override_creds(root_cred); |
338 | if (psidid) { /* node happened to get inserted meanwhile */ | 219 | sidkey = request_key(&cifs_idmap_key_type, desc, ""); |
339 | ++psidid->refcount; | 220 | if (IS_ERR(sidkey)) { |
340 | spin_unlock(cidlock); | 221 | rc = -EINVAL; |
341 | kfree(npsidid->sidstr); | 222 | cFYI(1, "%s: Can't map %cid %u to a SID", __func__, |
342 | kfree(npsidid); | 223 | sidtype == SIDOWNER ? 'u' : 'g', cid); |
343 | } else { | 224 | goto out_revert_creds; |
344 | psidid = npsidid; | 225 | } else if (sidkey->datalen < CIFS_SID_BASE_SIZE) { |
345 | sid_rb_insert(cidtree, cid, &psidid, | 226 | rc = -EIO; |
346 | sidtype == SIDOWNER ? "oi:" : "gi:"); | 227 | cFYI(1, "%s: Downcall contained malformed key " |
347 | ++psidid->refcount; | 228 | "(datalen=%hu)", __func__, sidkey->datalen); |
348 | spin_unlock(cidlock); | 229 | goto invalidate_key; |
349 | } | ||
350 | } else { | ||
351 | ++psidid->refcount; | ||
352 | spin_unlock(cidlock); | ||
353 | } | 230 | } |
354 | 231 | ||
355 | /* | 232 | /* |
356 | * If we are here, it is safe to access psidid and its fields | 233 | * A sid is usually too large to be embedded in payload.value, but if |
357 | * since a reference was taken earlier while holding the spinlock. | 234 | * there are no subauthorities and the host has 8-byte pointers, then |
358 | * A reference on the node is put without holding the spinlock | 235 | * it could be. |
359 | * and it is OK to do so in this case, shrinker will not erase | ||
360 | * this node until all references are put and we do not access | ||
361 | * any fields of the node after a reference is put . | ||
362 | */ | 236 | */ |
363 | if (test_bit(SID_ID_MAPPED, &psidid->state)) { | 237 | ksid = sidkey->datalen <= sizeof(sidkey->payload) ? |
364 | cifs_copy_sid(ssid, &psidid->sid); | 238 | (struct cifs_sid *)&sidkey->payload.value : |
365 | psidid->time = jiffies; /* update ts for accessing */ | 239 | (struct cifs_sid *)sidkey->payload.data; |
366 | goto id_sid_out; | 240 | |
241 | ksid_size = CIFS_SID_BASE_SIZE + (ksid->num_subauth * sizeof(__le32)); | ||
242 | if (ksid_size > sidkey->datalen) { | ||
243 | rc = -EIO; | ||
244 | cFYI(1, "%s: Downcall contained malformed key (datalen=%hu, " | ||
245 | "ksid_size=%u)", __func__, sidkey->datalen, ksid_size); | ||
246 | goto invalidate_key; | ||
367 | } | 247 | } |
368 | 248 | ||
369 | if (time_after(psidid->time + SID_MAP_RETRY, jiffies)) { | 249 | cifs_copy_sid(ssid, ksid); |
370 | rc = -EINVAL; | 250 | out_key_put: |
371 | goto id_sid_out; | 251 | key_put(sidkey); |
372 | } | 252 | out_revert_creds: |
373 | 253 | revert_creds(saved_cred); | |
374 | if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) { | ||
375 | saved_cred = override_creds(root_cred); | ||
376 | sidkey = request_key(&cifs_idmap_key_type, psidid->sidstr, ""); | ||
377 | if (IS_ERR(sidkey)) { | ||
378 | rc = -EINVAL; | ||
379 | cFYI(1, "%s: Can't map and id to a SID", __func__); | ||
380 | } else if (sidkey->datalen < sizeof(struct cifs_sid)) { | ||
381 | rc = -EIO; | ||
382 | cFYI(1, "%s: Downcall contained malformed key " | ||
383 | "(datalen=%hu)", __func__, sidkey->datalen); | ||
384 | } else { | ||
385 | lsid = (struct cifs_sid *)sidkey->payload.data; | ||
386 | cifs_copy_sid(&psidid->sid, lsid); | ||
387 | cifs_copy_sid(ssid, &psidid->sid); | ||
388 | set_bit(SID_ID_MAPPED, &psidid->state); | ||
389 | key_put(sidkey); | ||
390 | kfree(psidid->sidstr); | ||
391 | } | ||
392 | psidid->time = jiffies; /* update ts for accessing */ | ||
393 | revert_creds(saved_cred); | ||
394 | clear_bit(SID_ID_PENDING, &psidid->state); | ||
395 | wake_up_bit(&psidid->state, SID_ID_PENDING); | ||
396 | } else { | ||
397 | rc = wait_on_bit(&psidid->state, SID_ID_PENDING, | ||
398 | sidid_pending_wait, TASK_INTERRUPTIBLE); | ||
399 | if (rc) { | ||
400 | cFYI(1, "%s: sidid_pending_wait interrupted %d", | ||
401 | __func__, rc); | ||
402 | --psidid->refcount; | ||
403 | return rc; | ||
404 | } | ||
405 | if (test_bit(SID_ID_MAPPED, &psidid->state)) | ||
406 | cifs_copy_sid(ssid, &psidid->sid); | ||
407 | else | ||
408 | rc = -EINVAL; | ||
409 | } | ||
410 | id_sid_out: | ||
411 | --psidid->refcount; | ||
412 | return rc; | 254 | return rc; |
255 | |||
256 | invalidate_key: | ||
257 | key_invalidate(sidkey); | ||
258 | goto out_key_put; | ||
413 | } | 259 | } |
414 | 260 | ||
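sid_to_key_str() above folds the six authority bytes into a single 48-bit value and, per MS-DTYP, switches to hex once that value no longer fits in 32 bits. The same folding as a tiny stand-alone program; the kernel spells out the shifts explicitly, but a loop is equivalent:

#include <stdio.h>

int main(void)
{
        unsigned char authority[6] = { 0, 0, 0, 0, 0, 5 }; /* NT authority */
        unsigned long long id_auth_val = 0;
        int i;

        /* fold six big-endian bytes into one 48-bit value */
        for (i = 0; i < 6; i++)
                id_auth_val = (id_auth_val << 8) | authority[i];

        /* MS-DTYP: decimal if it fits in 32 bits, hex otherwise */
        if (id_auth_val <= 0xffffffffULL)
                printf("S-1-%llu\n", id_auth_val);      /* prints "S-1-5" */
        else
                printf("S-1-0x%llx\n", id_auth_val);
        return 0;
}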
415 | static int | 261 | static int |
@@ -417,111 +263,67 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid, | |||
417 | struct cifs_fattr *fattr, uint sidtype) | 263 | struct cifs_fattr *fattr, uint sidtype) |
418 | { | 264 | { |
419 | int rc; | 265 | int rc; |
420 | unsigned long cid; | 266 | struct key *sidkey; |
421 | struct key *idkey; | 267 | char *sidstr; |
422 | const struct cred *saved_cred; | 268 | const struct cred *saved_cred; |
423 | struct cifs_sid_id *psidid, *npsidid; | 269 | uid_t fuid = cifs_sb->mnt_uid; |
424 | struct rb_root *cidtree; | 270 | gid_t fgid = cifs_sb->mnt_gid; |
425 | spinlock_t *cidlock; | ||
426 | |||
427 | if (sidtype == SIDOWNER) { | ||
428 | cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */ | ||
429 | cidlock = &siduidlock; | ||
430 | cidtree = &uidtree; | ||
431 | } else if (sidtype == SIDGROUP) { | ||
432 | cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */ | ||
433 | cidlock = &sidgidlock; | ||
434 | cidtree = &gidtree; | ||
435 | } else | ||
436 | return -ENOENT; | ||
437 | |||
438 | spin_lock(cidlock); | ||
439 | psidid = id_rb_search(cidtree, psid); | ||
440 | |||
441 | if (!psidid) { /* node does not exist, allocate one & attempt adding */ | ||
442 | spin_unlock(cidlock); | ||
443 | npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL); | ||
444 | if (!npsidid) | ||
445 | return -ENOMEM; | ||
446 | |||
447 | npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL); | ||
448 | if (!npsidid->sidstr) { | ||
449 | kfree(npsidid); | ||
450 | return -ENOMEM; | ||
451 | } | ||
452 | |||
453 | spin_lock(cidlock); | ||
454 | psidid = id_rb_search(cidtree, psid); | ||
455 | if (psidid) { /* node happened to get inserted meanwhile */ | ||
456 | ++psidid->refcount; | ||
457 | spin_unlock(cidlock); | ||
458 | kfree(npsidid->sidstr); | ||
459 | kfree(npsidid); | ||
460 | } else { | ||
461 | psidid = npsidid; | ||
462 | id_rb_insert(cidtree, psid, &psidid, | ||
463 | sidtype == SIDOWNER ? "os:" : "gs:"); | ||
464 | ++psidid->refcount; | ||
465 | spin_unlock(cidlock); | ||
466 | } | ||
467 | } else { | ||
468 | ++psidid->refcount; | ||
469 | spin_unlock(cidlock); | ||
470 | } | ||
471 | 271 | ||
472 | /* | 272 | /* |
473 | * If we are here, it is safe to access psidid and its fields | 273 | * If we have too many subauthorities, then something is really wrong. |
474 | * since a reference was taken earlier while holding the spinlock. | 274 | * Just return an error. |
475 | * A reference on the node is put without holding the spinlock | ||
476 | * and it is OK to do so in this case, shrinker will not erase | ||
477 | * this node until all references are put and we do not access | ||
478 | * any fields of the node after a reference is put . | ||
479 | */ | 275 | */ |
480 | if (test_bit(SID_ID_MAPPED, &psidid->state)) { | 276 | if (unlikely(psid->num_subauth > SID_MAX_SUB_AUTHORITIES)) { |
481 | cid = psidid->id; | 277 | cFYI(1, "%s: %u subauthorities is too many!", __func__, |
482 | psidid->time = jiffies; /* update ts for accessing */ | 278 | psid->num_subauth); |
483 | goto sid_to_id_out; | 279 | return -EIO; |
484 | } | 280 | } |
485 | 281 | ||
486 | if (time_after(psidid->time + SID_MAP_RETRY, jiffies)) | 282 | sidstr = sid_to_key_str(psid, sidtype); |
487 | goto sid_to_id_out; | 283 | if (!sidstr) |
488 | 284 | return -ENOMEM; | |
489 | if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) { | 285 | |
490 | saved_cred = override_creds(root_cred); | 286 | saved_cred = override_creds(root_cred); |
491 | idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, ""); | 287 | sidkey = request_key(&cifs_idmap_key_type, sidstr, ""); |
492 | if (IS_ERR(idkey)) | 288 | if (IS_ERR(sidkey)) { |
493 | cFYI(1, "%s: Can't map SID to an id", __func__); | 289 | rc = -EINVAL; |
494 | else { | 290 | cFYI(1, "%s: Can't map SID %s to a %cid", __func__, sidstr, |
495 | cid = *(unsigned long *)idkey->payload.value; | 291 | sidtype == SIDOWNER ? 'u' : 'g'); |
496 | psidid->id = cid; | 292 | goto out_revert_creds; |
497 | set_bit(SID_ID_MAPPED, &psidid->state); | 293 | } |
498 | key_put(idkey); | 294 | |
499 | kfree(psidid->sidstr); | 295 | /* |
500 | } | 296 | * FIXME: Here we assume that uid_t and gid_t are same size. It's |
501 | revert_creds(saved_cred); | 297 | * probably a safe assumption but might be better to check based on |
502 | psidid->time = jiffies; /* update ts for accessing */ | 298 | * sidtype. |
503 | clear_bit(SID_ID_PENDING, &psidid->state); | 299 | */ |
504 | wake_up_bit(&psidid->state, SID_ID_PENDING); | 300 | if (sidkey->datalen != sizeof(uid_t)) { |
505 | } else { | 301 | rc = -EIO; |
506 | rc = wait_on_bit(&psidid->state, SID_ID_PENDING, | 302 | cFYI(1, "%s: Downcall contained malformed key " |
507 | sidid_pending_wait, TASK_INTERRUPTIBLE); | 303 | "(datalen=%hu)", __func__, sidkey->datalen); |
508 | if (rc) { | 304 | key_invalidate(sidkey); |
509 | cFYI(1, "%s: sidid_pending_wait interrupted %d", | 305 | goto out_key_put; |
510 | __func__, rc); | ||
511 | --psidid->refcount; /* decremented without spinlock */ | ||
512 | return rc; | ||
513 | } | ||
514 | if (test_bit(SID_ID_MAPPED, &psidid->state)) | ||
515 | cid = psidid->id; | ||
516 | } | 306 | } |
517 | 307 | ||
518 | sid_to_id_out: | ||
519 | --psidid->refcount; /* decremented without spinlock */ | ||
520 | if (sidtype == SIDOWNER) | 308 | if (sidtype == SIDOWNER) |
521 | fattr->cf_uid = cid; | 309 | memcpy(&fuid, &sidkey->payload.value, sizeof(uid_t)); |
522 | else | 310 | else |
523 | fattr->cf_gid = cid; | 311 | memcpy(&fgid, &sidkey->payload.value, sizeof(gid_t)); |
312 | |||
313 | out_key_put: | ||
314 | key_put(sidkey); | ||
315 | out_revert_creds: | ||
316 | revert_creds(saved_cred); | ||
317 | kfree(sidstr); | ||
524 | 318 | ||
319 | /* | ||
320 | * Note that we return 0 here unconditionally. If the mapping | ||
321 | * fails then we just fall back to using the mnt_uid/mnt_gid. | ||
322 | */ | ||
323 | if (sidtype == SIDOWNER) | ||
324 | fattr->cf_uid = fuid; | ||
325 | else | ||
326 | fattr->cf_gid = fgid; | ||
525 | return 0; | 327 | return 0; |
526 | } | 328 | } |
527 | 329 | ||
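Note the policy sid_to_id() lands on above: seed the result with the mount's mnt_uid/mnt_gid, overwrite only on a successful upcall, and return 0 regardless. A user-space sketch of that shape; resolve_sid() is a hypothetical stand-in for the request_key() upcall:

#include <stdbool.h>
#include <stdio.h>

struct fattr { unsigned int cf_uid, cf_gid; };

/* hypothetical resolver; returns false when no mapping exists */
static bool resolve_sid(const char *sidstr, unsigned int *id)
{
        (void)sidstr;
        (void)id;
        return false;   /* pretend the upcall failed */
}

static int sid_to_uid(const char *sidstr, unsigned int mnt_uid,
                      struct fattr *fattr)
{
        unsigned int fuid = mnt_uid;    /* default if mapping fails */

        resolve_sid(sidstr, &fuid);     /* may overwrite fuid on success */
        fattr->cf_uid = fuid;
        return 0;       /* unconditional: the default is acceptable */
}

int main(void)
{
        struct fattr fattr;

        sid_to_uid("S-1-5-21-1-2-3-1000", 1000, &fattr);
        printf("uid=%u\n", fattr.cf_uid);       /* 1000, the fallback */
        return 0;
}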
@@ -568,17 +370,6 @@ init_cifs_idmap(void) | |||
568 | cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; | 370 | cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; |
569 | root_cred = cred; | 371 | root_cred = cred; |
570 | 372 | ||
571 | spin_lock_init(&siduidlock); | ||
572 | uidtree = RB_ROOT; | ||
573 | spin_lock_init(&sidgidlock); | ||
574 | gidtree = RB_ROOT; | ||
575 | |||
576 | spin_lock_init(&uidsidlock); | ||
577 | siduidtree = RB_ROOT; | ||
578 | spin_lock_init(&gidsidlock); | ||
579 | sidgidtree = RB_ROOT; | ||
580 | register_shrinker(&cifs_shrinker); | ||
581 | |||
582 | cFYI(1, "cifs idmap keyring: %d", key_serial(keyring)); | 373 | cFYI(1, "cifs idmap keyring: %d", key_serial(keyring)); |
583 | return 0; | 374 | return 0; |
584 | 375 | ||
@@ -595,89 +386,9 @@ exit_cifs_idmap(void) | |||
595 | key_revoke(root_cred->thread_keyring); | 386 | key_revoke(root_cred->thread_keyring); |
596 | unregister_key_type(&cifs_idmap_key_type); | 387 | unregister_key_type(&cifs_idmap_key_type); |
597 | put_cred(root_cred); | 388 | put_cred(root_cred); |
598 | unregister_shrinker(&cifs_shrinker); | ||
599 | cFYI(1, "Unregistered %s key type", cifs_idmap_key_type.name); | 389 | cFYI(1, "Unregistered %s key type", cifs_idmap_key_type.name); |
600 | } | 390 | } |
601 | 391 | ||
602 | void | ||
603 | cifs_destroy_idmaptrees(void) | ||
604 | { | ||
605 | struct rb_root *root; | ||
606 | struct rb_node *node; | ||
607 | |||
608 | root = &uidtree; | ||
609 | spin_lock(&siduidlock); | ||
610 | while ((node = rb_first(root))) | ||
611 | rb_erase(node, root); | ||
612 | spin_unlock(&siduidlock); | ||
613 | |||
614 | root = &gidtree; | ||
615 | spin_lock(&sidgidlock); | ||
616 | while ((node = rb_first(root))) | ||
617 | rb_erase(node, root); | ||
618 | spin_unlock(&sidgidlock); | ||
619 | |||
620 | root = &siduidtree; | ||
621 | spin_lock(&uidsidlock); | ||
622 | while ((node = rb_first(root))) | ||
623 | rb_erase(node, root); | ||
624 | spin_unlock(&uidsidlock); | ||
625 | |||
626 | root = &sidgidtree; | ||
627 | spin_lock(&gidsidlock); | ||
628 | while ((node = rb_first(root))) | ||
629 | rb_erase(node, root); | ||
630 | spin_unlock(&gidsidlock); | ||
631 | } | ||
632 | |||
633 | /* if the two SIDs (roughly equivalent to a UUID for a user or group) are | ||
634 | the same returns 1, if they do not match returns 0 */ | ||
635 | int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) | ||
636 | { | ||
637 | int i; | ||
638 | int num_subauth, num_sat, num_saw; | ||
639 | |||
640 | if ((!ctsid) || (!cwsid)) | ||
641 | return 1; | ||
642 | |||
643 | /* compare the revision */ | ||
644 | if (ctsid->revision != cwsid->revision) { | ||
645 | if (ctsid->revision > cwsid->revision) | ||
646 | return 1; | ||
647 | else | ||
648 | return -1; | ||
649 | } | ||
650 | |||
651 | /* compare all of the six auth values */ | ||
652 | for (i = 0; i < 6; ++i) { | ||
653 | if (ctsid->authority[i] != cwsid->authority[i]) { | ||
654 | if (ctsid->authority[i] > cwsid->authority[i]) | ||
655 | return 1; | ||
656 | else | ||
657 | return -1; | ||
658 | } | ||
659 | } | ||
660 | |||
661 | /* compare all of the subauth values if any */ | ||
662 | num_sat = ctsid->num_subauth; | ||
663 | num_saw = cwsid->num_subauth; | ||
664 | num_subauth = num_sat < num_saw ? num_sat : num_saw; | ||
665 | if (num_subauth) { | ||
666 | for (i = 0; i < num_subauth; ++i) { | ||
667 | if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) { | ||
668 | if (le32_to_cpu(ctsid->sub_auth[i]) > | ||
669 | le32_to_cpu(cwsid->sub_auth[i])) | ||
670 | return 1; | ||
671 | else | ||
672 | return -1; | ||
673 | } | ||
674 | } | ||
675 | } | ||
676 | |||
677 | return 0; /* sids compare/match */ | ||
678 | } | ||
679 | |||
680 | |||
681 | /* copy ntsd, owner sid, and group sid from a security descriptor to another */ | 392 | /* copy ntsd, owner sid, and group sid from a security descriptor to another */ |
682 | static void copy_sec_desc(const struct cifs_ntsd *pntsd, | 393 | static void copy_sec_desc(const struct cifs_ntsd *pntsd, |
683 | struct cifs_ntsd *pnntsd, __u32 sidsoffset) | 394 | struct cifs_ntsd *pnntsd, __u32 sidsoffset) |
@@ -811,7 +522,7 @@ static __u16 fill_ace_for_sid(struct cifs_ace *pntace, | |||
811 | 522 | ||
812 | pntace->sid.revision = psid->revision; | 523 | pntace->sid.revision = psid->revision; |
813 | pntace->sid.num_subauth = psid->num_subauth; | 524 | pntace->sid.num_subauth = psid->num_subauth; |
814 | for (i = 0; i < 6; i++) | 525 | for (i = 0; i < NUM_AUTHS; i++) |
815 | pntace->sid.authority[i] = psid->authority[i]; | 526 | pntace->sid.authority[i] = psid->authority[i]; |
816 | for (i = 0; i < psid->num_subauth; i++) | 527 | for (i = 0; i < psid->num_subauth; i++) |
817 | pntace->sid.sub_auth[i] = psid->sub_auth[i]; | 528 | pntace->sid.sub_auth[i] = psid->sub_auth[i]; |
@@ -987,8 +698,8 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl) | |||
987 | return -EINVAL; | 698 | return -EINVAL; |
988 | } | 699 | } |
989 | 700 | ||
990 | if (psid->num_subauth) { | ||
991 | #ifdef CONFIG_CIFS_DEBUG2 | 701 | #ifdef CONFIG_CIFS_DEBUG2 |
702 | if (psid->num_subauth) { | ||
992 | int i; | 703 | int i; |
993 | cFYI(1, "SID revision %d num_auth %d", | 704 | cFYI(1, "SID revision %d num_auth %d", |
994 | psid->revision, psid->num_subauth); | 705 | psid->revision, psid->num_subauth); |
@@ -1002,8 +713,8 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl) | |||
1002 | num auths and therefore go off the end */ | 713 | num auths and therefore go off the end */ |
1003 | cFYI(1, "RID 0x%x", | 714 | cFYI(1, "RID 0x%x", |
1004 | le32_to_cpu(psid->sub_auth[psid->num_subauth-1])); | 715 | le32_to_cpu(psid->sub_auth[psid->num_subauth-1])); |
1005 | #endif | ||
1006 | } | 716 | } |
717 | #endif | ||
1007 | 718 | ||
1008 | return 0; | 719 | return 0; |
1009 | } | 720 | } |
@@ -1307,42 +1018,39 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, | |||
1307 | 1018 | ||
1308 | /* Get the security descriptor */ | 1019 | /* Get the security descriptor */ |
1309 | pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); | 1020 | pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); |
1310 | |||
1311 | /* Add three ACEs for owner, group, everyone getting rid of | ||
1312 | other ACEs as chmod disables ACEs and set the security descriptor */ | ||
1313 | |||
1314 | if (IS_ERR(pntsd)) { | 1021 | if (IS_ERR(pntsd)) { |
1315 | rc = PTR_ERR(pntsd); | 1022 | rc = PTR_ERR(pntsd); |
1316 | cERROR(1, "%s: error %d getting sec desc", __func__, rc); | 1023 | cERROR(1, "%s: error %d getting sec desc", __func__, rc); |
1317 | } else { | 1024 | goto out; |
1318 | /* allocate memory for the smb header, | 1025 | } |
1319 | set security descriptor request security descriptor | ||
1320 | parameters, and secuirty descriptor itself */ | ||
1321 | |||
1322 | secdesclen = secdesclen < DEFSECDESCLEN ? | ||
1323 | DEFSECDESCLEN : secdesclen; | ||
1324 | pnntsd = kmalloc(secdesclen, GFP_KERNEL); | ||
1325 | if (!pnntsd) { | ||
1326 | cERROR(1, "Unable to allocate security descriptor"); | ||
1327 | kfree(pntsd); | ||
1328 | return -ENOMEM; | ||
1329 | } | ||
1330 | 1026 | ||
1331 | rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid, | 1027 | /* |
1332 | &aclflag); | 1028 | * Add three ACEs for owner, group, everyone getting rid of other ACEs |
1029 | * as chmod disables ACEs and set the security descriptor. Allocate | ||
1030 | * memory for the smb header, set security descriptor request security | ||
1031 | * descriptor parameters, and security descriptor itself | ||
1032 | */ | ||
1033 | secdesclen = max_t(u32, secdesclen, DEFAULT_SEC_DESC_LEN); | ||
1034 | pnntsd = kmalloc(secdesclen, GFP_KERNEL); | ||
1035 | if (!pnntsd) { | ||
1036 | cERROR(1, "Unable to allocate security descriptor"); | ||
1037 | kfree(pntsd); | ||
1038 | return -ENOMEM; | ||
1039 | } | ||
1333 | 1040 | ||
1334 | cFYI(DBG2, "build_sec_desc rc: %d", rc); | 1041 | rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid, |
1042 | &aclflag); | ||
1335 | 1043 | ||
1336 | if (!rc) { | 1044 | cFYI(DBG2, "build_sec_desc rc: %d", rc); |
1337 | /* Set the security descriptor */ | ||
1338 | rc = set_cifs_acl(pnntsd, secdesclen, inode, | ||
1339 | path, aclflag); | ||
1340 | cFYI(DBG2, "set_cifs_acl rc: %d", rc); | ||
1341 | } | ||
1342 | 1045 | ||
1343 | kfree(pnntsd); | 1046 | if (!rc) { |
1344 | kfree(pntsd); | 1047 | /* Set the security descriptor */ |
1048 | rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag); | ||
1049 | cFYI(DBG2, "set_cifs_acl rc: %d", rc); | ||
1345 | } | 1050 | } |
1346 | 1051 | ||
1052 | kfree(pnntsd); | ||
1053 | kfree(pntsd); | ||
1054 | out: | ||
1347 | return rc; | 1055 | return rc; |
1348 | } | 1056 | } |
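The id_mode_to_cifs_acl() hunk above flattens a deeply nested else branch into straight-line code with early exits, the usual kernel shape for functions with cleanup. A compilable sketch of the same structure, with hypothetical stand-ins for the get/build/set helpers:

#include <errno.h>
#include <stdlib.h>

/* hypothetical stand-ins for get_cifs_acl()/build_sec_desc()/set_cifs_acl() */
static int get_desc(void **desc)
{
        *desc = malloc(64);
        return *desc ? 0 : -ENOMEM;
}

static int build_and_set(void *old_desc, void *new_desc)
{
        (void)old_desc;
        (void)new_desc;
        return 0;
}

static int update_acl(void)
{
        void *pntsd = NULL, *pnntsd = NULL;
        int rc;

        rc = get_desc(&pntsd);
        if (rc)
                goto out;               /* error first, nothing to free yet */

        pnntsd = malloc(192);           /* stand-in for DEFAULT_SEC_DESC_LEN */
        if (!pnntsd) {
                free(pntsd);
                return -ENOMEM;
        }

        rc = build_and_set(pntsd, pnntsd);

        free(pnntsd);
        free(pntsd);
out:
        return rc;
}

int main(void)
{
        return update_acl() ? 1 : 0;
}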
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index 5c902c7ce524..4f3884835267 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h | |||
@@ -23,11 +23,8 @@ | |||
23 | #define _CIFSACL_H | 23 | #define _CIFSACL_H |
24 | 24 | ||
25 | 25 | ||
26 | #define NUM_AUTHS 6 /* number of authority fields */ | 26 | #define NUM_AUTHS (6) /* number of authority fields */ |
27 | #define NUM_SUBAUTHS 5 /* number of sub authority fields */ | 27 | #define SID_MAX_SUB_AUTHORITIES (15) /* max number of sub authority fields */ |
28 | #define NUM_WK_SIDS 7 /* number of well known sids */ | ||
29 | #define SIDNAMELENGTH 20 /* long enough for the ones we care about */ | ||
30 | #define DEFSECDESCLEN 192 /* sec desc len contaiting a dacl with three aces */ | ||
31 | 28 | ||
32 | #define READ_BIT 0x4 | 29 | #define READ_BIT 0x4 |
33 | #define WRITE_BIT 0x2 | 30 | #define WRITE_BIT 0x2 |
@@ -41,12 +38,32 @@ | |||
41 | 38 | ||
42 | #define SIDOWNER 1 | 39 | #define SIDOWNER 1 |
43 | #define SIDGROUP 2 | 40 | #define SIDGROUP 2 |
44 | #define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */ | ||
45 | 41 | ||
46 | #define SID_ID_MAPPED 0 | 42 | /* |
47 | #define SID_ID_PENDING 1 | 43 | * Security Descriptor length containing DACL with 3 ACEs (one each for |
48 | #define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */ | 44 | * owner, group and world). |
49 | #define SID_MAP_RETRY (300 * HZ) /* wait 5 minutes for next attempt to map */ | 45 | */ |
46 | #define DEFAULT_SEC_DESC_LEN (sizeof(struct cifs_ntsd) + \ | ||
47 | sizeof(struct cifs_acl) + \ | ||
48 | (sizeof(struct cifs_ace) * 3)) | ||
49 | |||
50 | /* | ||
51 | * Maximum size of a string representation of a SID: | ||
52 | * | ||
53 | * The fields are unsigned values in decimal. So: | ||
54 | * | ||
55 | * u8: max 3 bytes in decimal | ||
56 | * u32: max 10 bytes in decimal | ||
57 | * | ||
58 | * "S-" + 3 bytes for version field + 15 for authority field + NULL terminator | ||
59 | * | ||
60 | * For authority field, max is when all 6 values are non-zero and it must be | ||
61 | * represented in hex. So "-0x" + 12 hex digits. | ||
62 | * | ||
63 | * Add 11 bytes for each subauthority field (10 bytes each + 1 for '-') | ||
64 | */ | ||
65 | #define SID_STRING_BASE_SIZE (2 + 3 + 15 + 1) | ||
66 | #define SID_STRING_SUBAUTH_SIZE (11) /* size of a single subauth string */ | ||
50 | 67 | ||
51 | struct cifs_ntsd { | 68 | struct cifs_ntsd { |
52 | __le16 revision; /* revision level */ | 69 | __le16 revision; /* revision level */ |
@@ -60,10 +77,13 @@ struct cifs_ntsd { | |||
60 | struct cifs_sid { | 77 | struct cifs_sid { |
61 | __u8 revision; /* revision level */ | 78 | __u8 revision; /* revision level */ |
62 | __u8 num_subauth; | 79 | __u8 num_subauth; |
63 | __u8 authority[6]; | 80 | __u8 authority[NUM_AUTHS]; |
64 | __le32 sub_auth[5]; /* sub_auth[num_subauth] */ | 81 | __le32 sub_auth[SID_MAX_SUB_AUTHORITIES]; /* sub_auth[num_subauth] */ |
65 | } __attribute__((packed)); | 82 | } __attribute__((packed)); |
66 | 83 | ||
84 | /* size of a struct cifs_sid, sans sub_auth array */ | ||
85 | #define CIFS_SID_BASE_SIZE (1 + 1 + NUM_AUTHS) | ||
86 | |||
67 | struct cifs_acl { | 87 | struct cifs_acl { |
68 | __le16 revision; /* revision level */ | 88 | __le16 revision; /* revision level */ |
69 | __le16 size; | 89 | __le16 size; |
@@ -78,26 +98,4 @@ struct cifs_ace { | |||
78 | struct cifs_sid sid; /* ie UUID of user or group who gets these perms */ | 98 | struct cifs_sid sid; /* ie UUID of user or group who gets these perms */ |
79 | } __attribute__((packed)); | 99 | } __attribute__((packed)); |
80 | 100 | ||
81 | struct cifs_wksid { | ||
82 | struct cifs_sid cifssid; | ||
83 | char sidname[SIDNAMELENGTH]; | ||
84 | } __attribute__((packed)); | ||
85 | |||
86 | struct cifs_sid_id { | ||
87 | unsigned int refcount; /* increment with spinlock, decrement without */ | ||
88 | unsigned long id; | ||
89 | unsigned long time; | ||
90 | unsigned long state; | ||
91 | char *sidstr; | ||
92 | struct rb_node rbnode; | ||
93 | struct cifs_sid sid; | ||
94 | }; | ||
95 | |||
96 | #ifdef __KERNEL__ | ||
97 | extern struct key_type cifs_idmap_key_type; | ||
98 | extern const struct cred *root_cred; | ||
99 | #endif /* KERNEL */ | ||
100 | |||
101 | extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); | ||
102 | |||
103 | #endif /* _CIFSACL_H */ | 101 | #endif /* _CIFSACL_H */ |
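A quick check of the SID_STRING_* sizing comment above: the worst case is "S-" plus a 3-digit revision, "-0x" plus 12 hex digits for the 48-bit authority, a NUL, and 11 bytes per subauthority. The sketch below builds a worst-case string and asserts it fits; sid_to_key_str() additionally reserves 3 bytes for its "os:"/"gs:" prefix:

#include <assert.h>
#include <stdio.h>

#define SID_MAX_SUB_AUTHORITIES 15
#define SID_STRING_BASE_SIZE    (2 + 3 + 15 + 1)
#define SID_STRING_SUBAUTH_SIZE 11

int main(void)
{
        char buf[SID_STRING_BASE_SIZE +
                 SID_STRING_SUBAUTH_SIZE * SID_MAX_SUB_AUTHORITIES];
        int len, i;

        /* max revision, max 48-bit authority in hex */
        len = sprintf(buf, "S-%hhu-0x%llx", (unsigned char)255,
                      0xffffffffffffULL);
        /* max 32-bit subauthorities, "-" + 10 digits each */
        for (i = 0; i < SID_MAX_SUB_AUTHORITIES; i++)
                len += sprintf(buf + len, "-%u", 0xffffffffu);
        assert(len < (int)sizeof(buf));
        printf("worst case %d bytes, buffer %zu\n", len, sizeof(buf));
        return 0;
}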
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e7931cc55d0c..210f0af83fc4 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -64,24 +64,23 @@ unsigned int global_secflags = CIFSSEC_DEF; | |||
64 | unsigned int sign_CIFS_PDUs = 1; | 64 | unsigned int sign_CIFS_PDUs = 1; |
65 | static const struct super_operations cifs_super_ops; | 65 | static const struct super_operations cifs_super_ops; |
66 | unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; | 66 | unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; |
67 | module_param(CIFSMaxBufSize, int, 0); | 67 | module_param(CIFSMaxBufSize, uint, 0); |
68 | MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). " | 68 | MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). " |
69 | "Default: 16384 Range: 8192 to 130048"); | 69 | "Default: 16384 Range: 8192 to 130048"); |
70 | unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL; | 70 | unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL; |
71 | module_param(cifs_min_rcv, int, 0); | 71 | module_param(cifs_min_rcv, uint, 0); |
72 | MODULE_PARM_DESC(cifs_min_rcv, "Network buffers in pool. Default: 4 Range: " | 72 | MODULE_PARM_DESC(cifs_min_rcv, "Network buffers in pool. Default: 4 Range: " |
73 | "1 to 64"); | 73 | "1 to 64"); |
74 | unsigned int cifs_min_small = 30; | 74 | unsigned int cifs_min_small = 30; |
75 | module_param(cifs_min_small, int, 0); | 75 | module_param(cifs_min_small, uint, 0); |
76 | MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " | 76 | MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " |
77 | "Range: 2 to 256"); | 77 | "Range: 2 to 256"); |
78 | unsigned int cifs_max_pending = CIFS_MAX_REQ; | 78 | unsigned int cifs_max_pending = CIFS_MAX_REQ; |
79 | module_param(cifs_max_pending, int, 0444); | 79 | module_param(cifs_max_pending, uint, 0444); |
80 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " | 80 | MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. " |
81 | "Default: 32767 Range: 2 to 32767."); | 81 | "Default: 32767 Range: 2 to 32767."); |
82 | module_param(enable_oplocks, bool, 0644); | 82 | module_param(enable_oplocks, bool, 0644); |
83 | MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks (bool). Default:" | 83 | MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1"); |
84 | "y/Y/1"); | ||
85 | 84 | ||
86 | extern mempool_t *cifs_sm_req_poolp; | 85 | extern mempool_t *cifs_sm_req_poolp; |
87 | extern mempool_t *cifs_req_poolp; | 86 | extern mempool_t *cifs_req_poolp; |
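The module_param() hunk above changes the type token from int to uint so it agrees with the unsigned int declarations; mismatched pairs draw a diagnostic from the kernel's param type-check machinery. A minimal module sketch of the matched form, with a hypothetical demo_bufsize parameter:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static unsigned int demo_bufsize = 16384;
module_param(demo_bufsize, uint, 0444); /* token must match the C type */
MODULE_PARM_DESC(demo_bufsize, "Demo buffer size. Default: 16384");

static int __init demo_init(void)
{
        pr_info("demo_bufsize=%u\n", demo_bufsize);
        return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");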
@@ -230,6 +229,7 @@ cifs_alloc_inode(struct super_block *sb) | |||
230 | cifs_set_oplock_level(cifs_inode, 0); | 229 | cifs_set_oplock_level(cifs_inode, 0); |
231 | cifs_inode->delete_pending = false; | 230 | cifs_inode->delete_pending = false; |
232 | cifs_inode->invalid_mapping = false; | 231 | cifs_inode->invalid_mapping = false; |
232 | cifs_inode->leave_pages_clean = false; | ||
233 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ | 233 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ |
234 | cifs_inode->server_eof = 0; | 234 | cifs_inode->server_eof = 0; |
235 | cifs_inode->uniqueid = 0; | 235 | cifs_inode->uniqueid = 0; |
@@ -540,8 +540,8 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
540 | char *s, *p; | 540 | char *s, *p; |
541 | char sep; | 541 | char sep; |
542 | 542 | ||
543 | full_path = build_path_to_root(vol, cifs_sb, | 543 | full_path = cifs_build_path_to_root(vol, cifs_sb, |
544 | cifs_sb_master_tcon(cifs_sb)); | 544 | cifs_sb_master_tcon(cifs_sb)); |
545 | if (full_path == NULL) | 545 | if (full_path == NULL) |
546 | return ERR_PTR(-ENOMEM); | 546 | return ERR_PTR(-ENOMEM); |
547 | 547 | ||
@@ -1205,7 +1205,6 @@ exit_cifs(void) | |||
1205 | unregister_filesystem(&cifs_fs_type); | 1205 | unregister_filesystem(&cifs_fs_type); |
1206 | cifs_dfs_release_automount_timer(); | 1206 | cifs_dfs_release_automount_timer(); |
1207 | #ifdef CONFIG_CIFS_ACL | 1207 | #ifdef CONFIG_CIFS_ACL |
1208 | cifs_destroy_idmaptrees(); | ||
1209 | exit_cifs_idmap(); | 1208 | exit_cifs_idmap(); |
1210 | #endif | 1209 | #endif |
1211 | #ifdef CONFIG_CIFS_UPCALL | 1210 | #ifdef CONFIG_CIFS_UPCALL |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f5af2527fc69..aea1eec64911 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -178,6 +178,7 @@ struct smb_rqst { | |||
178 | 178 | ||
179 | enum smb_version { | 179 | enum smb_version { |
180 | Smb_1 = 1, | 180 | Smb_1 = 1, |
181 | Smb_20, | ||
181 | Smb_21, | 182 | Smb_21, |
182 | Smb_30, | 183 | Smb_30, |
183 | }; | 184 | }; |
@@ -280,9 +281,6 @@ struct smb_version_operations { | |||
280 | /* set attributes */ | 281 | /* set attributes */ |
281 | int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, | 282 | int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, |
282 | const unsigned int); | 283 | const unsigned int); |
283 | /* build a full path to the root of the mount */ | ||
284 | char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, | ||
285 | struct cifs_tcon *); | ||
287 | /* check if we can send an echo or not */ | 284 | /* check if we can send an echo or not */ |

287 | bool (*can_echo)(struct TCP_Server_Info *); | 285 | bool (*can_echo)(struct TCP_Server_Info *); |
288 | /* send echo request */ | 286 | /* send echo request */ |
@@ -369,6 +367,8 @@ struct smb_version_operations { | |||
369 | void (*set_lease_key)(struct inode *, struct cifs_fid *fid); | 367 | void (*set_lease_key)(struct inode *, struct cifs_fid *fid); |
370 | /* generate new lease key */ | 368 | /* generate new lease key */ |
371 | void (*new_lease_key)(struct cifs_fid *fid); | 369 | void (*new_lease_key)(struct cifs_fid *fid); |
370 | int (*calc_signature)(struct smb_rqst *rqst, | ||
371 | struct TCP_Server_Info *server); | ||
372 | }; | 372 | }; |
373 | 373 | ||
374 | struct smb_version_values { | 374 | struct smb_version_values { |
@@ -396,7 +396,6 @@ struct smb_vol { | |||
396 | char *password; | 396 | char *password; |
397 | char *domainname; | 397 | char *domainname; |
398 | char *UNC; | 398 | char *UNC; |
399 | char *UNCip; | ||
400 | char *iocharset; /* local code page for mapping to and from Unicode */ | 399 | char *iocharset; /* local code page for mapping to and from Unicode */ |
401 | char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ | 400 | char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ |
402 | char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ | 401 | char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ |
@@ -444,11 +443,11 @@ struct smb_vol { | |||
444 | unsigned int rsize; | 443 | unsigned int rsize; |
445 | unsigned int wsize; | 444 | unsigned int wsize; |
446 | bool sockopt_tcp_nodelay:1; | 445 | bool sockopt_tcp_nodelay:1; |
447 | unsigned short int port; | ||
448 | unsigned long actimeo; /* attribute cache timeout (jiffies) */ | 446 | unsigned long actimeo; /* attribute cache timeout (jiffies) */ |
449 | struct smb_version_operations *ops; | 447 | struct smb_version_operations *ops; |
450 | struct smb_version_values *vals; | 448 | struct smb_version_values *vals; |
451 | char *prepath; | 449 | char *prepath; |
450 | struct sockaddr_storage dstaddr; /* destination address */ | ||
452 | struct sockaddr_storage srcaddr; /* allow binding to a local IP */ | 451 | struct sockaddr_storage srcaddr; /* allow binding to a local IP */ |
453 | struct nls_table *local_nls; | 452 | struct nls_table *local_nls; |
454 | }; | 453 | }; |
@@ -1031,6 +1030,7 @@ struct cifsInodeInfo { | |||
1031 | bool clientCanCacheAll; /* read and writebehind oplock */ | 1030 | bool clientCanCacheAll; /* read and writebehind oplock */ |
1032 | bool delete_pending; /* DELETE_ON_CLOSE is set */ | 1031 | bool delete_pending; /* DELETE_ON_CLOSE is set */ |
1033 | bool invalid_mapping; /* pagecache is invalid */ | 1032 | bool invalid_mapping; /* pagecache is invalid */ |
1033 | bool leave_pages_clean; /* protected by i_mutex, not set pages dirty */ | ||
1034 | unsigned long time; /* jiffies of last update of inode */ | 1034 | unsigned long time; /* jiffies of last update of inode */ |
1035 | u64 server_eof; /* current file size on server -- protected by i_lock */ | 1035 | u64 server_eof; /* current file size on server -- protected by i_lock */ |
1036 | u64 uniqueid; /* server inode number */ | 1036 | u64 uniqueid; /* server inode number */ |
@@ -1067,30 +1067,16 @@ static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb) | |||
1067 | static inline void | 1067 | static inline void |
1068 | convert_delimiter(char *path, char delim) | 1068 | convert_delimiter(char *path, char delim) |
1069 | { | 1069 | { |
1070 | int i; | 1070 | char old_delim, *pos; |
1071 | char old_delim; | ||
1072 | |||
1073 | if (path == NULL) | ||
1074 | return; | ||
1075 | 1071 | ||
1076 | if (delim == '/') | 1072 | if (delim == '/') |
1077 | old_delim = '\\'; | 1073 | old_delim = '\\'; |
1078 | else | 1074 | else |
1079 | old_delim = '/'; | 1075 | old_delim = '/'; |
1080 | 1076 | ||
1081 | for (i = 0; path[i] != '\0'; i++) { | 1077 | pos = path; |
1082 | if (path[i] == old_delim) | 1078 | while ((pos = strchr(pos, old_delim))) |
1083 | path[i] = delim; | 1079 | *pos = delim; |
1084 | } | ||
1085 | } | ||
1086 | |||
1087 | static inline char * | ||
1088 | build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, | ||
1089 | struct cifs_tcon *tcon) | ||
1090 | { | ||
1091 | if (!vol->ops->build_path_to_root) | ||
1092 | return NULL; | ||
1093 | return vol->ops->build_path_to_root(vol, cifs_sb, tcon); | ||
1094 | } | 1080 | } |
1095 | 1081 | ||
1096 | #ifdef CONFIG_CIFS_STATS | 1082 | #ifdef CONFIG_CIFS_STATS |
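convert_delimiter() above now walks the string with strchr() instead of indexing every byte, and drops the NULL check since callers pass valid paths. The same loop as a stand-alone program:

#include <stdio.h>
#include <string.h>

static void convert_delimiter(char *path, char delim)
{
        char old_delim = (delim == '/') ? '\\' : '/';
        char *pos = path;

        /* strchr() returns NULL once no old delimiter remains */
        while ((pos = strchr(pos, old_delim)))
                *pos = delim;
}

int main(void)
{
        char path[] = "\\srv\\share\\dir";

        convert_delimiter(path, '/');
        printf("%s\n", path);   /* /srv/share/dir */
        return 0;
}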
@@ -1362,7 +1348,7 @@ require use of the stronger protocol */ | |||
1362 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ | 1348 | #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ |
1363 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ | 1349 | #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ |
1364 | 1350 | ||
1365 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP) | 1351 | #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMSSP) |
1366 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) | 1352 | #define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) |
1367 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) | 1353 | #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) |
1368 | /* | 1354 | /* |
@@ -1506,6 +1492,6 @@ extern struct smb_version_values smb20_values; | |||
1506 | extern struct smb_version_operations smb21_operations; | 1492 | extern struct smb_version_operations smb21_operations; |
1507 | extern struct smb_version_values smb21_values; | 1493 | extern struct smb_version_values smb21_values; |
1508 | #define SMB30_VERSION_STRING "3.0" | 1494 | #define SMB30_VERSION_STRING "3.0" |
1509 | /*extern struct smb_version_operations smb30_operations; */ /* not needed yet */ | 1495 | extern struct smb_version_operations smb30_operations; |
1510 | extern struct smb_version_values smb30_values; | 1496 | extern struct smb_version_values smb30_values; |
1511 | #endif /* _CIFS_GLOB_H */ | 1497 | #endif /* _CIFS_GLOB_H */ |
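Adding the Smb_20 enumerator (cifsglob.h above) and its token (the connect.c table below) is all the vers= parser needs for a new dialect. A sketch of that table-driven mapping; plain strcmp() stands in for the kernel's match_token(), and the version strings mirror the *_VERSION_STRING macros:

#include <stdio.h>
#include <string.h>

enum smb_version { Smb_1 = 1, Smb_20, Smb_21, Smb_30 };

static const struct { enum smb_version ver; const char *str; } tokens[] = {
        { Smb_1,  "1" },
        { Smb_20, "2.0" },
        { Smb_21, "2.1" },
        { Smb_30, "3.0" },
};

static int parse_vers(const char *arg, enum smb_version *out)
{
        size_t i;

        for (i = 0; i < sizeof(tokens) / sizeof(tokens[0]); i++) {
                if (!strcmp(arg, tokens[i].str)) {
                        *out = tokens[i].ver;
                        return 0;
                }
        }
        return -1;      /* unknown dialect */
}

int main(void)
{
        enum smb_version v;

        if (!parse_vers("2.0", &v))
                printf("vers token -> %d\n", v);        /* 2 (Smb_20) */
        return 0;
}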
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 5144e9fbeb8c..1988c1baa224 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -58,8 +58,10 @@ do { \ | |||
58 | } while (0) | 58 | } while (0) |
59 | extern int init_cifs_idmap(void); | 59 | extern int init_cifs_idmap(void); |
60 | extern void exit_cifs_idmap(void); | 60 | extern void exit_cifs_idmap(void); |
61 | extern void cifs_destroy_idmaptrees(void); | ||
62 | extern char *build_path_from_dentry(struct dentry *); | 61 | extern char *build_path_from_dentry(struct dentry *); |
62 | extern char *cifs_build_path_to_root(struct smb_vol *vol, | ||
63 | struct cifs_sb_info *cifs_sb, | ||
64 | struct cifs_tcon *tcon); | ||
63 | extern char *build_wildcard_path_from_dentry(struct dentry *direntry); | 65 | extern char *build_wildcard_path_from_dentry(struct dentry *direntry); |
64 | extern char *cifs_compose_mount_options(const char *sb_mountdata, | 66 | extern char *cifs_compose_mount_options(const char *sb_mountdata, |
65 | const char *fullpath, const struct dfs_info3_param *ref, | 67 | const char *fullpath, const struct dfs_info3_param *ref, |
@@ -107,9 +109,7 @@ extern unsigned int smbCalcSize(void *buf); | |||
107 | extern int decode_negTokenInit(unsigned char *security_blob, int length, | 109 | extern int decode_negTokenInit(unsigned char *security_blob, int length, |
108 | struct TCP_Server_Info *server); | 110 | struct TCP_Server_Info *server); |
109 | extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); | 111 | extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); |
110 | extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port); | 112 | extern void cifs_set_port(struct sockaddr *addr, const unsigned short int port); |
111 | extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, | ||
112 | const unsigned short int port); | ||
113 | extern int map_smb_to_linux_error(char *buf, bool logErr); | 113 | extern int map_smb_to_linux_error(char *buf, bool logErr); |
114 | extern void header_assemble(struct smb_hdr *, char /* command */ , | 114 | extern void header_assemble(struct smb_hdr *, char /* command */ , |
115 | const struct cifs_tcon *, int /* length of | 115 | const struct cifs_tcon *, int /* length of |
@@ -185,7 +185,7 @@ extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); | |||
185 | extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, | 185 | extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, |
186 | __u64 length, __u8 type, | 186 | __u64 length, __u8 type, |
187 | struct cifsLockInfo **conf_lock, | 187 | struct cifsLockInfo **conf_lock, |
188 | bool rw_check); | 188 | int rw_check); |
189 | extern void cifs_add_pending_open(struct cifs_fid *fid, | 189 | extern void cifs_add_pending_open(struct cifs_fid *fid, |
190 | struct tcon_link *tlink, | 190 | struct tcon_link *tlink, |
191 | struct cifs_pending_open *open); | 191 | struct cifs_pending_open *open); |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5c670b998ffb..7635b5db26a7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -186,6 +186,7 @@ static const match_table_t cifs_mount_option_tokens = { | |||
186 | { Opt_user, "user=%s" }, | 186 | { Opt_user, "user=%s" }, |
187 | { Opt_user, "username=%s" }, | 187 | { Opt_user, "username=%s" }, |
188 | { Opt_blank_pass, "pass=" }, | 188 | { Opt_blank_pass, "pass=" }, |
189 | { Opt_blank_pass, "password=" }, | ||
189 | { Opt_pass, "pass=%s" }, | 190 | { Opt_pass, "pass=%s" }, |
190 | { Opt_pass, "password=%s" }, | 191 | { Opt_pass, "password=%s" }, |
191 | { Opt_blank_ip, "ip=" }, | 192 | { Opt_blank_ip, "ip=" }, |
@@ -274,6 +275,7 @@ static const match_table_t cifs_cacheflavor_tokens = { | |||
274 | 275 | ||
275 | static const match_table_t cifs_smb_version_tokens = { | 276 | static const match_table_t cifs_smb_version_tokens = { |
276 | { Smb_1, SMB1_VERSION_STRING }, | 277 | { Smb_1, SMB1_VERSION_STRING }, |
278 | { Smb_20, SMB20_VERSION_STRING}, | ||
277 | { Smb_21, SMB21_VERSION_STRING }, | 279 | { Smb_21, SMB21_VERSION_STRING }, |
278 | { Smb_30, SMB30_VERSION_STRING }, | 280 | { Smb_30, SMB30_VERSION_STRING }, |
279 | }; | 281 | }; |
@@ -1074,12 +1076,16 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) | |||
1074 | vol->vals = &smb1_values; | 1076 | vol->vals = &smb1_values; |
1075 | break; | 1077 | break; |
1076 | #ifdef CONFIG_CIFS_SMB2 | 1078 | #ifdef CONFIG_CIFS_SMB2 |
1079 | case Smb_20: | ||
1080 | vol->ops = &smb21_operations; /* currently identical with 2.1 */ | ||
1081 | vol->vals = &smb20_values; | ||
1082 | break; | ||
1077 | case Smb_21: | 1083 | case Smb_21: |
1078 | vol->ops = &smb21_operations; | 1084 | vol->ops = &smb21_operations; |
1079 | vol->vals = &smb21_values; | 1085 | vol->vals = &smb21_values; |
1080 | break; | 1086 | break; |
1081 | case Smb_30: | 1087 | case Smb_30: |
1082 | vol->ops = &smb21_operations; /* currently identical with 2.1 */ | 1088 | vol->ops = &smb30_operations; |
1083 | vol->vals = &smb30_values; | 1089 | vol->vals = &smb30_values; |
1084 | break; | 1090 | break; |
1085 | #endif | 1091 | #endif |
@@ -1090,6 +1096,52 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) | |||
1090 | return 0; | 1096 | return 0; |
1091 | } | 1097 | } |
1092 | 1098 | ||
1099 | /* | ||
1100 | * Parse a devname into substrings and populate the vol->UNC and vol->prepath | ||
1101 | * fields with the result. Returns 0 on success and a negative error code otherwise. | ||
1102 | */ | ||
1103 | static int | ||
1104 | cifs_parse_devname(const char *devname, struct smb_vol *vol) | ||
1105 | { | ||
1106 | char *pos; | ||
1107 | const char *delims = "/\\"; | ||
1108 | size_t len; | ||
1109 | |||
1110 | /* make sure we have a valid UNC double delimiter prefix */ | ||
1111 | len = strspn(devname, delims); | ||
1112 | if (len != 2) | ||
1113 | return -EINVAL; | ||
1114 | |||
1115 | /* find delimiter between host and sharename */ | ||
1116 | pos = strpbrk(devname + 2, delims); | ||
1117 | if (!pos) | ||
1118 | return -EINVAL; | ||
1119 | |||
1120 | /* skip past delimiter */ | ||
1121 | ++pos; | ||
1122 | |||
1123 | /* now go until next delimiter or end of string */ | ||
1124 | len = strcspn(pos, delims); | ||
1125 | |||
1126 | /* move "pos" up to delimiter or NULL */ | ||
1127 | pos += len; | ||
1128 | vol->UNC = kstrndup(devname, pos - devname, GFP_KERNEL); | ||
1129 | if (!vol->UNC) | ||
1130 | return -ENOMEM; | ||
1131 | |||
1132 | convert_delimiter(vol->UNC, '\\'); | ||
1133 | |||
1134 | /* if we hit end of string or a bogus trailing delimiter, then no prepath */ | ||
1135 | if (!*pos++ || !*pos) | ||
1136 | return 0; | ||
1137 | |||
1138 | vol->prepath = kstrdup(pos, GFP_KERNEL); | ||
1139 | if (!vol->prepath) | ||
1140 | return -ENOMEM; | ||
1141 | |||
1142 | return 0; | ||
1143 | } | ||
1144 | |||
1093 | static int | 1145 | static int |
1094 | cifs_parse_mount_options(const char *mountdata, const char *devname, | 1146 | cifs_parse_mount_options(const char *mountdata, const char *devname, |
1095 | struct smb_vol *vol) | 1147 | struct smb_vol *vol) |
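The new cifs_parse_devname() above splits a device name such as //server/share/dir1/dir2 into the UNC and an optional prepath using nothing but strspn()/strpbrk()/strcspn(). A hedged userspace re-implementation of the same walk, with glibc strndup()/strdup() standing in for kstrndup()/kstrdup() and errno-style returns (the kernel additionally canonicalizes the UNC via convert_delimiter()):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_devname(const char *devname, char **unc, char **prepath)
{
    const char *delims = "/\\";
    char *pos;
    size_t len;

    *unc = *prepath = NULL;

    /* require the "//" or "\\" UNC double-delimiter prefix */
    if (strspn(devname, delims) != 2)
        return -EINVAL;

    /* find the delimiter between host and share name */
    pos = strpbrk(devname + 2, delims);
    if (!pos)
        return -EINVAL;
    ++pos;

    /* the share name runs to the next delimiter or end of string */
    len = strcspn(pos, delims);
    pos += len;

    *unc = strndup(devname, pos - devname);
    if (!*unc)
        return -ENOMEM;

    /* nothing after the share, or only a bogus trailing delimiter */
    if (!*pos++ || !*pos)
        return 0;

    *prepath = strdup(pos);
    return *prepath ? 0 : -ENOMEM;
}

int main(void)
{
    char *unc, *prepath;

    if (!parse_devname("//server/share/dir1/dir2", &unc, &prepath)) {
        /* prints: UNC: //server/share prepath: dir1/dir2 */
        printf("UNC: %s prepath: %s\n", unc, prepath ? prepath : "");
        free(unc);
        free(prepath);
    }
    return 0;
}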
@@ -1108,11 +1160,17 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1108 | char *string = NULL; | 1160 | char *string = NULL; |
1109 | char *tmp_end, *value; | 1161 | char *tmp_end, *value; |
1110 | char delim; | 1162 | char delim; |
1163 | bool got_ip = false; | ||
1164 | unsigned short port = 0; | ||
1165 | struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr; | ||
1111 | 1166 | ||
1112 | separator[0] = ','; | 1167 | separator[0] = ','; |
1113 | separator[1] = 0; | 1168 | separator[1] = 0; |
1114 | delim = separator[0]; | 1169 | delim = separator[0]; |
1115 | 1170 | ||
1171 | /* ensure we always start with zeroed-out smb_vol */ | ||
1172 | memset(vol, 0, sizeof(*vol)); | ||
1173 | |||
1116 | /* | 1174 | /* |
1117 | * does not have to be perfect mapping since field is | 1175 | * does not have to be perfect mapping since field is |
1118 | * informational, only used for servers that do not support | 1176 | * informational, only used for servers that do not support |
@@ -1169,6 +1227,16 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1169 | vol->backupuid_specified = false; /* no backup intent for a user */ | 1227 | vol->backupuid_specified = false; /* no backup intent for a user */ |
1170 | vol->backupgid_specified = false; /* no backup intent for a group */ | 1228 | vol->backupgid_specified = false; /* no backup intent for a group */ |
1171 | 1229 | ||
1230 | /* | ||
1231 | * For now, we ignore -EINVAL errors under the assumption that the | ||
1232 | * unc= and prefixpath= options will be usable. | ||
1233 | */ | ||
1234 | if (cifs_parse_devname(devname, vol) == -ENOMEM) { | ||
1235 | printk(KERN_ERR "CIFS: Unable to allocate memory to parse " | ||
1236 | "device string.\n"); | ||
1237 | goto out_nomem; | ||
1238 | } | ||
1239 | |||
1172 | while ((data = strsep(&options, separator)) != NULL) { | 1240 | while ((data = strsep(&options, separator)) != NULL) { |
1173 | substring_t args[MAX_OPT_ARGS]; | 1241 | substring_t args[MAX_OPT_ARGS]; |
1174 | unsigned long option; | 1242 | unsigned long option; |
@@ -1416,12 +1484,12 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1416 | vol->dir_mode = option; | 1484 | vol->dir_mode = option; |
1417 | break; | 1485 | break; |
1418 | case Opt_port: | 1486 | case Opt_port: |
1419 | if (get_option_ul(args, &option)) { | 1487 | if (get_option_ul(args, &option) || |
1420 | cERROR(1, "%s: Invalid port value", | 1488 | option > USHRT_MAX) { |
1421 | __func__); | 1489 | cERROR(1, "%s: Invalid port value", __func__); |
1422 | goto cifs_parse_mount_err; | 1490 | goto cifs_parse_mount_err; |
1423 | } | 1491 | } |
1424 | vol->port = option; | 1492 | port = (unsigned short)option; |
1425 | break; | 1493 | break; |
1426 | case Opt_rsize: | 1494 | case Opt_rsize: |
1427 | if (get_option_ul(args, &option)) { | 1495 | if (get_option_ul(args, &option)) { |
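Opt_port now rejects anything above USHRT_MAX before narrowing, where the old code silently truncated vol->port. A minimal sketch of the same check, assuming userspace strtoul() in place of the kernel's get_option_ul() helper:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_port(const char *value, unsigned short *port)
{
    char *end;
    unsigned long option = strtoul(value, &end, 10);

    /* reject trailing junk and values that cannot fit in 16 bits */
    if (*end != '\0' || option > USHRT_MAX)
        return -1;

    *port = (unsigned short)option;
    return 0;
}

int main(void)
{
    unsigned short port;

    if (!parse_port("445", &port))
        printf("port=%u\n", port);              /* port=445 */
    return parse_port("70000", &port) ? 0 : 1;  /* 70000 is rejected */
}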
@@ -1537,53 +1605,48 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1537 | vol->password[j] = '\0'; | 1605 | vol->password[j] = '\0'; |
1538 | break; | 1606 | break; |
1539 | case Opt_blank_ip: | 1607 | case Opt_blank_ip: |
1540 | vol->UNCip = NULL; | 1608 | /* FIXME: should this be an error instead? */ |
1609 | got_ip = false; | ||
1541 | break; | 1610 | break; |
1542 | case Opt_ip: | 1611 | case Opt_ip: |
1543 | string = match_strdup(args); | 1612 | string = match_strdup(args); |
1544 | if (string == NULL) | 1613 | if (string == NULL) |
1545 | goto out_nomem; | 1614 | goto out_nomem; |
1546 | 1615 | ||
1547 | if (strnlen(string, INET6_ADDRSTRLEN) > | 1616 | if (!cifs_convert_address(dstaddr, string, |
1548 | INET6_ADDRSTRLEN) { | 1617 | strlen(string))) { |
1549 | printk(KERN_WARNING "CIFS: ip address " | 1618 | printk(KERN_ERR "CIFS: bad ip= option (%s).\n", |
1550 | "too long\n"); | 1619 | string); |
1551 | goto cifs_parse_mount_err; | ||
1552 | } | ||
1553 | vol->UNCip = kstrdup(string, GFP_KERNEL); | ||
1554 | if (!vol->UNCip) { | ||
1555 | printk(KERN_WARNING "CIFS: no memory " | ||
1556 | "for UNC IP\n"); | ||
1557 | goto cifs_parse_mount_err; | 1620 | goto cifs_parse_mount_err; |
1558 | } | 1621 | } |
1622 | got_ip = true; | ||
1559 | break; | 1623 | break; |
1560 | case Opt_unc: | 1624 | case Opt_unc: |
1561 | string = match_strdup(args); | 1625 | string = vol->UNC; |
1562 | if (string == NULL) | 1626 | vol->UNC = match_strdup(args); |
1627 | if (vol->UNC == NULL) { | ||
1628 | kfree(string); | ||
1563 | goto out_nomem; | 1629 | goto out_nomem; |
1564 | |||
1565 | temp_len = strnlen(string, 300); | ||
1566 | if (temp_len == 300) { | ||
1567 | printk(KERN_WARNING "CIFS: UNC name too long\n"); | ||
1568 | goto cifs_parse_mount_err; | ||
1569 | } | 1630 | } |
1570 | 1631 | ||
1571 | vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); | 1632 | convert_delimiter(vol->UNC, '\\'); |
1572 | if (vol->UNC == NULL) { | 1633 | if (vol->UNC[0] != '\\' || vol->UNC[1] != '\\') { |
1573 | printk(KERN_WARNING "CIFS: no memory for UNC\n"); | 1634 | kfree(string); |
1574 | goto cifs_parse_mount_err; | 1635 | printk(KERN_ERR "CIFS: UNC Path does not " |
1575 | } | 1636 | "begin with // or \\\\\n"); |
1576 | strcpy(vol->UNC, string); | ||
1577 | |||
1578 | if (strncmp(string, "//", 2) == 0) { | ||
1579 | vol->UNC[0] = '\\'; | ||
1580 | vol->UNC[1] = '\\'; | ||
1581 | } else if (strncmp(string, "\\\\", 2) != 0) { | ||
1582 | printk(KERN_WARNING "CIFS: UNC Path does not " | ||
1583 | "begin with // or \\\\\n"); | ||
1584 | goto cifs_parse_mount_err; | 1637 | goto cifs_parse_mount_err; |
1585 | } | 1638 | } |
1586 | 1639 | ||
1640 | /* Compare old unc= option to new one */ | ||
1641 | if (!string || strcmp(string, vol->UNC)) | ||
1642 | printk(KERN_WARNING "CIFS: the value of the " | ||
1643 | "unc= mount option does not match the " | ||
1644 | "device string. Using the unc= option " | ||
1645 | "for now. In 3.10, that option will " | ||
1646 | "be ignored and the contents of the " | ||
1647 | "device string will be used " | ||
1648 | "instead. (%s != %s)\n", string, | ||
1649 | vol->UNC); | ||
1587 | break; | 1650 | break; |
1588 | case Opt_domain: | 1651 | case Opt_domain: |
1589 | string = match_strdup(args); | 1652 | string = match_strdup(args); |
@@ -1618,31 +1681,26 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1618 | } | 1681 | } |
1619 | break; | 1682 | break; |
1620 | case Opt_prefixpath: | 1683 | case Opt_prefixpath: |
1621 | string = match_strdup(args); | 1684 | /* skip over any leading delimiter */ |
1622 | if (string == NULL) | 1685 | if (*args[0].from == '/' || *args[0].from == '\\') |
1623 | goto out_nomem; | 1686 | args[0].from++; |
1624 | |||
1625 | temp_len = strnlen(string, 1024); | ||
1626 | if (string[0] != '/') | ||
1627 | temp_len++; /* missing leading slash */ | ||
1628 | if (temp_len > 1024) { | ||
1629 | printk(KERN_WARNING "CIFS: prefix too long\n"); | ||
1630 | goto cifs_parse_mount_err; | ||
1631 | } | ||
1632 | 1687 | ||
1633 | vol->prepath = kmalloc(temp_len+1, GFP_KERNEL); | 1688 | string = vol->prepath; |
1689 | vol->prepath = match_strdup(args); | ||
1634 | if (vol->prepath == NULL) { | 1690 | if (vol->prepath == NULL) { |
1635 | printk(KERN_WARNING "CIFS: no memory " | 1691 | kfree(string); |
1636 | "for path prefix\n"); | 1692 | goto out_nomem; |
1637 | goto cifs_parse_mount_err; | ||
1638 | } | 1693 | } |
1639 | 1694 | /* Compare old prefixpath= option to new one */ | |
1640 | if (string[0] != '/') { | 1695 | if (!string || strcmp(string, vol->prepath)) |
1641 | vol->prepath[0] = '/'; | 1696 | printk(KERN_WARNING "CIFS: the value of the " |
1642 | strcpy(vol->prepath+1, string); | 1697 | "prefixpath= mount option does not " |
1643 | } else | 1698 | "match the device string. Using the " |
1644 | strcpy(vol->prepath, string); | 1699 | "prefixpath= option for now. In 3.10, " |
1645 | 1700 | "that option will be ignored and the " | |
1701 | "contents of the device string will be " | ||
1702 | "used instead. (%s != %s)\n", string, | ||
1703 | vol->prepath); | ||
1646 | break; | 1704 | break; |
1647 | case Opt_iocharset: | 1705 | case Opt_iocharset: |
1648 | string = match_strdup(args); | 1706 | string = match_strdup(args); |
@@ -1799,9 +1857,30 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, | |||
1799 | goto cifs_parse_mount_err; | 1857 | goto cifs_parse_mount_err; |
1800 | } | 1858 | } |
1801 | #endif | 1859 | #endif |
1860 | if (!vol->UNC) { | ||
1861 | cERROR(1, "CIFS mount error: No usable UNC path provided in " | ||
1862 | "device string or in unc= option!"); | ||
1863 | goto cifs_parse_mount_err; | ||
1864 | } | ||
1802 | 1865 | ||
1803 | if (vol->UNCip == NULL) | 1866 | /* make sure UNC has a share name */ |
1804 | vol->UNCip = &vol->UNC[2]; | 1867 | if (!strchr(vol->UNC + 3, '\\')) { |
1868 | cERROR(1, "Malformed UNC. Unable to find share name."); | ||
1869 | goto cifs_parse_mount_err; | ||
1870 | } | ||
1871 | |||
1872 | if (!got_ip) { | ||
1873 | /* No ip= option specified? Try to get it from UNC */ | ||
1874 | if (!cifs_convert_address(dstaddr, &vol->UNC[2], | ||
1875 | strlen(&vol->UNC[2]))) { | ||
1876 | printk(KERN_ERR "Unable to determine destination " | ||
1877 | "address.\n"); | ||
1878 | goto cifs_parse_mount_err; | ||
1879 | } | ||
1880 | } | ||
1881 | |||
1882 | /* set the port that we got earlier */ | ||
1883 | cifs_set_port(dstaddr, port); | ||
1805 | 1884 | ||
1806 | if (uid_specified) | 1885 | if (uid_specified) |
1807 | vol->override_uid = override_uid; | 1886 | vol->override_uid = override_uid; |
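With the UNCip field gone, a missing ip= option now falls back to the host component embedded in the canonicalized UNC (the bytes between the leading backslash pair and the next backslash). A userspace analog of that fallback, assuming numeric addresses only via inet_pton() (the kernel's cifs_convert_address() accepts a few more spellings):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

static int addr_from_unc(const char *unc, struct sockaddr_storage *ss)
{
    char host[256];
    size_t len = strcspn(unc + 2, "\\");   /* host ends at next '\' */

    if (len == 0 || len >= sizeof(host))
        return 0;
    memcpy(host, unc + 2, len);
    host[len] = '\0';

    if (inet_pton(AF_INET, host,
                  &((struct sockaddr_in *)ss)->sin_addr) == 1) {
        ss->ss_family = AF_INET;
        return 1;
    }
    if (inet_pton(AF_INET6, host,
                  &((struct sockaddr_in6 *)ss)->sin6_addr) == 1) {
        ss->ss_family = AF_INET6;
        return 1;
    }
    return 0;   /* no usable address - the mount fails, as above */
}

int main(void)
{
    struct sockaddr_storage ss;

    /* the UNC literal below is \\192.168.1.100\public */
    printf("%d\n", addr_from_unc("\\\\192.168.1.100\\public", &ss));
    return 0;
}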
@@ -1972,9 +2051,10 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol) | |||
1972 | return true; | 2051 | return true; |
1973 | } | 2052 | } |
1974 | 2053 | ||
1975 | static int match_server(struct TCP_Server_Info *server, struct sockaddr *addr, | 2054 | static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) |
1976 | struct smb_vol *vol) | ||
1977 | { | 2055 | { |
2056 | struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr; | ||
2057 | |||
1978 | if ((server->vals != vol->vals) || (server->ops != vol->ops)) | 2058 | if ((server->vals != vol->vals) || (server->ops != vol->ops)) |
1979 | return 0; | 2059 | return 0; |
1980 | 2060 | ||
@@ -1995,13 +2075,13 @@ static int match_server(struct TCP_Server_Info *server, struct sockaddr *addr, | |||
1995 | } | 2075 | } |
1996 | 2076 | ||
1997 | static struct TCP_Server_Info * | 2077 | static struct TCP_Server_Info * |
1998 | cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) | 2078 | cifs_find_tcp_session(struct smb_vol *vol) |
1999 | { | 2079 | { |
2000 | struct TCP_Server_Info *server; | 2080 | struct TCP_Server_Info *server; |
2001 | 2081 | ||
2002 | spin_lock(&cifs_tcp_ses_lock); | 2082 | spin_lock(&cifs_tcp_ses_lock); |
2003 | list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { | 2083 | list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { |
2004 | if (!match_server(server, addr, vol)) | 2084 | if (!match_server(server, vol)) |
2005 | continue; | 2085 | continue; |
2006 | 2086 | ||
2007 | ++server->srv_count; | 2087 | ++server->srv_count; |
@@ -2051,40 +2131,12 @@ static struct TCP_Server_Info * | |||
2051 | cifs_get_tcp_session(struct smb_vol *volume_info) | 2131 | cifs_get_tcp_session(struct smb_vol *volume_info) |
2052 | { | 2132 | { |
2053 | struct TCP_Server_Info *tcp_ses = NULL; | 2133 | struct TCP_Server_Info *tcp_ses = NULL; |
2054 | struct sockaddr_storage addr; | ||
2055 | struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr; | ||
2056 | struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr; | ||
2057 | int rc; | 2134 | int rc; |
2058 | 2135 | ||
2059 | memset(&addr, 0, sizeof(struct sockaddr_storage)); | 2136 | cFYI(1, "UNC: %s", volume_info->UNC); |
2060 | |||
2061 | cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip); | ||
2062 | |||
2063 | if (volume_info->UNCip && volume_info->UNC) { | ||
2064 | rc = cifs_fill_sockaddr((struct sockaddr *)&addr, | ||
2065 | volume_info->UNCip, | ||
2066 | strlen(volume_info->UNCip), | ||
2067 | volume_info->port); | ||
2068 | if (!rc) { | ||
2069 | /* we failed translating address */ | ||
2070 | rc = -EINVAL; | ||
2071 | goto out_err; | ||
2072 | } | ||
2073 | } else if (volume_info->UNCip) { | ||
2074 | /* BB using ip addr as tcp_ses name to connect to the | ||
2075 | DFS root below */ | ||
2076 | cERROR(1, "Connecting to DFS root not implemented yet"); | ||
2077 | rc = -EINVAL; | ||
2078 | goto out_err; | ||
2079 | } else /* which tcp_sess DFS root would we connect to */ { | ||
2080 | cERROR(1, "CIFS mount error: No UNC path (e.g. -o " | ||
2081 | "unc=//192.168.1.100/public) specified"); | ||
2082 | rc = -EINVAL; | ||
2083 | goto out_err; | ||
2084 | } | ||
2085 | 2137 | ||
2086 | /* see if we already have a matching tcp_ses */ | 2138 | /* see if we already have a matching tcp_ses */ |
2087 | tcp_ses = cifs_find_tcp_session((struct sockaddr *)&addr, volume_info); | 2139 | tcp_ses = cifs_find_tcp_session(volume_info); |
2088 | if (tcp_ses) | 2140 | if (tcp_ses) |
2089 | return tcp_ses; | 2141 | return tcp_ses; |
2090 | 2142 | ||
@@ -2129,27 +2181,18 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
2129 | INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); | 2181 | INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); |
2130 | INIT_LIST_HEAD(&tcp_ses->smb_ses_list); | 2182 | INIT_LIST_HEAD(&tcp_ses->smb_ses_list); |
2131 | INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); | 2183 | INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); |
2132 | 2184 | memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr, | |
2185 | sizeof(tcp_ses->srcaddr)); | ||
2186 | memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr, | ||
2187 | sizeof(tcp_ses->dstaddr)); | ||
2133 | /* | 2188 | /* |
2134 | * at this point we are the only ones with the pointer | 2189 | * at this point we are the only ones with the pointer |
2135 | * to the struct since the kernel thread not created yet | 2190 | * to the struct since the kernel thread not created yet |
2136 | * no need to spinlock this init of tcpStatus or srv_count | 2191 | * no need to spinlock this init of tcpStatus or srv_count |
2137 | */ | 2192 | */ |
2138 | tcp_ses->tcpStatus = CifsNew; | 2193 | tcp_ses->tcpStatus = CifsNew; |
2139 | memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr, | ||
2140 | sizeof(tcp_ses->srcaddr)); | ||
2141 | ++tcp_ses->srv_count; | 2194 | ++tcp_ses->srv_count; |
2142 | 2195 | ||
2143 | if (addr.ss_family == AF_INET6) { | ||
2144 | cFYI(1, "attempting ipv6 connect"); | ||
2145 | /* BB should we allow ipv6 on port 139? */ | ||
2146 | /* no other OS observed in the wild doing 139 with v6 */ | ||
2147 | memcpy(&tcp_ses->dstaddr, sin_server6, | ||
2148 | sizeof(struct sockaddr_in6)); | ||
2149 | } else | ||
2150 | memcpy(&tcp_ses->dstaddr, sin_server, | ||
2151 | sizeof(struct sockaddr_in)); | ||
2152 | |||
2153 | rc = ip_connect(tcp_ses); | 2196 | rc = ip_connect(tcp_ses); |
2154 | if (rc < 0) { | 2197 | if (rc < 0) { |
2155 | cERROR(1, "Error connecting to socket. Aborting operation"); | 2198 | cERROR(1, "Error connecting to socket. Aborting operation"); |
@@ -2397,8 +2440,6 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)), | |||
2397 | } | 2440 | } |
2398 | #endif /* CONFIG_KEYS */ | 2441 | #endif /* CONFIG_KEYS */ |
2399 | 2442 | ||
2400 | static bool warned_on_ntlm; /* globals init to false automatically */ | ||
2401 | |||
2402 | static struct cifs_ses * | 2443 | static struct cifs_ses * |
2403 | cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | 2444 | cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) |
2404 | { | 2445 | { |
@@ -2475,14 +2516,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
2475 | ses->cred_uid = volume_info->cred_uid; | 2516 | ses->cred_uid = volume_info->cred_uid; |
2476 | ses->linux_uid = volume_info->linux_uid; | 2517 | ses->linux_uid = volume_info->linux_uid; |
2477 | 2518 | ||
2478 | /* ntlmv2 is much stronger than ntlm security, and has been broadly | ||
2479 | supported for many years, time to update default security mechanism */ | ||
2480 | if ((volume_info->secFlg == 0) && warned_on_ntlm == false) { | ||
2481 | warned_on_ntlm = true; | ||
2482 | cERROR(1, "default security mechanism requested. The default " | ||
2483 | "security mechanism will be upgraded from ntlm to " | ||
2484 | "ntlmv2 in kernel release 3.3"); | ||
2485 | } | ||
2486 | ses->overrideSecFlg = volume_info->secFlg; | 2519 | ses->overrideSecFlg = volume_info->secFlg; |
2487 | 2520 | ||
2488 | mutex_lock(&ses->session_mutex); | 2521 | mutex_lock(&ses->session_mutex); |
@@ -2598,13 +2631,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info) | |||
2598 | } | 2631 | } |
2599 | } | 2632 | } |
2600 | 2633 | ||
2601 | if (strchr(volume_info->UNC + 3, '\\') == NULL | ||
2602 | && strchr(volume_info->UNC + 3, '/') == NULL) { | ||
2603 | cERROR(1, "Missing share name"); | ||
2604 | rc = -ENODEV; | ||
2605 | goto out_fail; | ||
2606 | } | ||
2607 | |||
2608 | /* | 2634 | /* |
2609 | * BB Do we need to wrap session_mutex around this TCon call and Unix | 2635 | * BB Do we need to wrap session_mutex around this TCon call and Unix |
2610 | * SetFS as we do on SessSetup and reconnect? | 2636 | * SetFS as we do on SessSetup and reconnect? |
@@ -2718,11 +2744,8 @@ cifs_match_super(struct super_block *sb, void *data) | |||
2718 | struct cifs_ses *ses; | 2744 | struct cifs_ses *ses; |
2719 | struct cifs_tcon *tcon; | 2745 | struct cifs_tcon *tcon; |
2720 | struct tcon_link *tlink; | 2746 | struct tcon_link *tlink; |
2721 | struct sockaddr_storage addr; | ||
2722 | int rc = 0; | 2747 | int rc = 0; |
2723 | 2748 | ||
2724 | memset(&addr, 0, sizeof(struct sockaddr_storage)); | ||
2725 | |||
2726 | spin_lock(&cifs_tcp_ses_lock); | 2749 | spin_lock(&cifs_tcp_ses_lock); |
2727 | cifs_sb = CIFS_SB(sb); | 2750 | cifs_sb = CIFS_SB(sb); |
2728 | tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); | 2751 | tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); |
@@ -2736,17 +2759,7 @@ cifs_match_super(struct super_block *sb, void *data) | |||
2736 | 2759 | ||
2737 | volume_info = mnt_data->vol; | 2760 | volume_info = mnt_data->vol; |
2738 | 2761 | ||
2739 | if (!volume_info->UNCip || !volume_info->UNC) | 2762 | if (!match_server(tcp_srv, volume_info) || |
2740 | goto out; | ||
2741 | |||
2742 | rc = cifs_fill_sockaddr((struct sockaddr *)&addr, | ||
2743 | volume_info->UNCip, | ||
2744 | strlen(volume_info->UNCip), | ||
2745 | volume_info->port); | ||
2746 | if (!rc) | ||
2747 | goto out; | ||
2748 | |||
2749 | if (!match_server(tcp_srv, (struct sockaddr *)&addr, volume_info) || | ||
2750 | !match_session(ses, volume_info) || | 2763 | !match_session(ses, volume_info) || |
2751 | !match_tcon(tcon, volume_info->UNC)) { | 2764 | !match_tcon(tcon, volume_info->UNC)) { |
2752 | rc = 0; | 2765 | rc = 0; |
@@ -3261,8 +3274,6 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) | |||
3261 | { | 3274 | { |
3262 | kfree(volume_info->username); | 3275 | kfree(volume_info->username); |
3263 | kzfree(volume_info->password); | 3276 | kzfree(volume_info->password); |
3264 | if (volume_info->UNCip != volume_info->UNC + 2) | ||
3265 | kfree(volume_info->UNCip); | ||
3266 | kfree(volume_info->UNC); | 3277 | kfree(volume_info->UNC); |
3267 | kfree(volume_info->domainname); | 3278 | kfree(volume_info->domainname); |
3268 | kfree(volume_info->iocharset); | 3279 | kfree(volume_info->iocharset); |
@@ -3280,14 +3291,16 @@ cifs_cleanup_volume_info(struct smb_vol *volume_info) | |||
3280 | 3291 | ||
3281 | 3292 | ||
3282 | #ifdef CONFIG_CIFS_DFS_UPCALL | 3293 | #ifdef CONFIG_CIFS_DFS_UPCALL |
3283 | /* build_path_to_root returns full path to root when | 3294 | /* |
3284 | * we do not have an existing connection (tcon) */ | 3295 | * cifs_build_path_to_root returns full path to root when we do not have an
3296 | * existing connection (tcon) | ||
3297 | */ | ||
3285 | static char * | 3298 | static char * |
3286 | build_unc_path_to_root(const struct smb_vol *vol, | 3299 | build_unc_path_to_root(const struct smb_vol *vol, |
3287 | const struct cifs_sb_info *cifs_sb) | 3300 | const struct cifs_sb_info *cifs_sb) |
3288 | { | 3301 | { |
3289 | char *full_path, *pos; | 3302 | char *full_path, *pos; |
3290 | unsigned int pplen = vol->prepath ? strlen(vol->prepath) : 0; | 3303 | unsigned int pplen = vol->prepath ? strlen(vol->prepath) + 1 : 0; |
3291 | unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); | 3304 | unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); |
3292 | 3305 | ||
3293 | full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); | 3306 | full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); |
@@ -3298,6 +3311,7 @@ build_unc_path_to_root(const struct smb_vol *vol, | |||
3298 | pos = full_path + unc_len; | 3311 | pos = full_path + unc_len; |
3299 | 3312 | ||
3300 | if (pplen) { | 3313 | if (pplen) { |
3314 | *pos++ = CIFS_DIR_SEP(cifs_sb); | ||
3301 | strncpy(pos, vol->prepath, pplen); | 3315 | strncpy(pos, vol->prepath, pplen); |
3302 | pos += pplen; | 3316 | pos += pplen; |
3303 | } | 3317 | } |
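The pplen change above reserves one extra byte because a directory separator is now written before the prepath; without the + 1 the buffer would end up one byte short. A small userspace sketch of the corrected buffer math (hypothetical helper, not the kernel function):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *build_unc_path(const char *unc, const char *prepath, char sep)
{
    size_t unc_len = strlen(unc);
    size_t pplen = prepath ? strlen(prepath) + 1 : 0;  /* +1 for sep */
    char *full = malloc(unc_len + pplen + 1);          /* +1 for NUL */
    char *pos;

    if (!full)
        return NULL;
    memcpy(full, unc, unc_len);
    pos = full + unc_len;
    if (pplen) {
        *pos++ = sep;                   /* the byte the +1 pays for */
        memcpy(pos, prepath, pplen - 1);
        pos += pplen - 1;
    }
    *pos = '\0';
    return full;
}

int main(void)
{
    char *p = build_unc_path("\\\\srv\\share", "dir1\\dir2", '\\');

    if (p) {
        puts(p);        /* \\srv\share\dir1\dir2 */
        free(p);
    }
    return 0;
}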
@@ -3353,7 +3367,6 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, | |||
3353 | mdata = NULL; | 3367 | mdata = NULL; |
3354 | } else { | 3368 | } else { |
3355 | cleanup_volume_info_contents(volume_info); | 3369 | cleanup_volume_info_contents(volume_info); |
3356 | memset(volume_info, '\0', sizeof(*volume_info)); | ||
3357 | rc = cifs_setup_volume_info(volume_info, mdata, | 3370 | rc = cifs_setup_volume_info(volume_info, mdata, |
3358 | fake_devname); | 3371 | fake_devname); |
3359 | } | 3372 | } |
@@ -3375,7 +3388,6 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, | |||
3375 | if (cifs_parse_mount_options(mount_data, devname, volume_info)) | 3388 | if (cifs_parse_mount_options(mount_data, devname, volume_info)) |
3376 | return -EINVAL; | 3389 | return -EINVAL; |
3377 | 3390 | ||
3378 | |||
3379 | if (volume_info->nullauth) { | 3391 | if (volume_info->nullauth) { |
3380 | cFYI(1, "Anonymous login"); | 3392 | cFYI(1, "Anonymous login"); |
3381 | kfree(volume_info->username); | 3393 | kfree(volume_info->username); |
@@ -3412,7 +3424,7 @@ cifs_get_volume_info(char *mount_data, const char *devname) | |||
3412 | int rc; | 3424 | int rc; |
3413 | struct smb_vol *volume_info; | 3425 | struct smb_vol *volume_info; |
3414 | 3426 | ||
3415 | volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); | 3427 | volume_info = kmalloc(sizeof(struct smb_vol), GFP_KERNEL); |
3416 | if (!volume_info) | 3428 | if (!volume_info) |
3417 | return ERR_PTR(-ENOMEM); | 3429 | return ERR_PTR(-ENOMEM); |
3418 | 3430 | ||
@@ -3537,8 +3549,10 @@ remote_path_check: | |||
3537 | rc = -ENOSYS; | 3549 | rc = -ENOSYS; |
3538 | goto mount_fail_check; | 3550 | goto mount_fail_check; |
3539 | } | 3551 | } |
3540 | /* build_path_to_root works only when we have a valid tcon */ | 3552 | /* |
3541 | full_path = build_path_to_root(volume_info, cifs_sb, tcon); | 3553 | * cifs_build_path_to_root works only when we have a valid tcon |
3554 | */ | ||
3555 | full_path = cifs_build_path_to_root(volume_info, cifs_sb, tcon); | ||
3542 | if (full_path == NULL) { | 3556 | if (full_path == NULL) { |
3543 | rc = -ENOMEM; | 3557 | rc = -ENOMEM; |
3544 | goto mount_fail_check; | 3558 | goto mount_fail_check; |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d3671f2acb29..8719bbe0dcc3 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -44,6 +44,38 @@ renew_parental_timestamps(struct dentry *direntry) | |||
44 | } while (!IS_ROOT(direntry)); | 44 | } while (!IS_ROOT(direntry)); |
45 | } | 45 | } |
46 | 46 | ||
47 | char * | ||
48 | cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, | ||
49 | struct cifs_tcon *tcon) | ||
50 | { | ||
51 | int pplen = vol->prepath ? strlen(vol->prepath) + 1 : 0; | ||
52 | int dfsplen; | ||
53 | char *full_path = NULL; | ||
54 | |||
55 | /* with no prefix path, the path to the root of the share is simply "" */ | ||
56 | if (pplen == 0) { | ||
57 | full_path = kzalloc(1, GFP_KERNEL); | ||
58 | return full_path; | ||
59 | } | ||
60 | |||
61 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) | ||
62 | dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); | ||
63 | else | ||
64 | dfsplen = 0; | ||
65 | |||
66 | full_path = kmalloc(dfsplen + pplen + 1, GFP_KERNEL); | ||
67 | if (full_path == NULL) | ||
68 | return full_path; | ||
69 | |||
70 | if (dfsplen) | ||
71 | strncpy(full_path, tcon->treeName, dfsplen); | ||
72 | full_path[dfsplen] = CIFS_DIR_SEP(cifs_sb); | ||
73 | strncpy(full_path + dfsplen + 1, vol->prepath, pplen); | ||
74 | convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); | ||
75 | full_path[dfsplen + pplen] = 0; /* add trailing null */ | ||
76 | return full_path; | ||
77 | } | ||
78 | |||
47 | /* Note: caller must free return buffer */ | 79 | /* Note: caller must free return buffer */ |
48 | char * | 80 | char * |
49 | build_path_from_dentry(struct dentry *direntry) | 81 | build_path_from_dentry(struct dentry *direntry) |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index edb25b4bbb95..0a6677ba212b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -505,16 +505,36 @@ out: | |||
505 | return rc; | 505 | return rc; |
506 | } | 506 | } |
507 | 507 | ||
508 | static int cifs_push_posix_locks(struct cifsFileInfo *cfile); | ||
509 | |||
508 | /* | 510 | /* |
509 | * Try to reacquire byte range locks that were released when session | 511 | * Try to reacquire byte range locks that were released when session |
510 | * to server was lost | 512 | * to server was lost. |
511 | */ | 513 | */ |
512 | static int cifs_relock_file(struct cifsFileInfo *cifsFile) | 514 | static int |
515 | cifs_relock_file(struct cifsFileInfo *cfile) | ||
513 | { | 516 | { |
517 | struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); | ||
518 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
519 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | ||
514 | int rc = 0; | 520 | int rc = 0; |
515 | 521 | ||
516 | /* BB list all locks open on this file and relock */ | 522 | /* we are going to update can_cache_brlcks here - need write access */
523 | down_write(&cinode->lock_sem); | ||
524 | if (cinode->can_cache_brlcks) { | ||
525 | /* can cache locks - no need to push them */ | ||
526 | up_write(&cinode->lock_sem); | ||
527 | return rc; | ||
528 | } | ||
517 | 529 | ||
530 | if (cap_unix(tcon->ses) && | ||
531 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | ||
532 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | ||
533 | rc = cifs_push_posix_locks(cfile); | ||
534 | else | ||
535 | rc = tcon->ses->server->ops->push_mand_locks(cfile); | ||
536 | |||
537 | up_write(&cinode->lock_sem); | ||
518 | return rc; | 538 | return rc; |
519 | } | 539 | } |
520 | 540 | ||
@@ -739,10 +759,15 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock) | |||
739 | } | 759 | } |
740 | } | 760 | } |
741 | 761 | ||
762 | #define CIFS_LOCK_OP 0 | ||
763 | #define CIFS_READ_OP 1 | ||
764 | #define CIFS_WRITE_OP 2 | ||
765 | |||
766 | /* @rw_check : one of CIFS_LOCK_OP, CIFS_READ_OP or CIFS_WRITE_OP */ | ||
742 | static bool | 767 | static bool |
743 | cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, | 768 | cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, |
744 | __u64 length, __u8 type, struct cifsFileInfo *cfile, | 769 | __u64 length, __u8 type, struct cifsFileInfo *cfile, |
745 | struct cifsLockInfo **conf_lock, bool rw_check) | 770 | struct cifsLockInfo **conf_lock, int rw_check) |
746 | { | 771 | { |
747 | struct cifsLockInfo *li; | 772 | struct cifsLockInfo *li; |
748 | struct cifsFileInfo *cur_cfile = fdlocks->cfile; | 773 | struct cifsFileInfo *cur_cfile = fdlocks->cfile; |
@@ -752,9 +777,13 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, | |||
752 | if (offset + length <= li->offset || | 777 | if (offset + length <= li->offset || |
753 | offset >= li->offset + li->length) | 778 | offset >= li->offset + li->length) |
754 | continue; | 779 | continue; |
755 | if (rw_check && server->ops->compare_fids(cfile, cur_cfile) && | 780 | if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid && |
756 | current->tgid == li->pid) | 781 | server->ops->compare_fids(cfile, cur_cfile)) { |
757 | continue; | 782 | /* shared lock prevents write op through the same fid */ |
783 | if (!(li->type & server->vals->shared_lock_type) || | ||
784 | rw_check != CIFS_WRITE_OP) | ||
785 | continue; | ||
786 | } | ||
758 | if ((type & server->vals->shared_lock_type) && | 787 | if ((type & server->vals->shared_lock_type) && |
759 | ((server->ops->compare_fids(cfile, cur_cfile) && | 788 | ((server->ops->compare_fids(cfile, cur_cfile) && |
760 | current->tgid == li->pid) || type == li->type)) | 789 | current->tgid == li->pid) || type == li->type)) |
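rw_check is now a tri-state rather than a bool: CIFS_LOCK_OP keeps the old conflict semantics, while CIFS_READ_OP and CIFS_WRITE_OP let a task skip its own locks, except that a shared (read) lock held through the same fid must still block a write. The skip decision, distilled into a standalone model (hypothetical struct held_lock; when this returns true the real code continues on to the type/owner checks):

#include <stdbool.h>
#include <stdio.h>

#define CIFS_LOCK_OP  0
#define CIFS_READ_OP  1
#define CIFS_WRITE_OP 2

struct held_lock { bool shared; int pid; int fid; };

/* true: the held lock must still be considered; false: skip it */
static bool still_considered(const struct held_lock *li, int my_pid,
                             int my_fid, int rw_check)
{
    if (rw_check != CIFS_LOCK_OP && my_pid == li->pid &&
        my_fid == li->fid) {
        /* a shared lock still blocks a write through the same fid */
        if (!li->shared || rw_check != CIFS_WRITE_OP)
            return false;
    }
    return true;
}

int main(void)
{
    struct held_lock li = { .shared = true, .pid = 1, .fid = 1 };

    /* a read through the owning fid skips the lock: prints 0 */
    printf("read:  %d\n", still_considered(&li, 1, 1, CIFS_READ_OP));
    /* a write through the same fid does not: prints 1 */
    printf("write: %d\n", still_considered(&li, 1, 1, CIFS_WRITE_OP));
    return 0;
}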
@@ -769,7 +798,7 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, | |||
769 | bool | 798 | bool |
770 | cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, | 799 | cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, |
771 | __u8 type, struct cifsLockInfo **conf_lock, | 800 | __u8 type, struct cifsLockInfo **conf_lock, |
772 | bool rw_check) | 801 | int rw_check) |
773 | { | 802 | { |
774 | bool rc = false; | 803 | bool rc = false; |
775 | struct cifs_fid_locks *cur; | 804 | struct cifs_fid_locks *cur; |
@@ -805,7 +834,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length, | |||
805 | down_read(&cinode->lock_sem); | 834 | down_read(&cinode->lock_sem); |
806 | 835 | ||
807 | exist = cifs_find_lock_conflict(cfile, offset, length, type, | 836 | exist = cifs_find_lock_conflict(cfile, offset, length, type, |
808 | &conf_lock, false); | 837 | &conf_lock, CIFS_LOCK_OP); |
809 | if (exist) { | 838 | if (exist) { |
810 | flock->fl_start = conf_lock->offset; | 839 | flock->fl_start = conf_lock->offset; |
811 | flock->fl_end = conf_lock->offset + conf_lock->length - 1; | 840 | flock->fl_end = conf_lock->offset + conf_lock->length - 1; |
@@ -852,7 +881,7 @@ try_again: | |||
852 | down_write(&cinode->lock_sem); | 881 | down_write(&cinode->lock_sem); |
853 | 882 | ||
854 | exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, | 883 | exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, |
855 | lock->type, &conf_lock, false); | 884 | lock->type, &conf_lock, CIFS_LOCK_OP); |
856 | if (!exist && cinode->can_cache_brlcks) { | 885 | if (!exist && cinode->can_cache_brlcks) { |
857 | list_add_tail(&lock->llist, &cfile->llist->locks); | 886 | list_add_tail(&lock->llist, &cfile->llist->locks); |
858 | up_write(&cinode->lock_sem); | 887 | up_write(&cinode->lock_sem); |
@@ -948,7 +977,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
948 | int rc = 0, stored_rc; | 977 | int rc = 0, stored_rc; |
949 | struct cifsLockInfo *li, *tmp; | 978 | struct cifsLockInfo *li, *tmp; |
950 | struct cifs_tcon *tcon; | 979 | struct cifs_tcon *tcon; |
951 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
952 | unsigned int num, max_num, max_buf; | 980 | unsigned int num, max_num, max_buf; |
953 | LOCKING_ANDX_RANGE *buf, *cur; | 981 | LOCKING_ANDX_RANGE *buf, *cur; |
954 | int types[] = {LOCKING_ANDX_LARGE_FILES, | 982 | int types[] = {LOCKING_ANDX_LARGE_FILES, |
@@ -958,21 +986,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
958 | xid = get_xid(); | 986 | xid = get_xid(); |
959 | tcon = tlink_tcon(cfile->tlink); | 987 | tcon = tlink_tcon(cfile->tlink); |
960 | 988 | ||
961 | /* we are going to update can_cache_brlcks here - need write access */ | ||
962 | down_write(&cinode->lock_sem); | ||
963 | if (!cinode->can_cache_brlcks) { | ||
964 | up_write(&cinode->lock_sem); | ||
965 | free_xid(xid); | ||
966 | return rc; | ||
967 | } | ||
968 | |||
969 | /* | 989 | /* |
970 | * Accessing maxBuf is racy with cifs_reconnect - need to store value | 990 | * Accessing maxBuf is racy with cifs_reconnect - need to store value |
971 | * and check it for zero before using. | 991 | * and check it for zero before using. |
972 | */ | 992 | */ |
973 | max_buf = tcon->ses->server->maxBuf; | 993 | max_buf = tcon->ses->server->maxBuf; |
974 | if (!max_buf) { | 994 | if (!max_buf) { |
975 | up_write(&cinode->lock_sem); | ||
976 | free_xid(xid); | 995 | free_xid(xid); |
977 | return -EINVAL; | 996 | return -EINVAL; |
978 | } | 997 | } |
@@ -981,7 +1000,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
981 | sizeof(LOCKING_ANDX_RANGE); | 1000 | sizeof(LOCKING_ANDX_RANGE); |
982 | buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); | 1001 | buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); |
983 | if (!buf) { | 1002 | if (!buf) { |
984 | up_write(&cinode->lock_sem); | ||
985 | free_xid(xid); | 1003 | free_xid(xid); |
986 | return -ENOMEM; | 1004 | return -ENOMEM; |
987 | } | 1005 | } |
@@ -1018,9 +1036,6 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
1018 | } | 1036 | } |
1019 | } | 1037 | } |
1020 | 1038 | ||
1021 | cinode->can_cache_brlcks = false; | ||
1022 | up_write(&cinode->lock_sem); | ||
1023 | |||
1024 | kfree(buf); | 1039 | kfree(buf); |
1025 | free_xid(xid); | 1040 | free_xid(xid); |
1026 | return rc; | 1041 | return rc; |
@@ -1043,7 +1058,6 @@ struct lock_to_push { | |||
1043 | static int | 1058 | static int |
1044 | cifs_push_posix_locks(struct cifsFileInfo *cfile) | 1059 | cifs_push_posix_locks(struct cifsFileInfo *cfile) |
1045 | { | 1060 | { |
1046 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
1047 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 1061 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
1048 | struct file_lock *flock, **before; | 1062 | struct file_lock *flock, **before; |
1049 | unsigned int count = 0, i = 0; | 1063 | unsigned int count = 0, i = 0; |
@@ -1054,14 +1068,6 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1054 | 1068 | ||
1055 | xid = get_xid(); | 1069 | xid = get_xid(); |
1056 | 1070 | ||
1057 | /* we are going to update can_cache_brlcks here - need write access */ | ||
1058 | down_write(&cinode->lock_sem); | ||
1059 | if (!cinode->can_cache_brlcks) { | ||
1060 | up_write(&cinode->lock_sem); | ||
1061 | free_xid(xid); | ||
1062 | return rc; | ||
1063 | } | ||
1064 | |||
1065 | lock_flocks(); | 1071 | lock_flocks(); |
1066 | cifs_for_each_lock(cfile->dentry->d_inode, before) { | 1072 | cifs_for_each_lock(cfile->dentry->d_inode, before) { |
1067 | if ((*before)->fl_flags & FL_POSIX) | 1073 | if ((*before)->fl_flags & FL_POSIX) |
@@ -1127,9 +1133,6 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) | |||
1127 | } | 1133 | } |
1128 | 1134 | ||
1129 | out: | 1135 | out: |
1130 | cinode->can_cache_brlcks = false; | ||
1131 | up_write(&cinode->lock_sem); | ||
1132 | |||
1133 | free_xid(xid); | 1136 | free_xid(xid); |
1134 | return rc; | 1137 | return rc; |
1135 | err_out: | 1138 | err_out: |
@@ -1144,14 +1147,27 @@ static int | |||
1144 | cifs_push_locks(struct cifsFileInfo *cfile) | 1147 | cifs_push_locks(struct cifsFileInfo *cfile) |
1145 | { | 1148 | { |
1146 | struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); | 1149 | struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); |
1150 | struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); | ||
1147 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 1151 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
1152 | int rc = 0; | ||
1153 | |||
1154 | /* we are going to update can_cache_brlcks here - need write access */ | ||
1155 | down_write(&cinode->lock_sem); | ||
1156 | if (!cinode->can_cache_brlcks) { | ||
1157 | up_write(&cinode->lock_sem); | ||
1158 | return rc; | ||
1159 | } | ||
1148 | 1160 | ||
1149 | if (cap_unix(tcon->ses) && | 1161 | if (cap_unix(tcon->ses) && |
1150 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | 1162 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && |
1151 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | 1163 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) |
1152 | return cifs_push_posix_locks(cfile); | 1164 | rc = cifs_push_posix_locks(cfile); |
1165 | else | ||
1166 | rc = tcon->ses->server->ops->push_mand_locks(cfile); | ||
1153 | 1167 | ||
1154 | return tcon->ses->server->ops->push_mand_locks(cfile); | 1168 | cinode->can_cache_brlcks = false; |
1169 | up_write(&cinode->lock_sem); | ||
1170 | return rc; | ||
1155 | } | 1171 | } |
1156 | 1172 | ||
1157 | static void | 1173 | static void |
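The shape of this refactor: the can_cache_brlcks guard and the flag clear move out of both push helpers into the single caller, so the whole push runs under one writer-side critical section. A userspace sketch of the pattern, with a pthreads rwlock standing in for lock_sem and trivial stand-ins for the two helpers (build with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t lock_sem = PTHREAD_RWLOCK_INITIALIZER;
static bool can_cache_brlcks = true;

static int push_posix_locks(void) { return 0; }   /* stand-in */
static int push_mand_locks(void)  { return 0; }   /* stand-in */

static int push_locks(bool use_posix)
{
    int rc = 0;

    /* one writer-side guard replaces the copy in each helper */
    pthread_rwlock_wrlock(&lock_sem);
    if (!can_cache_brlcks) {
        pthread_rwlock_unlock(&lock_sem);
        return rc;
    }
    rc = use_posix ? push_posix_locks() : push_mand_locks();
    can_cache_brlcks = false;   /* cleared exactly once, under the lock */
    pthread_rwlock_unlock(&lock_sem);
    return rc;
}

int main(void)
{
    printf("first push:  %d\n", push_locks(true));
    printf("second push: %d\n", push_locks(true)); /* guard short-circuits */
    return 0;
}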
@@ -1436,16 +1452,18 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, | |||
1436 | return -ENOMEM; | 1452 | return -ENOMEM; |
1437 | 1453 | ||
1438 | rc = cifs_lock_add_if(cfile, lock, wait_flag); | 1454 | rc = cifs_lock_add_if(cfile, lock, wait_flag); |
1439 | if (rc < 0) | 1455 | if (rc < 0) { |
1440 | kfree(lock); | 1456 | kfree(lock); |
1441 | if (rc <= 0) | 1457 | return rc; |
1458 | } | ||
1459 | if (!rc) | ||
1442 | goto out; | 1460 | goto out; |
1443 | 1461 | ||
1444 | rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, | 1462 | rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, |
1445 | type, 1, 0, wait_flag); | 1463 | type, 1, 0, wait_flag); |
1446 | if (rc) { | 1464 | if (rc) { |
1447 | kfree(lock); | 1465 | kfree(lock); |
1448 | goto out; | 1466 | return rc; |
1449 | } | 1467 | } |
1450 | 1468 | ||
1451 | cifs_lock_add(cfile, lock); | 1469 | cifs_lock_add(cfile, lock); |
@@ -1794,7 +1812,6 @@ static int cifs_writepages(struct address_space *mapping, | |||
1794 | struct TCP_Server_Info *server; | 1812 | struct TCP_Server_Info *server; |
1795 | struct page *page; | 1813 | struct page *page; |
1796 | int rc = 0; | 1814 | int rc = 0; |
1797 | loff_t isize = i_size_read(mapping->host); | ||
1798 | 1815 | ||
1799 | /* | 1816 | /* |
1800 | * If wsize is smaller than the page cache size, default to writing | 1817 | * If wsize is smaller than the page cache size, default to writing |
@@ -1899,7 +1916,7 @@ retry: | |||
1899 | */ | 1916 | */ |
1900 | set_page_writeback(page); | 1917 | set_page_writeback(page); |
1901 | 1918 | ||
1902 | if (page_offset(page) >= isize) { | 1919 | if (page_offset(page) >= i_size_read(mapping->host)) { |
1903 | done = true; | 1920 | done = true; |
1904 | unlock_page(page); | 1921 | unlock_page(page); |
1905 | end_page_writeback(page); | 1922 | end_page_writeback(page); |
@@ -1932,7 +1949,8 @@ retry: | |||
1932 | wdata->offset = page_offset(wdata->pages[0]); | 1949 | wdata->offset = page_offset(wdata->pages[0]); |
1933 | wdata->pagesz = PAGE_CACHE_SIZE; | 1950 | wdata->pagesz = PAGE_CACHE_SIZE; |
1934 | wdata->tailsz = | 1951 | wdata->tailsz = |
1935 | min(isize - page_offset(wdata->pages[nr_pages - 1]), | 1952 | min(i_size_read(mapping->host) - |
1953 | page_offset(wdata->pages[nr_pages - 1]), | ||
1936 | (loff_t)PAGE_CACHE_SIZE); | 1954 | (loff_t)PAGE_CACHE_SIZE); |
1937 | wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + | 1955 | wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + |
1938 | wdata->tailsz; | 1956 | wdata->tailsz; |
@@ -2085,7 +2103,15 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, | |||
2085 | } else { | 2103 | } else { |
2086 | rc = copied; | 2104 | rc = copied; |
2087 | pos += copied; | 2105 | pos += copied; |
2088 | set_page_dirty(page); | 2106 | /* |
2107 | * When we use strict cache mode and cifs_strict_writev was run | ||
2108 | * with level II oplock (indicated by leave_pages_clean field of | ||
2109 | * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev | ||
2110 | * sent the data to the server itself. | ||
2111 | */ | ||
2112 | if (!CIFS_I(inode)->leave_pages_clean || | ||
2113 | !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)) | ||
2114 | set_page_dirty(page); | ||
2089 | } | 2115 | } |
2090 | 2116 | ||
2091 | if (rc > 0) { | 2117 | if (rc > 0) { |
@@ -2436,8 +2462,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2436 | } | 2462 | } |
2437 | 2463 | ||
2438 | static ssize_t | 2464 | static ssize_t |
2439 | cifs_writev(struct kiocb *iocb, const struct iovec *iov, | 2465 | cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov, |
2440 | unsigned long nr_segs, loff_t pos) | 2466 | unsigned long nr_segs, loff_t pos, bool cache_ex) |
2441 | { | 2467 | { |
2442 | struct file *file = iocb->ki_filp; | 2468 | struct file *file = iocb->ki_filp; |
2443 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; | 2469 | struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; |
@@ -2457,10 +2483,14 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2457 | down_read(&cinode->lock_sem); | 2483 | down_read(&cinode->lock_sem); |
2458 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), | 2484 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), |
2459 | server->vals->exclusive_lock_type, NULL, | 2485 | server->vals->exclusive_lock_type, NULL, |
2460 | true)) { | 2486 | CIFS_WRITE_OP)) { |
2461 | mutex_lock(&inode->i_mutex); | 2487 | mutex_lock(&inode->i_mutex); |
2488 | if (!cache_ex) | ||
2489 | cinode->leave_pages_clean = true; | ||
2462 | rc = __generic_file_aio_write(iocb, iov, nr_segs, | 2490 | rc = __generic_file_aio_write(iocb, iov, nr_segs, |
2463 | &iocb->ki_pos); | 2491 | &iocb->ki_pos); |
2492 | if (!cache_ex) | ||
2493 | cinode->leave_pages_clean = false; | ||
2464 | mutex_unlock(&inode->i_mutex); | 2494 | mutex_unlock(&inode->i_mutex); |
2465 | } | 2495 | } |
2466 | 2496 | ||
@@ -2487,42 +2517,62 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2487 | struct cifsFileInfo *cfile = (struct cifsFileInfo *) | 2517 | struct cifsFileInfo *cfile = (struct cifsFileInfo *) |
2488 | iocb->ki_filp->private_data; | 2518 | iocb->ki_filp->private_data; |
2489 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); | 2519 | struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); |
2490 | 2520 | ssize_t written, written2; | |
2491 | #ifdef CONFIG_CIFS_SMB2 | ||
2492 | /* | 2521 | /* |
2493 | * If we have an oplock for read and want to write data to the file | 2522 | * We need to store clientCanCacheAll here to prevent race
2494 | * we need to store it in the page cache and then push it to the server | 2523 | * conditions - this value can be changed during the execution
2495 | * to be sure the next read will get valid data. | 2524 | * of generic_file_aio_write. For CIFS it can be changed from
2525 | * true to false only, but for SMB2 it can be changed both from | ||
2526 | * true to false and vice versa. So, we can end up with data | ||
2527 | * stored in the cache, not marked dirty and not sent to the | ||
2528 | * server if this value changes its state from false to true | ||
2529 | * after cifs_write_end. | ||
2496 | */ | 2530 | */ |
2497 | if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) { | 2531 | bool cache_ex = cinode->clientCanCacheAll; |
2498 | ssize_t written; | 2532 | bool cache_read = cinode->clientCanCacheRead; |
2499 | int rc; | 2533 | int rc; |
2500 | 2534 | loff_t saved_pos; | |
2501 | written = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
2502 | rc = filemap_fdatawrite(inode->i_mapping); | ||
2503 | if (rc) | ||
2504 | return (ssize_t)rc; | ||
2505 | 2535 | ||
2506 | return written; | 2536 | if (cache_ex) { |
2537 | if (cap_unix(tcon->ses) && | ||
2538 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && | ||
2539 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( | ||
2540 | tcon->fsUnixInfo.Capability))) | ||
2541 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
2542 | return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex); | ||
2507 | } | 2543 | } |
2508 | #endif | ||
2509 | 2544 | ||
2510 | /* | 2545 | /* |
2511 | * For non-oplocked files in strict cache mode we need to write the data | 2546 | * For files without exclusive oplock in strict cache mode we need to |
2512 | * to the server exactly from the pos to pos+len-1 rather than flush all | 2547 | * write the data to the server exactly from the pos to pos+len-1 rather |
2513 | * affected pages because it may cause an error with mandatory locks on | 2548 | * than flush all affected pages because it may cause an error with
2514 | * these pages but not on the region from pos to pos+len-1. | 2549 | * mandatory locks on these pages but not on the region from pos to
2550 | * pos+len-1. | ||
2515 | */ | 2551 | */ |
2552 | written = cifs_user_writev(iocb, iov, nr_segs, pos); | ||
2553 | if (!cache_read || written <= 0) | ||
2554 | return written; | ||
2516 | 2555 | ||
2517 | if (!cinode->clientCanCacheAll) | 2556 | saved_pos = iocb->ki_pos; |
2518 | return cifs_user_writev(iocb, iov, nr_segs, pos); | 2557 | iocb->ki_pos = pos; |
2519 | 2558 | /* we have a read oplock - need to store the data in the page cache */
2520 | if (cap_unix(tcon->ses) && | 2559 | if (cap_unix(tcon->ses) && |
2521 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && | 2560 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && |
2522 | ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) | 2561 | (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( |
2523 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | 2562 | tcon->fsUnixInfo.Capability))) |
2524 | 2563 | written2 = generic_file_aio_write(iocb, iov, nr_segs, pos); | |
2525 | return cifs_writev(iocb, iov, nr_segs, pos); | 2564 | else |
2565 | written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos, | ||
2566 | cache_ex); | ||
2567 | /* errors occurred during writing - invalidate the page cache */ | ||
2568 | if (written2 < 0) { | ||
2569 | rc = cifs_invalidate_mapping(inode); | ||
2570 | if (rc) | ||
2571 | written = (ssize_t)rc; | ||
2572 | else | ||
2573 | iocb->ki_pos = saved_pos; | ||
2574 | } | ||
2575 | return written; | ||
2526 | } | 2576 | } |
2527 | 2577 | ||
2528 | static struct cifs_readdata * | 2578 | static struct cifs_readdata * |
@@ -2892,7 +2942,7 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, | |||
2892 | down_read(&cinode->lock_sem); | 2942 | down_read(&cinode->lock_sem); |
2893 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), | 2943 | if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), |
2894 | tcon->ses->server->vals->shared_lock_type, | 2944 | tcon->ses->server->vals->shared_lock_type, |
2895 | NULL, true)) | 2945 | NULL, CIFS_READ_OP)) |
2896 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); | 2946 | rc = generic_file_aio_read(iocb, iov, nr_segs, pos); |
2897 | up_read(&cinode->lock_sem); | 2947 | up_read(&cinode->lock_sem); |
2898 | return rc; | 2948 | return rc; |
@@ -3536,7 +3586,7 @@ void cifs_oplock_break(struct work_struct *work) | |||
3536 | if (cinode->clientCanCacheRead == 0) { | 3586 | if (cinode->clientCanCacheRead == 0) { |
3537 | rc = filemap_fdatawait(inode->i_mapping); | 3587 | rc = filemap_fdatawait(inode->i_mapping); |
3538 | mapping_set_error(inode->i_mapping, rc); | 3588 | mapping_set_error(inode->i_mapping, rc); |
3539 | invalidate_remote_inode(inode); | 3589 | cifs_invalidate_mapping(inode); |
3540 | } | 3590 | } |
3541 | cFYI(1, "Oplock flush inode %p rc %d", inode, rc); | 3591 | cFYI(1, "Oplock flush inode %p rc %d", inode, rc); |
3542 | } | 3592 | } |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index afdff79651f1..ed6208ff85a7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -1791,11 +1791,12 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1791 | stat->ino = CIFS_I(inode)->uniqueid; | 1791 | stat->ino = CIFS_I(inode)->uniqueid; |
1792 | 1792 | ||
1793 | /* | 1793 | /* |
1794 | * If on a multiuser mount without unix extensions, and the admin hasn't | 1794 | * If on a multiuser mount without unix extensions or cifsacl being |
1795 | * overridden them, set the ownership to the fsuid/fsgid of the current | 1795 | * enabled, and the admin hasn't overridden them, set the ownership |
1796 | * process. | 1796 | * to the fsuid/fsgid of the current process. |
1797 | */ | 1797 | */ |
1798 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && | 1798 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && |
1799 | !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && | ||
1799 | !tcon->unix_ext) { | 1800 | !tcon->unix_ext) { |
1800 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) | 1801 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) |
1801 | stat->uid = current_fsuid(); | 1802 | stat->uid = current_fsuid(); |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index d5ce9e26696c..a82bc51fdc82 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -204,7 +204,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) | |||
204 | return rc; | 204 | return rc; |
205 | } | 205 | } |
206 | 206 | ||
207 | int | 207 | void |
208 | cifs_set_port(struct sockaddr *addr, const unsigned short int port) | 208 | cifs_set_port(struct sockaddr *addr, const unsigned short int port) |
209 | { | 209 | { |
210 | switch (addr->sa_family) { | 210 | switch (addr->sa_family) { |
@@ -214,19 +214,7 @@ cifs_set_port(struct sockaddr *addr, const unsigned short int port) | |||
214 | case AF_INET6: | 214 | case AF_INET6: |
215 | ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); | 215 | ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); |
216 | break; | 216 | break; |
217 | default: | ||
218 | return 0; | ||
219 | } | 217 | } |
220 | return 1; | ||
221 | } | ||
222 | |||
223 | int | ||
224 | cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, | ||
225 | const unsigned short int port) | ||
226 | { | ||
227 | if (!cifs_convert_address(dst, src, len)) | ||
228 | return 0; | ||
229 | return cifs_set_port(dst, port); | ||
230 | } | 218 | } |
231 | 219 | ||
232 | /***************************************************************************** | 220 | /***************************************************************************** |
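cifs_set_port() becoming void reflects that every caller now hands it an address already validated by cifs_convert_address(), so an unknown family is silently ignored instead of reported. A userspace mirror of the helper:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

static void set_port(struct sockaddr *addr, unsigned short port)
{
    switch (addr->sa_family) {
    case AF_INET:
        ((struct sockaddr_in *)addr)->sin_port = htons(port);
        break;
    case AF_INET6:
        ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
        break;
    }
}

int main(void)
{
    struct sockaddr_in sin = { .sin_family = AF_INET };

    set_port((struct sockaddr *)&sin, 445);
    return sin.sin_port == htons(445) ? 0 : 1;
}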
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index f9b5d3d6cf33..6002fdc920ae 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -66,18 +66,20 @@ static inline void dump_cifs_file_struct(struct file *file, char *label) | |||
66 | #endif /* DEBUG2 */ | 66 | #endif /* DEBUG2 */ |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Attempt to preload the dcache with the results from the FIND_FIRST/NEXT | ||
70 | * | ||
69 | * Find the dentry that matches "name". If there isn't one, create one. If it's | 71 | * Find the dentry that matches "name". If there isn't one, create one. If it's |
70 | * a negative dentry or the uniqueid changed, then drop it and recreate it. | 72 | * a negative dentry or the uniqueid changed, then drop it and recreate it. |
71 | */ | 73 | */ |
72 | static struct dentry * | 74 | static void |
73 | cifs_readdir_lookup(struct dentry *parent, struct qstr *name, | 75 | cifs_prime_dcache(struct dentry *parent, struct qstr *name, |
74 | struct cifs_fattr *fattr) | 76 | struct cifs_fattr *fattr) |
75 | { | 77 | { |
76 | struct dentry *dentry, *alias; | 78 | struct dentry *dentry, *alias; |
77 | struct inode *inode; | 79 | struct inode *inode; |
78 | struct super_block *sb = parent->d_inode->i_sb; | 80 | struct super_block *sb = parent->d_inode->i_sb; |
79 | 81 | ||
80 | cFYI(1, "For %s", name->name); | 82 | cFYI(1, "%s: for %s", __func__, name->name); |
81 | 83 | ||
82 | if (parent->d_op && parent->d_op->d_hash) | 84 | if (parent->d_op && parent->d_op->d_hash) |
83 | parent->d_op->d_hash(parent, parent->d_inode, name); | 85 | parent->d_op->d_hash(parent, parent->d_inode, name); |
@@ -86,35 +88,33 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, | |||
86 | 88 | ||
87 | dentry = d_lookup(parent, name); | 89 | dentry = d_lookup(parent, name); |
88 | if (dentry) { | 90 | if (dentry) { |
91 | int err; | ||
92 | |||
89 | inode = dentry->d_inode; | 93 | inode = dentry->d_inode; |
90 | /* update inode in place if i_ino didn't change */ | 94 | /* update inode in place if i_ino didn't change */ |
91 | if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { | 95 | if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { |
92 | cifs_fattr_to_inode(inode, fattr); | 96 | cifs_fattr_to_inode(inode, fattr); |
93 | return dentry; | 97 | goto out; |
94 | } | 98 | } |
95 | d_drop(dentry); | 99 | err = d_invalidate(dentry); |
96 | dput(dentry); | 100 | dput(dentry); |
101 | if (err) | ||
102 | return; | ||
97 | } | 103 | } |
98 | 104 | ||
99 | dentry = d_alloc(parent, name); | 105 | dentry = d_alloc(parent, name); |
100 | if (dentry == NULL) | 106 | if (!dentry) |
101 | return NULL; | 107 | return; |
102 | 108 | ||
103 | inode = cifs_iget(sb, fattr); | 109 | inode = cifs_iget(sb, fattr); |
104 | if (!inode) { | 110 | if (!inode) |
105 | dput(dentry); | 111 | goto out; |
106 | return NULL; | ||
107 | } | ||
108 | 112 | ||
109 | alias = d_materialise_unique(dentry, inode); | 113 | alias = d_materialise_unique(dentry, inode); |
110 | if (alias != NULL) { | 114 | if (alias && !IS_ERR(alias)) |
111 | dput(dentry); | 115 | dput(alias); |
112 | if (IS_ERR(alias)) | 116 | out: |
113 | return NULL; | 117 | dput(dentry); |
114 | dentry = alias; | ||
115 | } | ||
116 | |||
117 | return dentry; | ||
118 | } | 118 | } |
119 | 119 | ||
120 | static void | 120 | static void |
@@ -134,6 +134,16 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) | |||
134 | if (fattr->cf_cifsattrs & ATTR_READONLY) | 134 | if (fattr->cf_cifsattrs & ATTR_READONLY) |
135 | fattr->cf_mode &= ~S_IWUGO; | 135 | fattr->cf_mode &= ~S_IWUGO; |
136 | 136 | ||
137 | /* | ||
138 | * We of course don't get ACL info in FIND_FIRST/NEXT results, so | ||
139 | * mark it for revalidation so that "ls -l" will look right. It might | ||
140 | * be super-slow, but if we don't do this then the ownership of files | ||
141 | * may look wrong since the inodes may not have timed out by the time | ||
142 | * "ls" does a stat() call on them. | ||
143 | */ | ||
144 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) | ||
145 | fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; | ||
146 | |||
137 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && | 147 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && |
138 | fattr->cf_cifsattrs & ATTR_SYSTEM) { | 148 | fattr->cf_cifsattrs & ATTR_SYSTEM) { |
139 | if (fattr->cf_eof == 0) { | 149 | if (fattr->cf_eof == 0) { |
@@ -649,7 +659,6 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, | |||
649 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 659 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
650 | struct cifs_dirent de = { NULL, }; | 660 | struct cifs_dirent de = { NULL, }; |
651 | struct cifs_fattr fattr; | 661 | struct cifs_fattr fattr; |
652 | struct dentry *dentry; | ||
653 | struct qstr name; | 662 | struct qstr name; |
654 | int rc = 0; | 663 | int rc = 0; |
655 | ino_t ino; | 664 | ino_t ino; |
@@ -720,13 +729,11 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, | |||
720 | */ | 729 | */ |
721 | fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; | 730 | fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; |
722 | 731 | ||
723 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); | 732 | cifs_prime_dcache(file->f_dentry, &name, &fattr); |
724 | dentry = cifs_readdir_lookup(file->f_dentry, &name, &fattr); | ||
725 | 733 | ||
734 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); | ||
726 | rc = filldir(dirent, name.name, name.len, file->f_pos, ino, | 735 | rc = filldir(dirent, name.name, name.len, file->f_pos, ino, |
727 | fattr.cf_dtype); | 736 | fattr.cf_dtype); |
728 | |||
729 | dput(dentry); | ||
730 | return rc; | 737 | return rc; |
731 | } | 738 | } |
732 | 739 | ||
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 56cc4be87807..a5d234c8d5d9 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c | |||
@@ -575,37 +575,6 @@ cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
575 | return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data); | 575 | return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data); |
576 | } | 576 | } |
577 | 577 | ||
578 | static char * | ||
579 | cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, | ||
580 | struct cifs_tcon *tcon) | ||
581 | { | ||
582 | int pplen = vol->prepath ? strlen(vol->prepath) : 0; | ||
583 | int dfsplen; | ||
584 | char *full_path = NULL; | ||
585 | |||
586 | /* if no prefix path, simply set path to the root of share to "" */ | ||
587 | if (pplen == 0) { | ||
588 | full_path = kzalloc(1, GFP_KERNEL); | ||
589 | return full_path; | ||
590 | } | ||
591 | |||
592 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) | ||
593 | dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); | ||
594 | else | ||
595 | dfsplen = 0; | ||
596 | |||
597 | full_path = kmalloc(dfsplen + pplen + 1, GFP_KERNEL); | ||
598 | if (full_path == NULL) | ||
599 | return full_path; | ||
600 | |||
601 | if (dfsplen) | ||
602 | strncpy(full_path, tcon->treeName, dfsplen); | ||
603 | strncpy(full_path + dfsplen, vol->prepath, pplen); | ||
604 | convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); | ||
605 | full_path[dfsplen + pplen] = 0; /* add trailing null */ | ||
606 | return full_path; | ||
607 | } | ||
608 | |||
609 | static void | 578 | static void |
610 | cifs_clear_stats(struct cifs_tcon *tcon) | 579 | cifs_clear_stats(struct cifs_tcon *tcon) |
611 | { | 580 | { |
@@ -766,7 +735,6 @@ smb_set_file_info(struct inode *inode, const char *full_path, | |||
766 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 735 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
767 | struct tcon_link *tlink = NULL; | 736 | struct tcon_link *tlink = NULL; |
768 | struct cifs_tcon *tcon; | 737 | struct cifs_tcon *tcon; |
769 | FILE_BASIC_INFO info_buf; | ||
770 | 738 | ||
771 | /* if the file is already open for write, just use that fileid */ | 739 | /* if the file is already open for write, just use that fileid */ |
772 | open_file = find_writable_file(cinode, true); | 740 | open_file = find_writable_file(cinode, true); |
@@ -817,7 +785,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, | |||
817 | netpid = current->tgid; | 785 | netpid = current->tgid; |
818 | 786 | ||
819 | set_via_filehandle: | 787 | set_via_filehandle: |
820 | rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid); | 788 | rc = CIFSSMBSetFileInfo(xid, tcon, buf, netfid, netpid); |
821 | if (!rc) | 789 | if (!rc) |
822 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); | 790 | cinode->cifsAttrs = le32_to_cpu(buf->Attributes); |
823 | 791 | ||
@@ -944,7 +912,6 @@ struct smb_version_operations smb1_operations = { | |||
944 | .set_path_size = CIFSSMBSetEOF, | 912 | .set_path_size = CIFSSMBSetEOF, |
945 | .set_file_size = CIFSSMBSetFileSize, | 913 | .set_file_size = CIFSSMBSetFileSize, |
946 | .set_file_info = smb_set_file_info, | 914 | .set_file_info = smb_set_file_info, |
947 | .build_path_to_root = cifs_build_path_to_root, | ||
948 | .echo = CIFSSMBEcho, | 915 | .echo = CIFSSMBEcho, |
949 | .mkdir = CIFSSMBMkDir, | 916 | .mkdir = CIFSSMBMkDir, |
950 | .mkdir_setinfo = cifs_mkdir_setinfo, | 917 | .mkdir_setinfo = cifs_mkdir_setinfo, |
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index a93eec30a50d..71e6aed4b382 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c | |||
@@ -260,13 +260,6 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
260 | struct cifs_fid_locks *fdlocks; | 260 | struct cifs_fid_locks *fdlocks; |
261 | 261 | ||
262 | xid = get_xid(); | 262 | xid = get_xid(); |
263 | /* we are going to update can_cache_brlcks here - need a write access */ | ||
264 | down_write(&cinode->lock_sem); | ||
265 | if (!cinode->can_cache_brlcks) { | ||
266 | up_write(&cinode->lock_sem); | ||
267 | free_xid(xid); | ||
268 | return rc; | ||
269 | } | ||
270 | 263 | ||
271 | /* | 264 | /* |
272 | * Accessing maxBuf is racy with cifs_reconnect - need to store value | 265 | * Accessing maxBuf is racy with cifs_reconnect - need to store value |
@@ -274,7 +267,6 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
274 | */ | 267 | */ |
275 | max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; | 268 | max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf; |
276 | if (!max_buf) { | 269 | if (!max_buf) { |
277 | up_write(&cinode->lock_sem); | ||
278 | free_xid(xid); | 270 | free_xid(xid); |
279 | return -EINVAL; | 271 | return -EINVAL; |
280 | } | 272 | } |
@@ -282,7 +274,6 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
282 | max_num = max_buf / sizeof(struct smb2_lock_element); | 274 | max_num = max_buf / sizeof(struct smb2_lock_element); |
283 | buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); | 275 | buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL); |
284 | if (!buf) { | 276 | if (!buf) { |
285 | up_write(&cinode->lock_sem); | ||
286 | free_xid(xid); | 277 | free_xid(xid); |
287 | return -ENOMEM; | 278 | return -ENOMEM; |
288 | } | 279 | } |
@@ -293,10 +284,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile) | |||
293 | rc = stored_rc; | 284 | rc = stored_rc; |
294 | } | 285 | } |
295 | 286 | ||
296 | cinode->can_cache_brlcks = false; | ||
297 | kfree(buf); | 287 | kfree(buf); |
298 | |||
299 | up_write(&cinode->lock_sem); | ||
300 | free_xid(xid); | 288 | free_xid(xid); |
301 | return rc; | 289 | return rc; |
302 | } | 290 | } |
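The three hunks above only drop the lock_sem handling; they rely on the caller taking the semaphore and flipping can_cache_brlcks once for all protocols. A sketch of the expected caller shape, assuming the common entry point is cifs_push_locks() (caller-side code is not part of this diff):

        /* sketch: the caller serializes and updates the cache flag once */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return;
        }
        rc = server->ops->push_mand_locks(cfile);  /* e.g. smb2_push_mandatory_locks */
        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);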
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4d9dbe0b7385..d79de7bc4435 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c | |||
@@ -262,23 +262,6 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
262 | return rc; | 262 | return rc; |
263 | } | 263 | } |
264 | 264 | ||
265 | static char * | ||
266 | smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, | ||
267 | struct cifs_tcon *tcon) | ||
268 | { | ||
269 | int pplen = vol->prepath ? strlen(vol->prepath) : 0; | ||
270 | char *full_path = NULL; | ||
271 | |||
272 | /* if no prefix path, simply set path to the root of share to "" */ | ||
273 | if (pplen == 0) { | ||
274 | full_path = kzalloc(2, GFP_KERNEL); | ||
275 | return full_path; | ||
276 | } | ||
277 | |||
278 | cERROR(1, "prefixpath is not supported for SMB2 now"); | ||
279 | return NULL; | ||
280 | } | ||
281 | |||
282 | static bool | 265 | static bool |
283 | smb2_can_echo(struct TCP_Server_Info *server) | 266 | smb2_can_echo(struct TCP_Server_Info *server) |
284 | { | 267 | { |
@@ -613,7 +596,6 @@ struct smb_version_operations smb21_operations = { | |||
613 | .set_path_size = smb2_set_path_size, | 596 | .set_path_size = smb2_set_path_size, |
614 | .set_file_size = smb2_set_file_size, | 597 | .set_file_size = smb2_set_file_size, |
615 | .set_file_info = smb2_set_file_info, | 598 | .set_file_info = smb2_set_file_info, |
616 | .build_path_to_root = smb2_build_path_to_root, | ||
617 | .mkdir = smb2_mkdir, | 599 | .mkdir = smb2_mkdir, |
618 | .mkdir_setinfo = smb2_mkdir_setinfo, | 600 | .mkdir_setinfo = smb2_mkdir_setinfo, |
619 | .rmdir = smb2_rmdir, | 601 | .rmdir = smb2_rmdir, |
@@ -641,6 +623,91 @@ struct smb_version_operations smb21_operations = { | |||
641 | .get_lease_key = smb2_get_lease_key, | 623 | .get_lease_key = smb2_get_lease_key, |
642 | .set_lease_key = smb2_set_lease_key, | 624 | .set_lease_key = smb2_set_lease_key, |
643 | .new_lease_key = smb2_new_lease_key, | 625 | .new_lease_key = smb2_new_lease_key, |
626 | .calc_signature = smb2_calc_signature, | ||
627 | }; | ||
628 | |||
629 | |||
630 | struct smb_version_operations smb30_operations = { | ||
631 | .compare_fids = smb2_compare_fids, | ||
632 | .setup_request = smb2_setup_request, | ||
633 | .setup_async_request = smb2_setup_async_request, | ||
634 | .check_receive = smb2_check_receive, | ||
635 | .add_credits = smb2_add_credits, | ||
636 | .set_credits = smb2_set_credits, | ||
637 | .get_credits_field = smb2_get_credits_field, | ||
638 | .get_credits = smb2_get_credits, | ||
639 | .get_next_mid = smb2_get_next_mid, | ||
640 | .read_data_offset = smb2_read_data_offset, | ||
641 | .read_data_length = smb2_read_data_length, | ||
642 | .map_error = map_smb2_to_linux_error, | ||
643 | .find_mid = smb2_find_mid, | ||
644 | .check_message = smb2_check_message, | ||
645 | .dump_detail = smb2_dump_detail, | ||
646 | .clear_stats = smb2_clear_stats, | ||
647 | .print_stats = smb2_print_stats, | ||
648 | .is_oplock_break = smb2_is_valid_oplock_break, | ||
649 | .need_neg = smb2_need_neg, | ||
650 | .negotiate = smb2_negotiate, | ||
651 | .negotiate_wsize = smb2_negotiate_wsize, | ||
652 | .negotiate_rsize = smb2_negotiate_rsize, | ||
653 | .sess_setup = SMB2_sess_setup, | ||
654 | .logoff = SMB2_logoff, | ||
655 | .tree_connect = SMB2_tcon, | ||
656 | .tree_disconnect = SMB2_tdis, | ||
657 | .is_path_accessible = smb2_is_path_accessible, | ||
658 | .can_echo = smb2_can_echo, | ||
659 | .echo = SMB2_echo, | ||
660 | .query_path_info = smb2_query_path_info, | ||
661 | .get_srv_inum = smb2_get_srv_inum, | ||
662 | .query_file_info = smb2_query_file_info, | ||
663 | .set_path_size = smb2_set_path_size, | ||
664 | .set_file_size = smb2_set_file_size, | ||
665 | .set_file_info = smb2_set_file_info, | ||
666 | .mkdir = smb2_mkdir, | ||
667 | .mkdir_setinfo = smb2_mkdir_setinfo, | ||
668 | .rmdir = smb2_rmdir, | ||
669 | .unlink = smb2_unlink, | ||
670 | .rename = smb2_rename_path, | ||
671 | .create_hardlink = smb2_create_hardlink, | ||
672 | .open = smb2_open_file, | ||
673 | .set_fid = smb2_set_fid, | ||
674 | .close = smb2_close_file, | ||
675 | .flush = smb2_flush_file, | ||
676 | .async_readv = smb2_async_readv, | ||
677 | .async_writev = smb2_async_writev, | ||
678 | .sync_read = smb2_sync_read, | ||
679 | .sync_write = smb2_sync_write, | ||
680 | .query_dir_first = smb2_query_dir_first, | ||
681 | .query_dir_next = smb2_query_dir_next, | ||
682 | .close_dir = smb2_close_dir, | ||
683 | .calc_smb_size = smb2_calc_size, | ||
684 | .is_status_pending = smb2_is_status_pending, | ||
685 | .oplock_response = smb2_oplock_response, | ||
686 | .queryfs = smb2_queryfs, | ||
687 | .mand_lock = smb2_mand_lock, | ||
688 | .mand_unlock_range = smb2_unlock_range, | ||
689 | .push_mand_locks = smb2_push_mandatory_locks, | ||
690 | .get_lease_key = smb2_get_lease_key, | ||
691 | .set_lease_key = smb2_set_lease_key, | ||
692 | .new_lease_key = smb2_new_lease_key, | ||
693 | .calc_signature = smb3_calc_signature, | ||
694 | }; | ||
695 | |||
696 | struct smb_version_values smb20_values = { | ||
697 | .version_string = SMB20_VERSION_STRING, | ||
698 | .protocol_id = SMB20_PROT_ID, | ||
699 | .req_capabilities = 0, /* MBZ */ | ||
700 | .large_lock_type = 0, | ||
701 | .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, | ||
702 | .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, | ||
703 | .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, | ||
704 | .header_size = sizeof(struct smb2_hdr), | ||
705 | .max_header_size = MAX_SMB2_HDR_SIZE, | ||
706 | .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, | ||
707 | .lock_cmd = SMB2_LOCK, | ||
708 | .cap_unix = 0, | ||
709 | .cap_nt_find = SMB2_NT_FIND, | ||
710 | .cap_large_files = SMB2_LARGE_FILES, | ||
644 | }; | 711 | }; |
645 | 712 | ||
646 | struct smb_version_values smb21_values = { | 713 | struct smb_version_values smb21_values = { |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index cf33622cdac8..41d9d0725f0f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
@@ -425,7 +425,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) | |||
425 | } | 425 | } |
426 | 426 | ||
427 | cFYI(1, "sec_flags 0x%x", sec_flags); | 427 | cFYI(1, "sec_flags 0x%x", sec_flags); |
428 | if (sec_flags & CIFSSEC_MUST_SIGN) { | 428 | if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { |
429 | cFYI(1, "Signing required"); | 429 | cFYI(1, "Signing required"); |
430 | if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | | 430 | if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED | |
431 | SMB2_NEGOTIATE_SIGNING_ENABLED))) { | 431 | SMB2_NEGOTIATE_SIGNING_ENABLED))) { |
@@ -612,7 +612,8 @@ ssetup_ntlmssp_authenticate: | |||
612 | 612 | ||
613 | /* BB add code to build os and lm fields */ | 613 | /* BB add code to build os and lm fields */ |
614 | 614 | ||
615 | rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, CIFS_LOG_ERROR); | 615 | rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, |
616 | CIFS_LOG_ERROR | CIFS_NEG_OP); | ||
616 | 617 | ||
617 | kfree(security_blob); | 618 | kfree(security_blob); |
618 | rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; | 619 | rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; |
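The "== CIFSSEC_MUST_SIGN" test matters because the MUST_* security flags are multi-bit values layered on their MAY_* counterparts, so a truthy "&" can fire on a partial overlap. With illustrative values (assumed layout, not quoted from this patch):

        #define MAY_SIGN        0x00001                 /* hypothetical */
        #define MUST_SIGN       (0x01000 | MAY_SIGN)    /* hypothetical */

        sec_flags = MAY_SIGN;                   /* signing allowed, not required */
        sec_flags & MUST_SIGN;                  /* nonzero: wrongly "required"   */
        (sec_flags & MUST_SIGN) == MUST_SIGN;   /* false: correctly optional     */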
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 7d25f8b14f93..2aa3535e38ce 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h | |||
@@ -47,6 +47,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, | |||
47 | struct smb_rqst *rqst); | 47 | struct smb_rqst *rqst); |
48 | extern struct mid_q_entry *smb2_setup_async_request( | 48 | extern struct mid_q_entry *smb2_setup_async_request( |
49 | struct TCP_Server_Info *server, struct smb_rqst *rqst); | 49 | struct TCP_Server_Info *server, struct smb_rqst *rqst); |
50 | extern int smb2_calc_signature(struct smb_rqst *rqst, | ||
51 | struct TCP_Server_Info *server); | ||
52 | extern int smb3_calc_signature(struct smb_rqst *rqst, | ||
53 | struct TCP_Server_Info *server); | ||
50 | extern void smb2_echo_request(struct work_struct *work); | 54 | extern void smb2_echo_request(struct work_struct *work); |
51 | extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); | 55 | extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); |
52 | extern __u8 smb2_map_lease_to_oplock(__le32 lease_state); | 56 | extern __u8 smb2_map_lease_to_oplock(__le32 lease_state); |
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 2a5fdf26f79f..8dd73e61d762 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c | |||
@@ -39,7 +39,7 @@ | |||
39 | #include "smb2status.h" | 39 | #include "smb2status.h" |
40 | #include "smb2glob.h" | 40 | #include "smb2glob.h" |
41 | 41 | ||
42 | static int | 42 | int |
43 | smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | 43 | smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) |
44 | { | 44 | { |
45 | int i, rc; | 45 | int i, rc; |
@@ -116,6 +116,13 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | |||
116 | return rc; | 116 | return rc; |
117 | } | 117 | } |
118 | 118 | ||
119 | int | ||
120 | smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | ||
121 | { | ||
122 | cFYI(1, "smb3 signatures not supported yet"); | ||
123 | return -EOPNOTSUPP; | ||
124 | } | ||
125 | |||
119 | /* must be called with server->srv_mutex held */ | 126 | /* must be called with server->srv_mutex held */ |
120 | static int | 127 | static int |
121 | smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) | 128 | smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) |
@@ -132,7 +139,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) | |||
132 | return rc; | 139 | return rc; |
133 | } | 140 | } |
134 | 141 | ||
135 | rc = smb2_calc_signature(rqst, server); | 142 | rc = server->ops->calc_signature(rqst, server); |
136 | 143 | ||
137 | return rc; | 144 | return rc; |
138 | } | 145 | } |
@@ -168,7 +175,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) | |||
168 | memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE); | 175 | memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE); |
169 | 176 | ||
170 | mutex_lock(&server->srv_mutex); | 177 | mutex_lock(&server->srv_mutex); |
171 | rc = smb2_calc_signature(rqst, server); | 178 | rc = server->ops->calc_signature(rqst, server); |
172 | mutex_unlock(&server->srv_mutex); | 179 | mutex_unlock(&server->srv_mutex); |
173 | 180 | ||
174 | if (rc) | 181 | if (rc) |
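With calc_signature promoted into the ops table, both signing and verification dispatch per dialect. The effect of the wiring above, sketched at the call site:

        rc = server->ops->calc_signature(rqst, server);
        /* == smb2_calc_signature on SMB2.0/2.1 mounts;
         * == smb3_calc_signature on SMB3.0 mounts, which returns -EOPNOTSUPP
         *    for now, so a signed SMB3 session fails cleanly instead of being
         *    stamped with an SMB2-style signature. */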
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 4c6285fff598..e2f57a007029 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -844,6 +844,9 @@ COMPATIBLE_IOCTL(TIOCGDEV) | |||
844 | COMPATIBLE_IOCTL(TIOCCBRK) | 844 | COMPATIBLE_IOCTL(TIOCCBRK) |
845 | COMPATIBLE_IOCTL(TIOCGSID) | 845 | COMPATIBLE_IOCTL(TIOCGSID) |
846 | COMPATIBLE_IOCTL(TIOCGICOUNT) | 846 | COMPATIBLE_IOCTL(TIOCGICOUNT) |
847 | COMPATIBLE_IOCTL(TIOCGPKT) | ||
848 | COMPATIBLE_IOCTL(TIOCGPTLCK) | ||
849 | COMPATIBLE_IOCTL(TIOCGEXCL) | ||
847 | /* Little t */ | 850 | /* Little t */ |
848 | COMPATIBLE_IOCTL(TIOCGETD) | 851 | COMPATIBLE_IOCTL(TIOCGETD) |
849 | COMPATIBLE_IOCTL(TIOCSETD) | 852 | COMPATIBLE_IOCTL(TIOCSETD) |
diff --git a/fs/coredump.c b/fs/coredump.c index ce47379bfa61..177493272a61 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
@@ -458,7 +458,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) | |||
458 | return err; | 458 | return err; |
459 | } | 459 | } |
460 | 460 | ||
461 | void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) | 461 | void do_coredump(siginfo_t *siginfo) |
462 | { | 462 | { |
463 | struct core_state core_state; | 463 | struct core_state core_state; |
464 | struct core_name cn; | 464 | struct core_name cn; |
@@ -474,7 +474,7 @@ void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) | |||
474 | static atomic_t core_dump_count = ATOMIC_INIT(0); | 474 | static atomic_t core_dump_count = ATOMIC_INIT(0); |
475 | struct coredump_params cprm = { | 475 | struct coredump_params cprm = { |
476 | .siginfo = siginfo, | 476 | .siginfo = siginfo, |
477 | .regs = regs, | 477 | .regs = signal_pt_regs(), |
478 | .limit = rlimit(RLIMIT_CORE), | 478 | .limit = rlimit(RLIMIT_CORE), |
479 | /* | 479 | /* |
480 | * We must use the same mm->flags while dumping core to avoid | 480 | * We must use the same mm->flags while dumping core to avoid |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b607d92cdf24..153bb1e42e63 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -59,7 +59,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev | |||
59 | case S_IFDIR: | 59 | case S_IFDIR: |
60 | inode->i_op = &simple_dir_inode_operations; | 60 | inode->i_op = &simple_dir_inode_operations; |
61 | inode->i_fop = &simple_dir_operations; | 61 | inode->i_fop = &simple_dir_operations; |
62 | inode->i_private = NULL; | ||
63 | 62 | ||
64 | /* directory inodes start off with i_nlink == 2 | 63 | /* directory inodes start off with i_nlink == 2 |
65 | * (for "." entry) */ | 64 | * (for "." entry) */ |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 14afbabe6546..472e6befc54d 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -545,37 +545,38 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) | |||
545 | mutex_unlock(&allocated_ptys_lock); | 545 | mutex_unlock(&allocated_ptys_lock); |
546 | } | 546 | } |
547 | 547 | ||
548 | int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) | 548 | /** |
549 | * devpts_pty_new -- create a new inode in /dev/pts/ | ||
550 | * @ptmx_inode: inode of the master | ||
551 | * @device: major+minor of the node to be created | ||
552 | * @index: used as a name of the node | ||
553 | * @priv: what's given back by devpts_get_priv | ||
554 | * | ||
555 | * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill. | ||
556 | */ | ||
557 | struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, | ||
558 | void *priv) | ||
549 | { | 559 | { |
550 | /* tty layer puts index from devpts_new_index() in here */ | ||
551 | int number = tty->index; | ||
552 | struct tty_driver *driver = tty->driver; | ||
553 | dev_t device = MKDEV(driver->major, driver->minor_start+number); | ||
554 | struct dentry *dentry; | 560 | struct dentry *dentry; |
555 | struct super_block *sb = pts_sb_from_inode(ptmx_inode); | 561 | struct super_block *sb = pts_sb_from_inode(ptmx_inode); |
556 | struct inode *inode = new_inode(sb); | 562 | struct inode *inode; |
557 | struct dentry *root = sb->s_root; | 563 | struct dentry *root = sb->s_root; |
558 | struct pts_fs_info *fsi = DEVPTS_SB(sb); | 564 | struct pts_fs_info *fsi = DEVPTS_SB(sb); |
559 | struct pts_mount_opts *opts = &fsi->mount_opts; | 565 | struct pts_mount_opts *opts = &fsi->mount_opts; |
560 | int ret = 0; | ||
561 | char s[12]; | 566 | char s[12]; |
562 | 567 | ||
563 | /* We're supposed to be given the slave end of a pty */ | 568 | inode = new_inode(sb); |
564 | BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); | ||
565 | BUG_ON(driver->subtype != PTY_TYPE_SLAVE); | ||
566 | |||
567 | if (!inode) | 569 | if (!inode) |
568 | return -ENOMEM; | 570 | return ERR_PTR(-ENOMEM); |
569 | 571 | ||
570 | inode->i_ino = number + 3; | 572 | inode->i_ino = index + 3; |
571 | inode->i_uid = opts->setuid ? opts->uid : current_fsuid(); | 573 | inode->i_uid = opts->setuid ? opts->uid : current_fsuid(); |
572 | inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); | 574 | inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); |
573 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 575 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
574 | init_special_inode(inode, S_IFCHR|opts->mode, device); | 576 | init_special_inode(inode, S_IFCHR|opts->mode, device); |
575 | inode->i_private = tty; | 577 | inode->i_private = priv; |
576 | tty->driver_data = inode; | ||
577 | 578 | ||
578 | sprintf(s, "%d", number); | 579 | sprintf(s, "%d", index); |
579 | 580 | ||
580 | mutex_lock(&root->d_inode->i_mutex); | 581 | mutex_lock(&root->d_inode->i_mutex); |
581 | 582 | ||
@@ -585,18 +586,24 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) | |||
585 | fsnotify_create(root->d_inode, dentry); | 586 | fsnotify_create(root->d_inode, dentry); |
586 | } else { | 587 | } else { |
587 | iput(inode); | 588 | iput(inode); |
588 | ret = -ENOMEM; | 589 | inode = ERR_PTR(-ENOMEM); |
589 | } | 590 | } |
590 | 591 | ||
591 | mutex_unlock(&root->d_inode->i_mutex); | 592 | mutex_unlock(&root->d_inode->i_mutex); |
592 | 593 | ||
593 | return ret; | 594 | return inode; |
594 | } | 595 | } |
595 | 596 | ||
596 | struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) | 597 | /** |
598 | * devpts_get_priv -- get private data for a slave | ||
599 | * @pts_inode: inode of the slave | ||
600 | * | ||
601 | * Returns whatever was passed as priv in devpts_pty_new for a given inode. | ||
602 | */ | ||
603 | void *devpts_get_priv(struct inode *pts_inode) | ||
597 | { | 604 | { |
598 | struct dentry *dentry; | 605 | struct dentry *dentry; |
599 | struct tty_struct *tty; | 606 | void *priv = NULL; |
600 | 607 | ||
601 | BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); | 608 | BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); |
602 | 609 | ||
@@ -605,18 +612,22 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) | |||
605 | if (!dentry) | 612 | if (!dentry) |
606 | return NULL; | 613 | return NULL; |
607 | 614 | ||
608 | tty = NULL; | ||
609 | if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) | 615 | if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) |
610 | tty = (struct tty_struct *)pts_inode->i_private; | 616 | priv = pts_inode->i_private; |
611 | 617 | ||
612 | dput(dentry); | 618 | dput(dentry); |
613 | 619 | ||
614 | return tty; | 620 | return priv; |
615 | } | 621 | } |
616 | 622 | ||
617 | void devpts_pty_kill(struct tty_struct *tty) | 623 | /** |
624 | * devpts_pty_kill -- remove inode from /dev/pts/ | ||
625 | * @inode: inode of the slave to be removed | ||
626 | * | ||
627 | * This is the inverse operation of devpts_pty_new. | ||
628 | */ | ||
629 | void devpts_pty_kill(struct inode *inode) | ||
618 | { | 630 | { |
619 | struct inode *inode = tty->driver_data; | ||
620 | struct super_block *sb = pts_sb_from_inode(inode); | 631 | struct super_block *sb = pts_sb_from_inode(inode); |
621 | struct dentry *root = sb->s_root; | 632 | struct dentry *root = sb->s_root; |
622 | struct dentry *dentry; | 633 | struct dentry *dentry; |
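Taken together, the devpts hunks replace the tty-coupled interface with an opaque-pointer one keyed by the slave inode. A hedged sketch of how a pty driver is expected to use it after this change (caller-side variable names are illustrative, not from this diff):

        struct inode *slave;
        dev_t device = MKDEV(driver->major, driver->minor_start + index);

        slave = devpts_pty_new(ptmx_inode, device, index, tty);
        if (IS_ERR(slave))
                return PTR_ERR(slave);          /* e.g. -ENOMEM */

        /* later, the stashed pointer comes back from the inode */
        struct tty_struct *t = devpts_get_priv(slave);

        /* teardown is now keyed by the inode, not the tty */
        devpts_pty_kill(slave);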
diff --git a/fs/direct-io.c b/fs/direct-io.c index f86c720dba0e..cf5b44b10c67 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -540,6 +540,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, | |||
540 | sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ | 540 | sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ |
541 | unsigned long fs_count; /* Number of filesystem-sized blocks */ | 541 | unsigned long fs_count; /* Number of filesystem-sized blocks */ |
542 | int create; | 542 | int create; |
543 | unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; | ||
543 | 544 | ||
544 | /* | 545 | /* |
545 | * If there was a memory error and we've overwritten all the | 546 | * If there was a memory error and we've overwritten all the |
@@ -554,7 +555,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, | |||
554 | fs_count = fs_endblk - fs_startblk + 1; | 555 | fs_count = fs_endblk - fs_startblk + 1; |
555 | 556 | ||
556 | map_bh->b_state = 0; | 557 | map_bh->b_state = 0; |
557 | map_bh->b_size = fs_count << dio->inode->i_blkbits; | 558 | map_bh->b_size = fs_count << i_blkbits; |
558 | 559 | ||
559 | /* | 560 | /* |
560 | * For writes inside i_size on a DIO_SKIP_HOLES filesystem we | 561 | * For writes inside i_size on a DIO_SKIP_HOLES filesystem we |
@@ -1053,7 +1054,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1053 | int seg; | 1054 | int seg; |
1054 | size_t size; | 1055 | size_t size; |
1055 | unsigned long addr; | 1056 | unsigned long addr; |
1056 | unsigned blkbits = inode->i_blkbits; | 1057 | unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); |
1058 | unsigned blkbits = i_blkbits; | ||
1057 | unsigned blocksize_mask = (1 << blkbits) - 1; | 1059 | unsigned blocksize_mask = (1 << blkbits) - 1; |
1058 | ssize_t retval = -EINVAL; | 1060 | ssize_t retval = -EINVAL; |
1059 | loff_t end = offset; | 1061 | loff_t end = offset; |
@@ -1149,7 +1151,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1149 | dio->inode = inode; | 1151 | dio->inode = inode; |
1150 | dio->rw = rw; | 1152 | dio->rw = rw; |
1151 | sdio.blkbits = blkbits; | 1153 | sdio.blkbits = blkbits; |
1152 | sdio.blkfactor = inode->i_blkbits - blkbits; | 1154 | sdio.blkfactor = i_blkbits - blkbits; |
1153 | sdio.block_in_file = offset >> blkbits; | 1155 | sdio.block_in_file = offset >> blkbits; |
1154 | 1156 | ||
1155 | sdio.get_block = get_block; | 1157 | sdio.get_block = get_block; |
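All three direct-io hunks are one fix: i_blkbits can change while a request is being set up, so it is sampled exactly once and every derived quantity uses the same snapshot. The pattern in isolation:

        unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); /* single read  */
        unsigned blkbits = i_blkbits;   /* may be lowered for bdev I/O      */
        ...
        sdio.blkbits = blkbits;
        sdio.blkfactor = i_blkbits - blkbits;   /* same snapshot, no skew   */

get_more_blocks() then reconstructs the inode block size as sdio->blkbits + sdio->blkfactor instead of re-reading inode->i_blkbits.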
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig index 1897eb1b4b6a..e4242c3f8486 100644 --- a/fs/dlm/Kconfig +++ b/fs/dlm/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | menuconfig DLM | 1 | menuconfig DLM |
2 | tristate "Distributed Lock Manager (DLM)" | 2 | tristate "Distributed Lock Manager (DLM)" |
3 | depends on EXPERIMENTAL && INET | 3 | depends on INET |
4 | depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) | 4 | depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n) |
5 | select IP_SCTP | 5 | select IP_SCTP |
6 | help | 6 | help |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 871c1abf6029..77c0f70f8fe8 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -337,6 +337,7 @@ enum rsb_flags { | |||
337 | RSB_NEW_MASTER2, | 337 | RSB_NEW_MASTER2, |
338 | RSB_RECOVER_CONVERT, | 338 | RSB_RECOVER_CONVERT, |
339 | RSB_RECOVER_GRANT, | 339 | RSB_RECOVER_GRANT, |
340 | RSB_RECOVER_LVB_INVAL, | ||
340 | }; | 341 | }; |
341 | 342 | ||
342 | static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag) | 343 | static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag) |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index b56950758188..a579f30f237d 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -5393,6 +5393,13 @@ static void purge_dead_list(struct dlm_ls *ls, struct dlm_rsb *r, | |||
5393 | if ((lkb->lkb_nodeid == nodeid_gone) || | 5393 | if ((lkb->lkb_nodeid == nodeid_gone) || |
5394 | dlm_is_removed(ls, lkb->lkb_nodeid)) { | 5394 | dlm_is_removed(ls, lkb->lkb_nodeid)) { |
5395 | 5395 | ||
5396 | /* tell recover_lvb to invalidate the lvb | ||
5397 | because a node holding EX/PW failed */ | ||
5398 | if ((lkb->lkb_exflags & DLM_LKF_VALBLK) && | ||
5399 | (lkb->lkb_grmode >= DLM_LOCK_PW)) { | ||
5400 | rsb_set_flag(r, RSB_RECOVER_LVB_INVAL); | ||
5401 | } | ||
5402 | |||
5396 | del_lkb(r, lkb); | 5403 | del_lkb(r, lkb); |
5397 | 5404 | ||
5398 | /* this put should free the lkb */ | 5405 | /* this put should free the lkb */ |
@@ -6025,15 +6032,18 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | |||
6025 | return error; | 6032 | return error; |
6026 | } | 6033 | } |
6027 | 6034 | ||
6028 | /* The force flag allows the unlock to go ahead even if the lkb isn't granted. | 6035 | /* The FORCEUNLOCK flag allows the unlock to go ahead even if the lkb isn't |
6029 | Regardless of what rsb queue the lock is on, it's removed and freed. */ | 6036 | granted. Regardless of what rsb queue the lock is on, it's removed and |
6037 | freed. The IVVALBLK flag causes the lvb on the resource to be invalidated | ||
6038 | if our lock is PW/EX (it's ignored if our granted mode is smaller.) */ | ||
6030 | 6039 | ||
6031 | static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) | 6040 | static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) |
6032 | { | 6041 | { |
6033 | struct dlm_args args; | 6042 | struct dlm_args args; |
6034 | int error; | 6043 | int error; |
6035 | 6044 | ||
6036 | set_unlock_args(DLM_LKF_FORCEUNLOCK, lkb->lkb_ua, &args); | 6045 | set_unlock_args(DLM_LKF_FORCEUNLOCK | DLM_LKF_IVVALBLK, |
6046 | lkb->lkb_ua, &args); | ||
6037 | 6047 | ||
6038 | error = unlock_lock(ls, lkb, &args); | 6048 | error = unlock_lock(ls, lkb, &args); |
6039 | if (error == -DLM_EUNLOCK) | 6049 | if (error == -DLM_EUNLOCK) |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 331ea4f94efd..dd87a31bcc21 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -1385,7 +1385,6 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | |||
1385 | struct connection *con; | 1385 | struct connection *con; |
1386 | struct writequeue_entry *e; | 1386 | struct writequeue_entry *e; |
1387 | int offset = 0; | 1387 | int offset = 0; |
1388 | int users = 0; | ||
1389 | 1388 | ||
1390 | con = nodeid2con(nodeid, allocation); | 1389 | con = nodeid2con(nodeid, allocation); |
1391 | if (!con) | 1390 | if (!con) |
@@ -1399,7 +1398,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | |||
1399 | } else { | 1398 | } else { |
1400 | offset = e->end; | 1399 | offset = e->end; |
1401 | e->end += len; | 1400 | e->end += len; |
1402 | users = e->users++; | 1401 | e->users++; |
1403 | } | 1402 | } |
1404 | spin_unlock(&con->writequeue_lock); | 1403 | spin_unlock(&con->writequeue_lock); |
1405 | 1404 | ||
@@ -1414,7 +1413,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) | |||
1414 | spin_lock(&con->writequeue_lock); | 1413 | spin_lock(&con->writequeue_lock); |
1415 | offset = e->end; | 1414 | offset = e->end; |
1416 | e->end += len; | 1415 | e->end += len; |
1417 | users = e->users++; | 1416 | e->users++; |
1418 | list_add_tail(&e->list, &con->writequeue); | 1417 | list_add_tail(&e->list, &con->writequeue); |
1419 | spin_unlock(&con->writequeue_lock); | 1418 | spin_unlock(&con->writequeue_lock); |
1420 | goto got_one; | 1419 | goto got_one; |
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index 4a7a76e42fc3..aedea28a86a1 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -717,8 +717,14 @@ void dlm_recovered_lock(struct dlm_rsb *r) | |||
717 | * the VALNOTVALID flag if necessary, and determining the correct lvb contents | 717 | * the VALNOTVALID flag if necessary, and determining the correct lvb contents |
718 | * based on the lvb's of the locks held on the rsb. | 718 | * based on the lvb's of the locks held on the rsb. |
719 | * | 719 | * |
720 | * RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it | 720 | * RSB_VALNOTVALID is set in two cases: |
721 | * was already set prior to recovery, it's not cleared, regardless of locks. | 721 | * |
722 | * 1. we are master, but not new, and we purged an EX/PW lock held by a | ||
723 | * failed node (in dlm_recover_purge which set RSB_RECOVER_LVB_INVAL) | ||
724 | * | ||
725 | * 2. we are a new master, and there are only NL/CR locks left. | ||
726 | * (We could probably improve this by only invalidating in this way when | ||
727 | * the previous master left uncleanly. VMS docs mention that.) | ||
722 | * | 728 | * |
723 | * The LVB contents are only considered for changing when this is a new master | 729 | * The LVB contents are only considered for changing when this is a new master |
724 | * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with | 730 | * of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with |
@@ -734,6 +740,19 @@ static void recover_lvb(struct dlm_rsb *r) | |||
734 | int big_lock_exists = 0; | 740 | int big_lock_exists = 0; |
735 | int lvblen = r->res_ls->ls_lvblen; | 741 | int lvblen = r->res_ls->ls_lvblen; |
736 | 742 | ||
743 | if (!rsb_flag(r, RSB_NEW_MASTER2) && | ||
744 | rsb_flag(r, RSB_RECOVER_LVB_INVAL)) { | ||
745 | /* case 1 above */ | ||
746 | rsb_set_flag(r, RSB_VALNOTVALID); | ||
747 | return; | ||
748 | } | ||
749 | |||
750 | if (!rsb_flag(r, RSB_NEW_MASTER2)) | ||
751 | return; | ||
752 | |||
753 | /* we are the new master, so figure out if VALNOTVALID should | ||
754 | be set, and set the rsb lvb from the best lkb available. */ | ||
755 | |||
737 | list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { | 756 | list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) { |
738 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) | 757 | if (!(lkb->lkb_exflags & DLM_LKF_VALBLK)) |
739 | continue; | 758 | continue; |
@@ -772,13 +791,10 @@ static void recover_lvb(struct dlm_rsb *r) | |||
772 | if (!lock_lvb_exists) | 791 | if (!lock_lvb_exists) |
773 | goto out; | 792 | goto out; |
774 | 793 | ||
794 | /* lvb is invalidated if only NL/CR locks remain */ | ||
775 | if (!big_lock_exists) | 795 | if (!big_lock_exists) |
776 | rsb_set_flag(r, RSB_VALNOTVALID); | 796 | rsb_set_flag(r, RSB_VALNOTVALID); |
777 | 797 | ||
778 | /* don't mess with the lvb unless we're the new master */ | ||
779 | if (!rsb_flag(r, RSB_NEW_MASTER2)) | ||
780 | goto out; | ||
781 | |||
782 | if (!r->res_lvbptr) { | 798 | if (!r->res_lvbptr) { |
783 | r->res_lvbptr = dlm_allocate_lvb(r->res_ls); | 799 | r->res_lvbptr = dlm_allocate_lvb(r->res_ls); |
784 | if (!r->res_lvbptr) | 800 | if (!r->res_lvbptr) |
@@ -852,12 +868,19 @@ void dlm_recover_rsbs(struct dlm_ls *ls) | |||
852 | if (is_master(r)) { | 868 | if (is_master(r)) { |
853 | if (rsb_flag(r, RSB_RECOVER_CONVERT)) | 869 | if (rsb_flag(r, RSB_RECOVER_CONVERT)) |
854 | recover_conversion(r); | 870 | recover_conversion(r); |
871 | |||
872 | /* recover lvb before granting locks so the updated | ||
873 | lvb/VALNOTVALID is presented in the completion */ | ||
874 | recover_lvb(r); | ||
875 | |||
855 | if (rsb_flag(r, RSB_NEW_MASTER2)) | 876 | if (rsb_flag(r, RSB_NEW_MASTER2)) |
856 | recover_grant(r); | 877 | recover_grant(r); |
857 | recover_lvb(r); | ||
858 | count++; | 878 | count++; |
879 | } else { | ||
880 | rsb_clear_flag(r, RSB_VALNOTVALID); | ||
859 | } | 881 | } |
860 | rsb_clear_flag(r, RSB_RECOVER_CONVERT); | 882 | rsb_clear_flag(r, RSB_RECOVER_CONVERT); |
883 | rsb_clear_flag(r, RSB_RECOVER_LVB_INVAL); | ||
861 | rsb_clear_flag(r, RSB_NEW_MASTER2); | 884 | rsb_clear_flag(r, RSB_NEW_MASTER2); |
862 | unlock_rsb(r); | 885 | unlock_rsb(r); |
863 | } | 886 | } |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -1349,7 +1349,7 @@ EXPORT_SYMBOL(remove_arg_zero); | |||
1349 | /* | 1349 | /* |
1350 | * cycle the list of binary formats handler, until one recognizes the image | 1350 | * cycle the list of binary formats handler, until one recognizes the image |
1351 | */ | 1351 | */ |
1352 | int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | 1352 | int search_binary_handler(struct linux_binprm *bprm) |
1353 | { | 1353 | { |
1354 | unsigned int depth = bprm->recursion_depth; | 1354 | unsigned int depth = bprm->recursion_depth; |
1355 | int try,retval; | 1355 | int try,retval; |
@@ -1374,13 +1374,13 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1374 | for (try=0; try<2; try++) { | 1374 | for (try=0; try<2; try++) { |
1375 | read_lock(&binfmt_lock); | 1375 | read_lock(&binfmt_lock); |
1376 | list_for_each_entry(fmt, &formats, lh) { | 1376 | list_for_each_entry(fmt, &formats, lh) { |
1377 | int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary; | 1377 | int (*fn)(struct linux_binprm *) = fmt->load_binary; |
1378 | if (!fn) | 1378 | if (!fn) |
1379 | continue; | 1379 | continue; |
1380 | if (!try_module_get(fmt->module)) | 1380 | if (!try_module_get(fmt->module)) |
1381 | continue; | 1381 | continue; |
1382 | read_unlock(&binfmt_lock); | 1382 | read_unlock(&binfmt_lock); |
1383 | retval = fn(bprm, regs); | 1383 | retval = fn(bprm); |
1384 | /* | 1384 | /* |
1385 | * Restore the depth counter to its starting value | 1385 | * Restore the depth counter to its starting value |
1386 | * in this call, so we don't have to rely on every | 1386 | * in this call, so we don't have to rely on every |
@@ -1439,8 +1439,7 @@ EXPORT_SYMBOL(search_binary_handler); | |||
1439 | */ | 1439 | */ |
1440 | static int do_execve_common(const char *filename, | 1440 | static int do_execve_common(const char *filename, |
1441 | struct user_arg_ptr argv, | 1441 | struct user_arg_ptr argv, |
1442 | struct user_arg_ptr envp, | 1442 | struct user_arg_ptr envp) |
1443 | struct pt_regs *regs) | ||
1444 | { | 1443 | { |
1445 | struct linux_binprm *bprm; | 1444 | struct linux_binprm *bprm; |
1446 | struct file *file; | 1445 | struct file *file; |
@@ -1524,7 +1523,7 @@ static int do_execve_common(const char *filename, | |||
1524 | if (retval < 0) | 1523 | if (retval < 0) |
1525 | goto out; | 1524 | goto out; |
1526 | 1525 | ||
1527 | retval = search_binary_handler(bprm,regs); | 1526 | retval = search_binary_handler(bprm); |
1528 | if (retval < 0) | 1527 | if (retval < 0) |
1529 | goto out; | 1528 | goto out; |
1530 | 1529 | ||
@@ -1566,19 +1565,17 @@ out_ret: | |||
1566 | 1565 | ||
1567 | int do_execve(const char *filename, | 1566 | int do_execve(const char *filename, |
1568 | const char __user *const __user *__argv, | 1567 | const char __user *const __user *__argv, |
1569 | const char __user *const __user *__envp, | 1568 | const char __user *const __user *__envp) |
1570 | struct pt_regs *regs) | ||
1571 | { | 1569 | { |
1572 | struct user_arg_ptr argv = { .ptr.native = __argv }; | 1570 | struct user_arg_ptr argv = { .ptr.native = __argv }; |
1573 | struct user_arg_ptr envp = { .ptr.native = __envp }; | 1571 | struct user_arg_ptr envp = { .ptr.native = __envp }; |
1574 | return do_execve_common(filename, argv, envp, regs); | 1572 | return do_execve_common(filename, argv, envp); |
1575 | } | 1573 | } |
1576 | 1574 | ||
1577 | #ifdef CONFIG_COMPAT | 1575 | #ifdef CONFIG_COMPAT |
1578 | int compat_do_execve(const char *filename, | 1576 | static int compat_do_execve(const char *filename, |
1579 | const compat_uptr_t __user *__argv, | 1577 | const compat_uptr_t __user *__argv, |
1580 | const compat_uptr_t __user *__envp, | 1578 | const compat_uptr_t __user *__envp) |
1581 | struct pt_regs *regs) | ||
1582 | { | 1579 | { |
1583 | struct user_arg_ptr argv = { | 1580 | struct user_arg_ptr argv = { |
1584 | .is_compat = true, | 1581 | .is_compat = true, |
@@ -1588,7 +1585,7 @@ int compat_do_execve(const char *filename, | |||
1588 | .is_compat = true, | 1585 | .is_compat = true, |
1589 | .ptr.compat = __envp, | 1586 | .ptr.compat = __envp, |
1590 | }; | 1587 | }; |
1591 | return do_execve_common(filename, argv, envp, regs); | 1588 | return do_execve_common(filename, argv, envp); |
1592 | } | 1589 | } |
1593 | #endif | 1590 | #endif |
1594 | 1591 | ||
@@ -1669,7 +1666,7 @@ SYSCALL_DEFINE3(execve, | |||
1669 | struct filename *path = getname(filename); | 1666 | struct filename *path = getname(filename); |
1670 | int error = PTR_ERR(path); | 1667 | int error = PTR_ERR(path); |
1671 | if (!IS_ERR(path)) { | 1668 | if (!IS_ERR(path)) { |
1672 | error = do_execve(path->name, argv, envp, current_pt_regs()); | 1669 | error = do_execve(path->name, argv, envp); |
1673 | putname(path); | 1670 | putname(path); |
1674 | } | 1671 | } |
1675 | return error; | 1672 | return error; |
@@ -1682,8 +1679,7 @@ asmlinkage long compat_sys_execve(const char __user * filename, | |||
1682 | struct filename *path = getname(filename); | 1679 | struct filename *path = getname(filename); |
1683 | int error = PTR_ERR(path); | 1680 | int error = PTR_ERR(path); |
1684 | if (!IS_ERR(path)) { | 1681 | if (!IS_ERR(path)) { |
1685 | error = compat_do_execve(path->name, argv, envp, | 1682 | error = compat_do_execve(path->name, argv, envp); |
1686 | current_pt_regs()); | ||
1687 | putname(path); | 1683 | putname(path); |
1688 | } | 1684 | } |
1689 | return error; | 1685 | return error; |
@@ -1696,12 +1692,9 @@ int kernel_execve(const char *filename, | |||
1696 | const char *const argv[], | 1692 | const char *const argv[], |
1697 | const char *const envp[]) | 1693 | const char *const envp[]) |
1698 | { | 1694 | { |
1699 | struct pt_regs *p = current_pt_regs(); | 1695 | int ret = do_execve(filename, |
1700 | int ret; | ||
1701 | |||
1702 | ret = do_execve(filename, | ||
1703 | (const char __user *const __user *)argv, | 1696 | (const char __user *const __user *)argv, |
1704 | (const char __user *const __user *)envp, p); | 1697 | (const char __user *const __user *)envp); |
1705 | if (ret < 0) | 1698 | if (ret < 0) |
1706 | return ret; | 1699 | return ret; |
1707 | 1700 | ||
@@ -1709,6 +1702,6 @@ int kernel_execve(const char *filename, | |||
1709 | * We were successful. We won't be returning to our caller, but | 1702 | * We were successful. We won't be returning to our caller, but |
1710 | * instead to user space by manipulating the kernel stack. | 1703 | * instead to user space by manipulating the kernel stack. |
1711 | */ | 1704 | */ |
1712 | ret_from_kernel_execve(p); | 1705 | ret_from_kernel_execve(current_pt_regs()); |
1713 | } | 1706 | } |
1714 | #endif | 1707 | #endif |
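With pt_regs threaded out of the execve path, anything that still needs the register frame derives it from the current task. A sketch of a binfmt handler after the signature change (the handler name is illustrative, not from this diff):

        static int load_foo_binary(struct linux_binprm *bprm)
        {
                struct pt_regs *regs = current_pt_regs();  /* was a parameter */

                /* ... set up the new image, then start_thread(regs, ...) ... */
                return 0;
        }

        static struct linux_binfmt foo_format = {
                .module      = THIS_MODULE,
                .load_binary = load_foo_binary,  /* matches the new prototype */
        };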
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 7320a66e958f..22548f56197b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -2101,8 +2101,9 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
2101 | end = start + (range->len >> sb->s_blocksize_bits) - 1; | 2101 | end = start + (range->len >> sb->s_blocksize_bits) - 1; |
2102 | minlen = range->minlen >> sb->s_blocksize_bits; | 2102 | minlen = range->minlen >> sb->s_blocksize_bits; |
2103 | 2103 | ||
2104 | if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) || | 2104 | if (minlen > EXT3_BLOCKS_PER_GROUP(sb) || |
2105 | unlikely(start >= max_blks)) | 2105 | start >= max_blks || |
2106 | range->len < sb->s_blocksize) | ||
2106 | return -EINVAL; | 2107 | return -EINVAL; |
2107 | if (end >= max_blks) | 2108 | if (end >= max_blks) |
2108 | end = max_blks - 1; | 2109 | end = max_blks - 1; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3c20de1d59d0..df163da388c9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -2455,7 +2455,7 @@ TAS_BUFFER_FNS(Uninit, uninit) | |||
2455 | BUFFER_FNS(Da_Mapped, da_mapped) | 2455 | BUFFER_FNS(Da_Mapped, da_mapped) |
2456 | 2456 | ||
2457 | /* | 2457 | /* |
2458 | * Add new method to test wether block and inode bitmaps are properly | 2458 | * Add new method to test whether block and inode bitmaps are properly |
2459 | * initialized. With uninit_bg reading the block from disk is not enough | 2459 | * initialized. With uninit_bg reading the block from disk is not enough |
2460 | * to mark the bitmap uptodate. We need to also zero-out the bitmap | 2460 | * to mark the bitmap uptodate. We need to also zero-out the bitmap |
2461 | */ | 2461 | */ |
diff --git a/fs/fhandle.c b/fs/fhandle.c index f775bfdd6e4a..cccdc874bb55 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c | |||
@@ -22,7 +22,7 @@ static long do_sys_name_to_handle(struct path *path, | |||
22 | struct file_handle *handle = NULL; | 22 | struct file_handle *handle = NULL; |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * We need t make sure wether the file system | 25 | * We need to make sure whether the file system |
26 | * support decoding of the file handle | 26 | * support decoding of the file handle |
27 | */ | 27 | */ |
28 | if (!path->dentry->d_sb->s_export_op || | 28 | if (!path->dentry->d_sb->s_export_op || |
@@ -40,7 +40,7 @@ static long do_sys_name_to_handle(struct path *path, | |||
40 | if (!handle) | 40 | if (!handle) |
41 | return -ENOMEM; | 41 | return -ENOMEM; |
42 | 42 | ||
43 | /* convert handle size to multiple of sizeof(u32) */ | 43 | /* convert handle size to multiple of sizeof(u32) */ |
44 | handle_dwords = f_handle.handle_bytes >> 2; | 44 | handle_dwords = f_handle.handle_bytes >> 2; |
45 | 45 | ||
46 | /* we ask for a non connected handle */ | 46 | /* we ask for a non connected handle */ |
diff --git a/fs/file.c b/fs/file.c --- a/fs/file.c +++ b/fs/file.c | |||
@@ -519,12 +519,6 @@ struct files_struct init_files = { | |||
519 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | 519 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), |
520 | }; | 520 | }; |
521 | 521 | ||
522 | void daemonize_descriptors(void) | ||
523 | { | ||
524 | atomic_inc(&init_files.count); | ||
525 | reset_files_struct(&init_files); | ||
526 | } | ||
527 | |||
528 | /* | 522 | /* |
529 | * allocate a file descriptor, mark it busy. | 523 | * allocate a file descriptor, mark it busy. |
530 | */ | 524 | */ |
@@ -685,7 +679,6 @@ void do_close_on_exec(struct files_struct *files) | |||
685 | struct fdtable *fdt; | 679 | struct fdtable *fdt; |
686 | 680 | ||
687 | /* exec unshares first */ | 681 | /* exec unshares first */ |
688 | BUG_ON(atomic_read(&files->count) != 1); | ||
689 | spin_lock(&files->file_lock); | 682 | spin_lock(&files->file_lock); |
690 | for (i = 0; ; i++) { | 683 | for (i = 0; ; i++) { |
691 | unsigned long set; | 684 | unsigned long set; |
@@ -995,16 +988,18 @@ int iterate_fd(struct files_struct *files, unsigned n, | |||
995 | const void *p) | 988 | const void *p) |
996 | { | 989 | { |
997 | struct fdtable *fdt; | 990 | struct fdtable *fdt; |
998 | struct file *file; | ||
999 | int res = 0; | 991 | int res = 0; |
1000 | if (!files) | 992 | if (!files) |
1001 | return 0; | 993 | return 0; |
1002 | spin_lock(&files->file_lock); | 994 | spin_lock(&files->file_lock); |
1003 | fdt = files_fdtable(files); | 995 | for (fdt = files_fdtable(files); n < fdt->max_fds; n++) { |
1004 | while (!res && n < fdt->max_fds) { | 996 | struct file *file; |
1005 | file = rcu_dereference_check_fdtable(files, fdt->fd[n++]); | 997 | file = rcu_dereference_check_fdtable(files, fdt->fd[n]); |
1006 | if (file) | 998 | if (!file) |
1007 | res = f(p, file, n); | 999 | continue; |
1000 | res = f(p, file, n); | ||
1001 | if (res) | ||
1002 | break; | ||
1008 | } | 1003 | } |
1009 | spin_unlock(&files->file_lock); | 1004 | spin_unlock(&files->file_lock); |
1010 | return res; | 1005 | return res; |
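Besides stopping cleanly on the first nonzero return, the rewritten loop hands the callback the descriptor it is actually looking at (the old code passed f() the post-increment index). A hedged usage sketch; the callback and its caller are illustrative, not from this diff:

        /* nonzero return stops the walk and becomes iterate_fd()'s result */
        static int match_file(const void *p, struct file *file, unsigned fd)
        {
                return file == p ? (int)fd + 1 : 0;  /* +1 so fd 0 is visible */
        }

        /* yields fd + 1, or 0 when filp is not in the table */
        n = iterate_fd(files, 0, match_file, filp);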
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 51ea267d444c..310972b72a66 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) | |||
228 | static void inode_sync_complete(struct inode *inode) | 228 | static void inode_sync_complete(struct inode *inode) |
229 | { | 229 | { |
230 | inode->i_state &= ~I_SYNC; | 230 | inode->i_state &= ~I_SYNC; |
231 | /* If inode is clean and unused, put it into LRU now... */ | ||
232 | inode_add_lru(inode); | ||
231 | /* Waiters must see I_SYNC cleared before being woken up */ | 233 | /* Waiters must see I_SYNC cleared before being woken up */ |
232 | smp_mb(); | 234 | smp_mb(); |
233 | wake_up_bit(&inode->i_state, __I_SYNC); | 235 | wake_up_bit(&inode->i_state, __I_SYNC); |
@@ -1032,7 +1034,7 @@ int bdi_writeback_thread(void *data) | |||
1032 | while (!kthread_freezable_should_stop(NULL)) { | 1034 | while (!kthread_freezable_should_stop(NULL)) { |
1033 | /* | 1035 | /* |
1034 | * Remove own delayed wake-up timer, since we are already awake | 1036 | * Remove own delayed wake-up timer, since we are already awake |
1035 | * and we'll take care of the preriodic write-back. | 1037 | * and we'll take care of the periodic write-back. |
1036 | */ | 1038 | */ |
1037 | del_timer(&wb->wakeup_timer); | 1039 | del_timer(&wb->wakeup_timer); |
1038 | 1040 | ||
diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 5df4775fea03..fe6ca583bbc0 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c | |||
@@ -164,27 +164,3 @@ struct fs_struct init_fs = { | |||
164 | .seq = SEQCNT_ZERO, | 164 | .seq = SEQCNT_ZERO, |
165 | .umask = 0022, | 165 | .umask = 0022, |
166 | }; | 166 | }; |
167 | |||
168 | void daemonize_fs_struct(void) | ||
169 | { | ||
170 | struct fs_struct *fs = current->fs; | ||
171 | |||
172 | if (fs) { | ||
173 | int kill; | ||
174 | |||
175 | task_lock(current); | ||
176 | |||
177 | spin_lock(&init_fs.lock); | ||
178 | init_fs.users++; | ||
179 | spin_unlock(&init_fs.lock); | ||
180 | |||
181 | spin_lock(&fs->lock); | ||
182 | current->fs = &init_fs; | ||
183 | kill = !--fs->users; | ||
184 | spin_unlock(&fs->lock); | ||
185 | |||
186 | task_unlock(current); | ||
187 | if (kill) | ||
188 | free_fs_struct(fs); | ||
189 | } | ||
190 | } | ||
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 01c4975da4bc..30de4f2a2ea9 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -643,7 +643,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
643 | goto out_unlock; | 643 | goto out_unlock; |
644 | 644 | ||
645 | requested = data_blocks + ind_blocks; | 645 | requested = data_blocks + ind_blocks; |
646 | error = gfs2_inplace_reserve(ip, requested); | 646 | error = gfs2_inplace_reserve(ip, requested, 0); |
647 | if (error) | 647 | if (error) |
648 | goto out_qunlock; | 648 | goto out_qunlock; |
649 | } | 649 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1fd3ae237bdd..a68e91bcef3d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -991,6 +991,41 @@ unlock: | |||
991 | return err; | 991 | return err; |
992 | } | 992 | } |
993 | 993 | ||
994 | /** | ||
995 | * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files | ||
996 | * @inode: The inode being truncated | ||
997 | * @oldsize: The original (larger) size | ||
998 | * @newsize: The new smaller size | ||
999 | * | ||
1000 | * With jdata files, we have to journal a revoke for each block which is | ||
1001 | * truncated. As a result, we need to split this into separate transactions | ||
1002 | * if the number of pages being truncated gets too large. | ||
1003 | */ | ||
1004 | |||
1005 | #define GFS2_JTRUNC_REVOKES 8192 | ||
1006 | |||
1007 | static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) | ||
1008 | { | ||
1009 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1010 | u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize; | ||
1011 | u64 chunk; | ||
1012 | int error; | ||
1013 | |||
1014 | while (oldsize != newsize) { | ||
1015 | chunk = oldsize - newsize; | ||
1016 | if (chunk > max_chunk) | ||
1017 | chunk = max_chunk; | ||
1018 | truncate_pagecache(inode, oldsize, oldsize - chunk); | ||
1019 | oldsize -= chunk; | ||
1020 | gfs2_trans_end(sdp); | ||
1021 | error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); | ||
1022 | if (error) | ||
1023 | return error; | ||
1024 | } | ||
1025 | |||
1026 | return 0; | ||
1027 | } | ||
1028 | |||
994 | static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) | 1029 | static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) |
995 | { | 1030 | { |
996 | struct gfs2_inode *ip = GFS2_I(inode); | 1031 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -1000,8 +1035,10 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) | |||
1000 | int journaled = gfs2_is_jdata(ip); | 1035 | int journaled = gfs2_is_jdata(ip); |
1001 | int error; | 1036 | int error; |
1002 | 1037 | ||
1003 | error = gfs2_trans_begin(sdp, | 1038 | if (journaled) |
1004 | RES_DINODE + (journaled ? RES_JDATA : 0), 0); | 1039 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES); |
1040 | else | ||
1041 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
1005 | if (error) | 1042 | if (error) |
1006 | return error; | 1043 | return error; |
1007 | 1044 | ||
@@ -1026,7 +1063,16 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) | |||
1026 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1063 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
1027 | gfs2_dinode_out(ip, dibh->b_data); | 1064 | gfs2_dinode_out(ip, dibh->b_data); |
1028 | 1065 | ||
1029 | truncate_pagecache(inode, oldsize, newsize); | 1066 | if (journaled) |
1067 | error = gfs2_journaled_truncate(inode, oldsize, newsize); | ||
1068 | else | ||
1069 | truncate_pagecache(inode, oldsize, newsize); | ||
1070 | |||
1071 | if (error) { | ||
1072 | brelse(dibh); | ||
1073 | return error; | ||
1074 | } | ||
1075 | |||
1030 | out_brelse: | 1076 | out_brelse: |
1031 | brelse(dibh); | 1077 | brelse(dibh); |
1032 | out: | 1078 | out: |
@@ -1178,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size) | |||
1178 | if (error) | 1224 | if (error) |
1179 | return error; | 1225 | return error; |
1180 | 1226 | ||
1181 | error = gfs2_inplace_reserve(ip, 1); | 1227 | error = gfs2_inplace_reserve(ip, 1, 0); |
1182 | if (error) | 1228 | if (error) |
1183 | goto do_grow_qunlock; | 1229 | goto do_grow_qunlock; |
1184 | unstuff = 1; | 1230 | unstuff = 1; |
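To put the truncation chunking above in concrete terms (the 4 KiB block size is an assumed example, not taken from the patch):

	/* Illustrative arithmetic only -- 4096 is an assumed block size. */
	u64 max_chunk = 8192 /* GFS2_JTRUNC_REVOKES */ * 4096;  /* 32 MiB */
	/* A 1 GiB jdata truncate would thus span 32 transactions: each
	 * loop iteration calls gfs2_trans_end() and then re-opens with
	 * gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES), so no
	 * single transaction journals more than 8192 revokes. */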
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 259b088cfc4c..9a35670fdc38 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -1676,16 +1676,11 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
1676 | be16_add_cpu(&leaf->lf_entries, 1); | 1676 | be16_add_cpu(&leaf->lf_entries, 1); |
1677 | } | 1677 | } |
1678 | brelse(bh); | 1678 | brelse(bh); |
1679 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
1680 | if (error) | ||
1681 | break; | ||
1682 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
1683 | ip->i_entries++; | 1679 | ip->i_entries++; |
1684 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1680 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
1685 | if (S_ISDIR(nip->i_inode.i_mode)) | 1681 | if (S_ISDIR(nip->i_inode.i_mode)) |
1686 | inc_nlink(&ip->i_inode); | 1682 | inc_nlink(&ip->i_inode); |
1687 | gfs2_dinode_out(ip, bh->b_data); | 1683 | mark_inode_dirty(inode); |
1688 | brelse(bh); | ||
1689 | error = 0; | 1684 | error = 0; |
1690 | break; | 1685 | break; |
1691 | } | 1686 | } |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e056b4ce4877..dfe2d8cb9b2c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -432,7 +432,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
432 | if (ret) | 432 | if (ret) |
433 | goto out_unlock; | 433 | goto out_unlock; |
434 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); | 434 | gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); |
435 | ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); | 435 | ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); |
436 | if (ret) | 436 | if (ret) |
437 | goto out_quota_unlock; | 437 | goto out_quota_unlock; |
438 | 438 | ||
@@ -825,7 +825,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
825 | retry: | 825 | retry: |
826 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | 826 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); |
827 | 827 | ||
828 | error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); | 828 | error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); |
829 | if (error) { | 829 | if (error) { |
830 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | 830 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { |
831 | bytes >>= 1; | 831 | bytes >>= 1; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e6c2fd53cab2..992c5c0cb504 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -55,8 +55,6 @@ struct gfs2_glock_iter { | |||
55 | 55 | ||
56 | typedef void (*glock_examiner) (struct gfs2_glock * gl); | 56 | typedef void (*glock_examiner) (struct gfs2_glock * gl); |
57 | 57 | ||
58 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); | ||
59 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) | ||
60 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); | 58 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); |
61 | 59 | ||
62 | static struct dentry *gfs2_root; | 60 | static struct dentry *gfs2_root; |
@@ -107,10 +105,12 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) | |||
107 | { | 105 | { |
108 | struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); | 106 | struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); |
109 | 107 | ||
110 | if (gl->gl_ops->go_flags & GLOF_ASPACE) | 108 | if (gl->gl_ops->go_flags & GLOF_ASPACE) { |
111 | kmem_cache_free(gfs2_glock_aspace_cachep, gl); | 109 | kmem_cache_free(gfs2_glock_aspace_cachep, gl); |
112 | else | 110 | } else { |
111 | kfree(gl->gl_lksb.sb_lvbptr); | ||
113 | kmem_cache_free(gfs2_glock_cachep, gl); | 112 | kmem_cache_free(gfs2_glock_cachep, gl); |
113 | } | ||
114 | } | 114 | } |
115 | 115 | ||
116 | void gfs2_glock_free(struct gfs2_glock *gl) | 116 | void gfs2_glock_free(struct gfs2_glock *gl) |
@@ -537,8 +537,8 @@ __acquires(&gl->gl_spin) | |||
537 | (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) | 537 | (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) |
538 | clear_bit(GLF_BLOCKING, &gl->gl_flags); | 538 | clear_bit(GLF_BLOCKING, &gl->gl_flags); |
539 | spin_unlock(&gl->gl_spin); | 539 | spin_unlock(&gl->gl_spin); |
540 | if (glops->go_xmote_th) | 540 | if (glops->go_sync) |
541 | glops->go_xmote_th(gl); | 541 | glops->go_sync(gl); |
542 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) | 542 | if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) |
543 | glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); | 543 | glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); |
544 | clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); | 544 | clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); |
@@ -547,7 +547,10 @@ __acquires(&gl->gl_spin) | |||
547 | if (sdp->sd_lockstruct.ls_ops->lm_lock) { | 547 | if (sdp->sd_lockstruct.ls_ops->lm_lock) { |
548 | /* lock_dlm */ | 548 | /* lock_dlm */ |
549 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); | 549 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); |
550 | GLOCK_BUG_ON(gl, ret); | 550 | if (ret) { |
551 | printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); | ||
552 | GLOCK_BUG_ON(gl, 1); | ||
553 | } | ||
551 | } else { /* lock_nolock */ | 554 | } else { /* lock_nolock */ |
552 | finish_xmote(gl, target); | 555 | finish_xmote(gl, target); |
553 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 556 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
@@ -736,6 +739,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
736 | if (!gl) | 739 | if (!gl) |
737 | return -ENOMEM; | 740 | return -ENOMEM; |
738 | 741 | ||
742 | memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); | ||
743 | |||
744 | if (glops->go_flags & GLOF_LVB) { | ||
745 | gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); | ||
746 | if (!gl->gl_lksb.sb_lvbptr) { | ||
747 | kmem_cache_free(cachep, gl); | ||
748 | return -ENOMEM; | ||
749 | } | ||
750 | } | ||
751 | |||
739 | atomic_inc(&sdp->sd_glock_disposal); | 752 | atomic_inc(&sdp->sd_glock_disposal); |
740 | gl->gl_sbd = sdp; | 753 | gl->gl_sbd = sdp; |
741 | gl->gl_flags = 0; | 754 | gl->gl_flags = 0; |
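The hunks in this file move the lock value block from a fixed 32-byte array inside the glock to an on-demand allocation. A condensed sketch of the resulting lifetime (error handling elided; this restates the diff rather than adding behaviour):

	/* gfs2_glock_get(): allocate only for GLOF_LVB glock types */
	if (glops->go_flags & GLOF_LVB) {
		gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL);
		if (!gl->gl_lksb.sb_lvbptr)
			return -ENOMEM;        /* after freeing gl itself */
	}

	/* Teardown: both the lost-insertion-race path below and
	 * gfs2_glock_dealloc() now kfree(gl->gl_lksb.sb_lvbptr);
	 * kfree(NULL) is a no-op, so non-LVB glocks need no check. */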
@@ -753,9 +766,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
753 | preempt_enable(); | 766 | preempt_enable(); |
754 | gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; | 767 | gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; |
755 | gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; | 768 | gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; |
756 | memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); | ||
757 | memset(gl->gl_lvb, 0, 32 * sizeof(char)); | ||
758 | gl->gl_lksb.sb_lvbptr = gl->gl_lvb; | ||
759 | gl->gl_tchange = jiffies; | 769 | gl->gl_tchange = jiffies; |
760 | gl->gl_object = NULL; | 770 | gl->gl_object = NULL; |
761 | gl->gl_hold_time = GL_GLOCK_DFT_HOLD; | 771 | gl->gl_hold_time = GL_GLOCK_DFT_HOLD; |
@@ -768,7 +778,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
768 | mapping->host = s->s_bdev->bd_inode; | 778 | mapping->host = s->s_bdev->bd_inode; |
769 | mapping->flags = 0; | 779 | mapping->flags = 0; |
770 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 780 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
771 | mapping->assoc_mapping = NULL; | 781 | mapping->private_data = NULL; |
772 | mapping->backing_dev_info = s->s_bdi; | 782 | mapping->backing_dev_info = s->s_bdi; |
773 | mapping->writeback_index = 0; | 783 | mapping->writeback_index = 0; |
774 | } | 784 | } |
@@ -777,6 +787,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
777 | tmp = search_bucket(hash, sdp, &name); | 787 | tmp = search_bucket(hash, sdp, &name); |
778 | if (tmp) { | 788 | if (tmp) { |
779 | spin_unlock_bucket(hash); | 789 | spin_unlock_bucket(hash); |
790 | kfree(gl->gl_lksb.sb_lvbptr); | ||
780 | kmem_cache_free(cachep, gl); | 791 | kmem_cache_free(cachep, gl); |
781 | atomic_dec(&sdp->sd_glock_disposal); | 792 | atomic_dec(&sdp->sd_glock_disposal); |
782 | gl = tmp; | 793 | gl = tmp; |
@@ -1013,7 +1024,7 @@ trap_recursive: | |||
1013 | printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); | 1024 | printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); |
1014 | printk(KERN_ERR "lock type: %d req lock state : %d\n", | 1025 | printk(KERN_ERR "lock type: %d req lock state : %d\n", |
1015 | gh->gh_gl->gl_name.ln_type, gh->gh_state); | 1026 | gh->gh_gl->gl_name.ln_type, gh->gh_state); |
1016 | __dump_glock(NULL, gl); | 1027 | gfs2_dump_glock(NULL, gl); |
1017 | BUG(); | 1028 | BUG(); |
1018 | } | 1029 | } |
1019 | 1030 | ||
@@ -1508,7 +1519,7 @@ static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) | |||
1508 | { | 1519 | { |
1509 | int ret; | 1520 | int ret; |
1510 | spin_lock(&gl->gl_spin); | 1521 | spin_lock(&gl->gl_spin); |
1511 | ret = __dump_glock(seq, gl); | 1522 | ret = gfs2_dump_glock(seq, gl); |
1512 | spin_unlock(&gl->gl_spin); | 1523 | spin_unlock(&gl->gl_spin); |
1513 | return ret; | 1524 | return ret; |
1514 | } | 1525 | } |
@@ -1528,6 +1539,7 @@ static void dump_glock_func(struct gfs2_glock *gl) | |||
1528 | 1539 | ||
1529 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) | 1540 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) |
1530 | { | 1541 | { |
1542 | set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); | ||
1531 | glock_hash_walk(clear_glock, sdp); | 1543 | glock_hash_walk(clear_glock, sdp); |
1532 | flush_workqueue(glock_workqueue); | 1544 | flush_workqueue(glock_workqueue); |
1533 | wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); | 1545 | wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); |
@@ -1655,7 +1667,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) | |||
1655 | } | 1667 | } |
1656 | 1668 | ||
1657 | /** | 1669 | /** |
1658 | * __dump_glock - print information about a glock | 1670 | * gfs2_dump_glock - print information about a glock |
1659 | * @seq: The seq_file struct | 1671 | * @seq: The seq_file struct |
1660 | * @gl: the glock | 1672 | * @gl: the glock |
1661 | * | 1673 | * |
@@ -1672,7 +1684,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) | |||
1672 | * Returns: 0 on success, -ENOBUFS when we run out of space | 1684 | * Returns: 0 on success, -ENOBUFS when we run out of space |
1673 | */ | 1685 | */ |
1674 | 1686 | ||
1675 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) | 1687 | int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) |
1676 | { | 1688 | { |
1677 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 1689 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
1678 | unsigned long long dtime; | 1690 | unsigned long long dtime; |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 307ac31df781..fd580b7861d5 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -178,33 +178,33 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) | |||
178 | return NULL; | 178 | return NULL; |
179 | } | 179 | } |
180 | 180 | ||
181 | int gfs2_glock_get(struct gfs2_sbd *sdp, | 181 | extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, |
182 | u64 number, const struct gfs2_glock_operations *glops, | 182 | const struct gfs2_glock_operations *glops, |
183 | int create, struct gfs2_glock **glp); | 183 | int create, struct gfs2_glock **glp); |
184 | void gfs2_glock_hold(struct gfs2_glock *gl); | 184 | extern void gfs2_glock_hold(struct gfs2_glock *gl); |
185 | void gfs2_glock_put_nolock(struct gfs2_glock *gl); | 185 | extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); |
186 | void gfs2_glock_put(struct gfs2_glock *gl); | 186 | extern void gfs2_glock_put(struct gfs2_glock *gl); |
187 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | 187 | extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, |
188 | struct gfs2_holder *gh); | 188 | unsigned flags, struct gfs2_holder *gh); |
189 | void gfs2_holder_reinit(unsigned int state, unsigned flags, | 189 | extern void gfs2_holder_reinit(unsigned int state, unsigned flags, |
190 | struct gfs2_holder *gh); | 190 | struct gfs2_holder *gh); |
191 | void gfs2_holder_uninit(struct gfs2_holder *gh); | 191 | extern void gfs2_holder_uninit(struct gfs2_holder *gh); |
192 | int gfs2_glock_nq(struct gfs2_holder *gh); | 192 | extern int gfs2_glock_nq(struct gfs2_holder *gh); |
193 | int gfs2_glock_poll(struct gfs2_holder *gh); | 193 | extern int gfs2_glock_poll(struct gfs2_holder *gh); |
194 | int gfs2_glock_wait(struct gfs2_holder *gh); | 194 | extern int gfs2_glock_wait(struct gfs2_holder *gh); |
195 | void gfs2_glock_dq(struct gfs2_holder *gh); | 195 | extern void gfs2_glock_dq(struct gfs2_holder *gh); |
196 | void gfs2_glock_dq_wait(struct gfs2_holder *gh); | 196 | extern void gfs2_glock_dq_wait(struct gfs2_holder *gh); |
197 | 197 | extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh); | |
198 | void gfs2_glock_dq_uninit(struct gfs2_holder *gh); | 198 | extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, |
199 | int gfs2_glock_nq_num(struct gfs2_sbd *sdp, | 199 | const struct gfs2_glock_operations *glops, |
200 | u64 number, const struct gfs2_glock_operations *glops, | 200 | unsigned int state, int flags, |
201 | unsigned int state, int flags, struct gfs2_holder *gh); | 201 | struct gfs2_holder *gh); |
202 | 202 | extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | |
203 | int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 203 | extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); |
204 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); | 204 | extern void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); |
205 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | 205 | extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); |
206 | 206 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) | |
207 | __printf(2, 3) | 207 | extern __printf(2, 3) |
208 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 208 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
209 | 209 | ||
210 | /** | 210 | /** |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 32cc4fde975c..78d4184ffc7d 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -74,7 +74,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) | |||
74 | 74 | ||
75 | gfs2_trans_add_revoke(sdp, bd); | 75 | gfs2_trans_add_revoke(sdp, bd); |
76 | } | 76 | } |
77 | BUG_ON(!fsync && atomic_read(&gl->gl_ail_count)); | 77 | GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); |
78 | spin_unlock(&sdp->sd_ail_lock); | 78 | spin_unlock(&sdp->sd_ail_lock); |
79 | gfs2_log_unlock(sdp); | 79 | gfs2_log_unlock(sdp); |
80 | } | 80 | } |
@@ -96,7 +96,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
96 | tr.tr_ip = (unsigned long)__builtin_return_address(0); | 96 | tr.tr_ip = (unsigned long)__builtin_return_address(0); |
97 | sb_start_intwrite(sdp->sd_vfs); | 97 | sb_start_intwrite(sdp->sd_vfs); |
98 | gfs2_log_reserve(sdp, tr.tr_reserved); | 98 | gfs2_log_reserve(sdp, tr.tr_reserved); |
99 | BUG_ON(current->journal_info); | 99 | WARN_ON_ONCE(current->journal_info); |
100 | current->journal_info = &tr; | 100 | current->journal_info = &tr; |
101 | 101 | ||
102 | __gfs2_ail_flush(gl, 0); | 102 | __gfs2_ail_flush(gl, 0); |
@@ -139,7 +139,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl) | |||
139 | 139 | ||
140 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) | 140 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) |
141 | return; | 141 | return; |
142 | BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); | 142 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); |
143 | 143 | ||
144 | gfs2_log_flush(gl->gl_sbd, gl); | 144 | gfs2_log_flush(gl->gl_sbd, gl); |
145 | filemap_fdatawrite(metamapping); | 145 | filemap_fdatawrite(metamapping); |
@@ -168,7 +168,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) | |||
168 | { | 168 | { |
169 | struct address_space *mapping = gfs2_glock2aspace(gl); | 169 | struct address_space *mapping = gfs2_glock2aspace(gl); |
170 | 170 | ||
171 | BUG_ON(!(flags & DIO_METADATA)); | 171 | WARN_ON_ONCE(!(flags & DIO_METADATA)); |
172 | gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); | 172 | gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); |
173 | truncate_inode_pages(mapping, 0); | 173 | truncate_inode_pages(mapping, 0); |
174 | 174 | ||
@@ -197,7 +197,7 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
197 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) | 197 | if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) |
198 | return; | 198 | return; |
199 | 199 | ||
200 | BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE); | 200 | GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); |
201 | 201 | ||
202 | gfs2_log_flush(gl->gl_sbd, gl); | 202 | gfs2_log_flush(gl->gl_sbd, gl); |
203 | filemap_fdatawrite(metamapping); | 203 | filemap_fdatawrite(metamapping); |
@@ -536,7 +536,7 @@ const struct gfs2_glock_operations gfs2_meta_glops = { | |||
536 | }; | 536 | }; |
537 | 537 | ||
538 | const struct gfs2_glock_operations gfs2_inode_glops = { | 538 | const struct gfs2_glock_operations gfs2_inode_glops = { |
539 | .go_xmote_th = inode_go_sync, | 539 | .go_sync = inode_go_sync, |
540 | .go_inval = inode_go_inval, | 540 | .go_inval = inode_go_inval, |
541 | .go_demote_ok = inode_go_demote_ok, | 541 | .go_demote_ok = inode_go_demote_ok, |
542 | .go_lock = inode_go_lock, | 542 | .go_lock = inode_go_lock, |
@@ -546,17 +546,17 @@ const struct gfs2_glock_operations gfs2_inode_glops = { | |||
546 | }; | 546 | }; |
547 | 547 | ||
548 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 548 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
549 | .go_xmote_th = rgrp_go_sync, | 549 | .go_sync = rgrp_go_sync, |
550 | .go_inval = rgrp_go_inval, | 550 | .go_inval = rgrp_go_inval, |
551 | .go_lock = gfs2_rgrp_go_lock, | 551 | .go_lock = gfs2_rgrp_go_lock, |
552 | .go_unlock = gfs2_rgrp_go_unlock, | 552 | .go_unlock = gfs2_rgrp_go_unlock, |
553 | .go_dump = gfs2_rgrp_dump, | 553 | .go_dump = gfs2_rgrp_dump, |
554 | .go_type = LM_TYPE_RGRP, | 554 | .go_type = LM_TYPE_RGRP, |
555 | .go_flags = GLOF_ASPACE, | 555 | .go_flags = GLOF_ASPACE | GLOF_LVB, |
556 | }; | 556 | }; |
557 | 557 | ||
558 | const struct gfs2_glock_operations gfs2_trans_glops = { | 558 | const struct gfs2_glock_operations gfs2_trans_glops = { |
559 | .go_xmote_th = trans_go_sync, | 559 | .go_sync = trans_go_sync, |
560 | .go_xmote_bh = trans_go_xmote_bh, | 560 | .go_xmote_bh = trans_go_xmote_bh, |
561 | .go_demote_ok = trans_go_demote_ok, | 561 | .go_demote_ok = trans_go_demote_ok, |
562 | .go_type = LM_TYPE_NONDISK, | 562 | .go_type = LM_TYPE_NONDISK, |
@@ -577,6 +577,7 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = { | |||
577 | 577 | ||
578 | const struct gfs2_glock_operations gfs2_quota_glops = { | 578 | const struct gfs2_glock_operations gfs2_quota_glops = { |
579 | .go_type = LM_TYPE_QUOTA, | 579 | .go_type = LM_TYPE_QUOTA, |
580 | .go_flags = GLOF_LVB, | ||
580 | }; | 581 | }; |
581 | 582 | ||
582 | const struct gfs2_glock_operations gfs2_journal_glops = { | 583 | const struct gfs2_glock_operations gfs2_journal_glops = { |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 3d469d37345e..c373a24fedd9 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -205,7 +205,7 @@ struct lm_lockname { | |||
205 | 205 | ||
206 | 206 | ||
207 | struct gfs2_glock_operations { | 207 | struct gfs2_glock_operations { |
208 | void (*go_xmote_th) (struct gfs2_glock *gl); | 208 | void (*go_sync) (struct gfs2_glock *gl); |
209 | int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); | 209 | int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh); |
210 | void (*go_inval) (struct gfs2_glock *gl, int flags); | 210 | void (*go_inval) (struct gfs2_glock *gl, int flags); |
211 | int (*go_demote_ok) (const struct gfs2_glock *gl); | 211 | int (*go_demote_ok) (const struct gfs2_glock *gl); |
@@ -216,6 +216,7 @@ struct gfs2_glock_operations { | |||
216 | const int go_type; | 216 | const int go_type; |
217 | const unsigned long go_flags; | 217 | const unsigned long go_flags; |
218 | #define GLOF_ASPACE 1 | 218 | #define GLOF_ASPACE 1 |
219 | #define GLOF_LVB 2 | ||
219 | }; | 220 | }; |
220 | 221 | ||
221 | enum { | 222 | enum { |
@@ -321,7 +322,6 @@ struct gfs2_glock { | |||
321 | ktime_t gl_dstamp; | 322 | ktime_t gl_dstamp; |
322 | struct gfs2_lkstats gl_stats; | 323 | struct gfs2_lkstats gl_stats; |
323 | struct dlm_lksb gl_lksb; | 324 | struct dlm_lksb gl_lksb; |
324 | char gl_lvb[32]; | ||
325 | unsigned long gl_tchange; | 325 | unsigned long gl_tchange; |
326 | void *gl_object; | 326 | void *gl_object; |
327 | 327 | ||
@@ -539,6 +539,7 @@ enum { | |||
539 | SDF_DEMOTE = 5, | 539 | SDF_DEMOTE = 5, |
540 | SDF_NOJOURNALID = 6, | 540 | SDF_NOJOURNALID = 6, |
541 | SDF_RORECOVERY = 7, /* read only recovery */ | 541 | SDF_RORECOVERY = 7, /* read only recovery */ |
542 | SDF_SKIP_DLM_UNLOCK = 8, | ||
542 | }; | 543 | }; |
543 | 544 | ||
544 | #define GFS2_FSNAME_LEN 256 | 545 | #define GFS2_FSNAME_LEN 256 |
@@ -621,6 +622,7 @@ struct gfs2_sbd { | |||
621 | u32 sd_hash_bsize_shift; | 622 | u32 sd_hash_bsize_shift; |
622 | u32 sd_hash_ptrs; /* Number of pointers in a hash block */ | 623 | u32 sd_hash_ptrs; /* Number of pointers in a hash block */ |
623 | u32 sd_qc_per_block; | 624 | u32 sd_qc_per_block; |
625 | u32 sd_blocks_per_bitmap; | ||
624 | u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ | 626 | u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ |
625 | u32 sd_max_height; /* Max height of a file's metadata tree */ | 627 | u32 sd_max_height; /* Max height of a file's metadata tree */ |
626 | u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; | 628 | u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 381893ceefa4..2b6f5698ef18 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -364,34 +364,34 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
367 | static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode, | 367 | static void munge_mode_uid_gid(const struct gfs2_inode *dip, |
368 | unsigned int *uid, unsigned int *gid) | 368 | struct inode *inode) |
369 | { | 369 | { |
370 | if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && | 370 | if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && |
371 | (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { | 371 | (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { |
372 | if (S_ISDIR(*mode)) | 372 | if (S_ISDIR(inode->i_mode)) |
373 | *mode |= S_ISUID; | 373 | inode->i_mode |= S_ISUID; |
374 | else if (dip->i_inode.i_uid != current_fsuid()) | 374 | else if (dip->i_inode.i_uid != current_fsuid()) |
375 | *mode &= ~07111; | 375 | inode->i_mode &= ~07111; |
376 | *uid = dip->i_inode.i_uid; | 376 | inode->i_uid = dip->i_inode.i_uid; |
377 | } else | 377 | } else |
378 | *uid = current_fsuid(); | 378 | inode->i_uid = current_fsuid(); |
379 | 379 | ||
380 | if (dip->i_inode.i_mode & S_ISGID) { | 380 | if (dip->i_inode.i_mode & S_ISGID) { |
381 | if (S_ISDIR(*mode)) | 381 | if (S_ISDIR(inode->i_mode)) |
382 | *mode |= S_ISGID; | 382 | inode->i_mode |= S_ISGID; |
383 | *gid = dip->i_inode.i_gid; | 383 | inode->i_gid = dip->i_inode.i_gid; |
384 | } else | 384 | } else |
385 | *gid = current_fsgid(); | 385 | inode->i_gid = current_fsgid(); |
386 | } | 386 | } |
387 | 387 | ||
388 | static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) | 388 | static int alloc_dinode(struct gfs2_inode *ip, u32 flags) |
389 | { | 389 | { |
390 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 390 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
391 | int error; | 391 | int error; |
392 | int dblocks = 1; | 392 | int dblocks = 1; |
393 | 393 | ||
394 | error = gfs2_inplace_reserve(dip, RES_DINODE); | 394 | error = gfs2_inplace_reserve(ip, RES_DINODE, flags); |
395 | if (error) | 395 | if (error) |
396 | goto out; | 396 | goto out; |
397 | 397 | ||
@@ -399,12 +399,15 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) | |||
399 | if (error) | 399 | if (error) |
400 | goto out_ipreserv; | 400 | goto out_ipreserv; |
401 | 401 | ||
402 | error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation); | 402 | error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); |
403 | ip->i_no_formal_ino = ip->i_generation; | ||
404 | ip->i_inode.i_ino = ip->i_no_addr; | ||
405 | ip->i_goal = ip->i_no_addr; | ||
403 | 406 | ||
404 | gfs2_trans_end(sdp); | 407 | gfs2_trans_end(sdp); |
405 | 408 | ||
406 | out_ipreserv: | 409 | out_ipreserv: |
407 | gfs2_inplace_release(dip); | 410 | gfs2_inplace_release(ip); |
408 | out: | 411 | out: |
409 | return error; | 412 | return error; |
410 | } | 413 | } |
@@ -429,52 +432,42 @@ static void gfs2_init_dir(struct buffer_head *dibh, | |||
429 | /** | 432 | /** |
430 | * init_dinode - Fill in a new dinode structure | 433 | * init_dinode - Fill in a new dinode structure |
431 | * @dip: The directory this inode is being created in | 434 | * @dip: The directory this inode is being created in |
432 | * @gl: The glock covering the new inode | 435 | * @ip: The inode |
433 | * @inum: The inode number | ||
434 | * @mode: The file permissions | ||
435 | * @uid: The uid of the new inode | ||
436 | * @gid: The gid of the new inode | ||
437 | * @generation: The generation number of the new inode | ||
438 | * @dev: The device number (if a device node) | ||
439 | * @symname: The symlink destination (if a symlink) | 436 | * @symname: The symlink destination (if a symlink) |
440 | * @size: The inode size (ignored for directories) | ||
441 | * @bhp: The buffer head (returned to caller) | 437 | * @bhp: The buffer head (returned to caller) |
442 | * | 438 | * |
443 | */ | 439 | */ |
444 | 440 | ||
445 | static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | 441 | static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, |
446 | const struct gfs2_inum_host *inum, umode_t mode, | 442 | const char *symname, struct buffer_head **bhp) |
447 | unsigned int uid, unsigned int gid, | ||
448 | const u64 *generation, dev_t dev, const char *symname, | ||
449 | unsigned size, struct buffer_head **bhp) | ||
450 | { | 443 | { |
451 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 444 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
452 | struct gfs2_dinode *di; | 445 | struct gfs2_dinode *di; |
453 | struct buffer_head *dibh; | 446 | struct buffer_head *dibh; |
454 | struct timespec tv = CURRENT_TIME; | 447 | struct timespec tv = CURRENT_TIME; |
455 | 448 | ||
456 | dibh = gfs2_meta_new(gl, inum->no_addr); | 449 | dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); |
457 | gfs2_trans_add_bh(gl, dibh, 1); | 450 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
458 | gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); | 451 | gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); |
459 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 452 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
460 | di = (struct gfs2_dinode *)dibh->b_data; | 453 | di = (struct gfs2_dinode *)dibh->b_data; |
461 | 454 | ||
462 | di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); | 455 | di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); |
463 | di->di_num.no_addr = cpu_to_be64(inum->no_addr); | 456 | di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); |
464 | di->di_mode = cpu_to_be32(mode); | 457 | di->di_mode = cpu_to_be32(ip->i_inode.i_mode); |
465 | di->di_uid = cpu_to_be32(uid); | 458 | di->di_uid = cpu_to_be32(ip->i_inode.i_uid); |
466 | di->di_gid = cpu_to_be32(gid); | 459 | di->di_gid = cpu_to_be32(ip->i_inode.i_gid); |
467 | di->di_nlink = 0; | 460 | di->di_nlink = 0; |
468 | di->di_size = cpu_to_be64(size); | 461 | di->di_size = cpu_to_be64(ip->i_inode.i_size); |
469 | di->di_blocks = cpu_to_be64(1); | 462 | di->di_blocks = cpu_to_be64(1); |
470 | di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); | 463 | di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); |
471 | di->di_major = cpu_to_be32(MAJOR(dev)); | 464 | di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev)); |
472 | di->di_minor = cpu_to_be32(MINOR(dev)); | 465 | di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev)); |
473 | di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); | 466 | di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr); |
474 | di->di_generation = cpu_to_be64(*generation); | 467 | di->di_generation = cpu_to_be64(ip->i_generation); |
475 | di->di_flags = 0; | 468 | di->di_flags = 0; |
476 | di->__pad1 = 0; | 469 | di->__pad1 = 0; |
477 | di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); | 470 | di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0); |
478 | di->di_height = 0; | 471 | di->di_height = 0; |
479 | di->__pad2 = 0; | 472 | di->__pad2 = 0; |
480 | di->__pad3 = 0; | 473 | di->__pad3 = 0; |
@@ -487,7 +480,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
487 | di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); | 480 | di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); |
488 | memset(&di->di_reserved, 0, sizeof(di->di_reserved)); | 481 | memset(&di->di_reserved, 0, sizeof(di->di_reserved)); |
489 | 482 | ||
490 | switch(mode & S_IFMT) { | 483 | switch(ip->i_inode.i_mode & S_IFMT) { |
491 | case S_IFREG: | 484 | case S_IFREG: |
492 | if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || | 485 | if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || |
493 | gfs2_tune_get(sdp, gt_new_files_jdata)) | 486 | gfs2_tune_get(sdp, gt_new_files_jdata)) |
@@ -502,7 +495,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
502 | gfs2_init_dir(dibh, dip); | 495 | gfs2_init_dir(dibh, dip); |
503 | break; | 496 | break; |
504 | case S_IFLNK: | 497 | case S_IFLNK: |
505 | memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); | 498 | memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, ip->i_inode.i_size); |
506 | break; | 499 | break; |
507 | } | 500 | } |
508 | 501 | ||
@@ -511,25 +504,22 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
511 | *bhp = dibh; | 504 | *bhp = dibh; |
512 | } | 505 | } |
513 | 506 | ||
514 | static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | 507 | static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, |
515 | umode_t mode, const struct gfs2_inum_host *inum, | 508 | const char *symname, struct buffer_head **bhp) |
516 | const u64 *generation, dev_t dev, const char *symname, | ||
517 | unsigned int size, struct buffer_head **bhp) | ||
518 | { | 509 | { |
510 | struct inode *inode = &ip->i_inode; | ||
519 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 511 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
520 | unsigned int uid, gid; | ||
521 | int error; | 512 | int error; |
522 | 513 | ||
523 | munge_mode_uid_gid(dip, &mode, &uid, &gid); | ||
524 | error = gfs2_rindex_update(sdp); | 514 | error = gfs2_rindex_update(sdp); |
525 | if (error) | 515 | if (error) |
526 | return error; | 516 | return error; |
527 | 517 | ||
528 | error = gfs2_quota_lock(dip, uid, gid); | 518 | error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid); |
529 | if (error) | 519 | if (error) |
530 | return error; | 520 | return error; |
531 | 521 | ||
532 | error = gfs2_quota_check(dip, uid, gid); | 522 | error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid); |
533 | if (error) | 523 | if (error) |
534 | goto out_quota; | 524 | goto out_quota; |
535 | 525 | ||
@@ -537,8 +527,8 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
537 | if (error) | 527 | if (error) |
538 | goto out_quota; | 528 | goto out_quota; |
539 | 529 | ||
540 | init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp); | 530 | init_dinode(dip, ip, symname, bhp); |
541 | gfs2_quota_change(dip, +1, uid, gid); | 531 | gfs2_quota_change(dip, +1, inode->i_uid, inode->i_gid); |
542 | gfs2_trans_end(sdp); | 532 | gfs2_trans_end(sdp); |
543 | 533 | ||
544 | out_quota: | 534 | out_quota: |
@@ -570,7 +560,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
570 | if (error) | 560 | if (error) |
571 | goto fail_quota_locks; | 561 | goto fail_quota_locks; |
572 | 562 | ||
573 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); | 563 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); |
574 | if (error) | 564 | if (error) |
575 | goto fail_quota_locks; | 565 | goto fail_quota_locks; |
576 | 566 | ||
@@ -657,19 +647,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
657 | struct inode *inode = NULL; | 647 | struct inode *inode = NULL; |
658 | struct gfs2_inode *dip = GFS2_I(dir), *ip; | 648 | struct gfs2_inode *dip = GFS2_I(dir), *ip; |
659 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 649 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
660 | struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; | 650 | struct gfs2_glock *io_gl; |
661 | int error; | 651 | int error; |
662 | u64 generation; | ||
663 | struct buffer_head *bh = NULL; | 652 | struct buffer_head *bh = NULL; |
653 | u32 aflags = 0; | ||
664 | 654 | ||
665 | if (!name->len || name->len > GFS2_FNAMESIZE) | 655 | if (!name->len || name->len > GFS2_FNAMESIZE) |
666 | return -ENAMETOOLONG; | 656 | return -ENAMETOOLONG; |
667 | 657 | ||
668 | /* We need a reservation to allocate the new dinode block. The | ||
669 | directory ip temporarily points to the reservation, but this is | ||
670 | being done to get a set of contiguous blocks for the new dinode. | ||
671 | Since this is a create, we don't have a sizehint yet, so it will | ||
672 | have to use the minimum reservation size. */ | ||
673 | error = gfs2_rs_alloc(dip); | 658 | error = gfs2_rs_alloc(dip); |
674 | if (error) | 659 | if (error) |
675 | return error; | 660 | return error; |
@@ -688,45 +673,72 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
688 | if (error) | 673 | if (error) |
689 | goto fail_gunlock; | 674 | goto fail_gunlock; |
690 | 675 | ||
691 | error = alloc_dinode(dip, &inum.no_addr, &generation); | 676 | inode = new_inode(sdp->sd_vfs); |
677 | if (!inode) { | ||
678 | gfs2_glock_dq_uninit(ghs); | ||
679 | return -ENOMEM; | ||
680 | } | ||
681 | ip = GFS2_I(inode); | ||
682 | error = gfs2_rs_alloc(ip); | ||
692 | if (error) | 683 | if (error) |
693 | goto fail_gunlock; | 684 | goto fail_free_inode; |
694 | inum.no_formal_ino = generation; | 685 | |
686 | set_bit(GIF_INVALID, &ip->i_flags); | ||
687 | inode->i_mode = mode; | ||
688 | inode->i_rdev = dev; | ||
689 | inode->i_size = size; | ||
690 | munge_mode_uid_gid(dip, inode); | ||
691 | ip->i_goal = dip->i_goal; | ||
695 | 692 | ||
696 | error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, | 693 | if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) || |
697 | LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); | 694 | (dip->i_diskflags & GFS2_DIF_TOPDIR)) |
695 | aflags |= GFS2_AF_ORLOV; | ||
696 | |||
697 | error = alloc_dinode(ip, aflags); | ||
698 | if (error) | 698 | if (error) |
699 | goto fail_gunlock; | 699 | goto fail_free_inode; |
700 | 700 | ||
701 | error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh); | 701 | error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); |
702 | if (error) | 702 | if (error) |
703 | goto fail_gunlock2; | 703 | goto fail_free_inode; |
704 | 704 | ||
705 | inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, | 705 | ip->i_gl->gl_object = ip; |
706 | inum.no_formal_ino, 0); | 706 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); |
707 | if (IS_ERR(inode)) | 707 | if (error) |
708 | goto fail_free_inode; | ||
709 | |||
710 | error = make_dinode(dip, ip, symname, &bh); | ||
711 | if (error) | ||
708 | goto fail_gunlock2; | 712 | goto fail_gunlock2; |
709 | 713 | ||
710 | ip = GFS2_I(inode); | 714 | error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); |
711 | error = gfs2_inode_refresh(ip); | ||
712 | if (error) | 715 | if (error) |
713 | goto fail_gunlock2; | 716 | goto fail_gunlock2; |
714 | 717 | ||
715 | error = gfs2_rs_alloc(ip); | 718 | error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); |
716 | if (error) | 719 | if (error) |
717 | goto fail_gunlock2; | 720 | goto fail_gunlock2; |
718 | 721 | ||
722 | ip->i_iopen_gh.gh_gl->gl_object = ip; | ||
723 | gfs2_glock_put(io_gl); | ||
724 | gfs2_set_iop(inode); | ||
725 | insert_inode_hash(inode); | ||
726 | |||
727 | error = gfs2_inode_refresh(ip); | ||
728 | if (error) | ||
729 | goto fail_gunlock3; | ||
730 | |||
719 | error = gfs2_acl_create(dip, inode); | 731 | error = gfs2_acl_create(dip, inode); |
720 | if (error) | 732 | if (error) |
721 | goto fail_gunlock2; | 733 | goto fail_gunlock3; |
722 | 734 | ||
723 | error = gfs2_security_init(dip, ip, name); | 735 | error = gfs2_security_init(dip, ip, name); |
724 | if (error) | 736 | if (error) |
725 | goto fail_gunlock2; | 737 | goto fail_gunlock3; |
726 | 738 | ||
727 | error = link_dinode(dip, name, ip); | 739 | error = link_dinode(dip, name, ip); |
728 | if (error) | 740 | if (error) |
729 | goto fail_gunlock2; | 741 | goto fail_gunlock3; |
730 | 742 | ||
731 | if (bh) | 743 | if (bh) |
732 | brelse(bh); | 744 | brelse(bh); |
@@ -739,8 +751,20 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
739 | d_instantiate(dentry, inode); | 751 | d_instantiate(dentry, inode); |
740 | return 0; | 752 | return 0; |
741 | 753 | ||
754 | fail_gunlock3: | ||
755 | gfs2_glock_dq_uninit(ghs + 1); | ||
756 | if (ip->i_gl) | ||
757 | gfs2_glock_put(ip->i_gl); | ||
758 | goto fail_gunlock; | ||
759 | |||
742 | fail_gunlock2: | 760 | fail_gunlock2: |
743 | gfs2_glock_dq_uninit(ghs + 1); | 761 | gfs2_glock_dq_uninit(ghs + 1); |
762 | fail_free_inode: | ||
763 | if (ip->i_gl) | ||
764 | gfs2_glock_put(ip->i_gl); | ||
765 | gfs2_rs_delete(ip); | ||
766 | free_inode_nonrcu(inode); | ||
767 | inode = NULL; | ||
744 | fail_gunlock: | 768 | fail_gunlock: |
745 | gfs2_glock_dq_uninit(ghs); | 769 | gfs2_glock_dq_uninit(ghs); |
746 | if (inode && !IS_ERR(inode)) { | 770 | if (inode && !IS_ERR(inode)) { |
@@ -748,7 +772,6 @@ fail_gunlock: | |||
748 | iput(inode); | 772 | iput(inode); |
749 | } | 773 | } |
750 | fail: | 774 | fail: |
751 | gfs2_rs_delete(dip); | ||
752 | if (bh) | 775 | if (bh) |
753 | brelse(bh); | 776 | brelse(bh); |
754 | return error; | 777 | return error; |
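The reworked gfs2_create_inode() above now builds the VFS inode before touching the disk. A condensed outline of the new ordering (summarising the hunks, not additional code):

	inode = new_inode(sb);              /* VFS inode first              */
	gfs2_rs_alloc(ip);                  /* per-inode block reservation  */
	munge_mode_uid_gid(dip, inode);     /* mode/uid/gid from the parent */
	alloc_dinode(ip, aflags);           /* aflags carries GFS2_AF_ORLOV
	                                       for top-level directories    */
	gfs2_glock_get(..., &ip->i_gl);     /* inode glock, locked EX       */
	make_dinode(dip, ip, symname, &bh); /* write the new on-disk dinode */
	gfs2_glock_get(..., &io_gl);        /* iopen glock, locked SHARED   */
	gfs2_set_iop(inode);
	insert_inode_hash(inode);
	/* ...then gfs2_inode_refresh(), ACLs, security attrs and finally
	 * link_dinode() to enter the name in the parent directory. */

The split error labels mirror these stages: fail_gunlock3 handles failures after the inode is hashed, while fail_gunlock2 and fail_free_inode unwind the earlier, not-yet-hashed cases.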
@@ -884,7 +907,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
884 | if (error) | 907 | if (error) |
885 | goto out_gunlock; | 908 | goto out_gunlock; |
886 | 909 | ||
887 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); | 910 | error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); |
888 | if (error) | 911 | if (error) |
889 | goto out_gunlock_q; | 912 | goto out_gunlock_q; |
890 | 913 | ||
@@ -977,7 +1000,6 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
977 | * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it | 1000 | * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it |
978 | * @dip: The parent directory | 1001 | * @dip: The parent directory |
979 | * @name: The name of the entry in the parent directory | 1002 | * @name: The name of the entry in the parent directory |
980 | * @bh: The inode buffer for the inode to be removed | ||
981 | * @inode: The inode to be removed | 1003 | * @inode: The inode to be removed |
982 | * | 1004 | * |
983 | * Called with all the locks and in a transaction. This will only be | 1005 | * Called with all the locks and in a transaction. This will only be |
@@ -987,8 +1009,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
987 | */ | 1009 | */ |
988 | 1010 | ||
989 | static int gfs2_unlink_inode(struct gfs2_inode *dip, | 1011 | static int gfs2_unlink_inode(struct gfs2_inode *dip, |
990 | const struct dentry *dentry, | 1012 | const struct dentry *dentry) |
991 | struct buffer_head *bh) | ||
992 | { | 1013 | { |
993 | struct inode *inode = dentry->d_inode; | 1014 | struct inode *inode = dentry->d_inode; |
994 | struct gfs2_inode *ip = GFS2_I(inode); | 1015 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -1028,7 +1049,6 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
1028 | struct gfs2_sbd *sdp = GFS2_SB(dir); | 1049 | struct gfs2_sbd *sdp = GFS2_SB(dir); |
1029 | struct inode *inode = dentry->d_inode; | 1050 | struct inode *inode = dentry->d_inode; |
1030 | struct gfs2_inode *ip = GFS2_I(inode); | 1051 | struct gfs2_inode *ip = GFS2_I(inode); |
1031 | struct buffer_head *bh; | ||
1032 | struct gfs2_holder ghs[3]; | 1052 | struct gfs2_holder ghs[3]; |
1033 | struct gfs2_rgrpd *rgd; | 1053 | struct gfs2_rgrpd *rgd; |
1034 | int error; | 1054 | int error; |
@@ -1077,14 +1097,9 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
1077 | 1097 | ||
1078 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); | 1098 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); |
1079 | if (error) | 1099 | if (error) |
1080 | goto out_gunlock; | ||
1081 | |||
1082 | error = gfs2_meta_inode_buffer(ip, &bh); | ||
1083 | if (error) | ||
1084 | goto out_end_trans; | 1100 | goto out_end_trans; |
1085 | 1101 | ||
1086 | error = gfs2_unlink_inode(dip, dentry, bh); | 1102 | error = gfs2_unlink_inode(dip, dentry); |
1087 | brelse(bh); | ||
1088 | 1103 | ||
1089 | out_end_trans: | 1104 | out_end_trans: |
1090 | gfs2_trans_end(sdp); | 1105 | gfs2_trans_end(sdp); |
@@ -1365,7 +1380,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1365 | if (error) | 1380 | if (error) |
1366 | goto out_gunlock; | 1381 | goto out_gunlock; |
1367 | 1382 | ||
1368 | error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); | 1383 | error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); |
1369 | if (error) | 1384 | if (error) |
1370 | goto out_gunlock_q; | 1385 | goto out_gunlock_q; |
1371 | 1386 | ||
@@ -1384,14 +1399,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
1384 | 1399 | ||
1385 | /* Remove the target file, if it exists */ | 1400 | /* Remove the target file, if it exists */ |
1386 | 1401 | ||
1387 | if (nip) { | 1402 | if (nip) |
1388 | struct buffer_head *bh; | 1403 | error = gfs2_unlink_inode(ndip, ndentry); |
1389 | error = gfs2_meta_inode_buffer(nip, &bh); | ||
1390 | if (error) | ||
1391 | goto out_end_trans; | ||
1392 | error = gfs2_unlink_inode(ndip, ndentry, bh); | ||
1393 | brelse(bh); | ||
1394 | } | ||
1395 | 1404 | ||
1396 | if (dir_rename) { | 1405 | if (dir_rename) { |
1397 | error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); | 1406 | error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0fb6539b0c8c..8dad6b093716 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -120,8 +120,8 @@ static void gdlm_ast(void *arg) | |||
120 | gfs2_update_reply_times(gl); | 120 | gfs2_update_reply_times(gl); |
121 | BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); | 121 | BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); |
122 | 122 | ||
123 | if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) | 123 | if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr) |
124 | memset(gl->gl_lvb, 0, GDLM_LVB_SIZE); | 124 | memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE); |
125 | 125 | ||
126 | switch (gl->gl_lksb.sb_status) { | 126 | switch (gl->gl_lksb.sb_status) { |
127 | case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ | 127 | case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ |
@@ -203,8 +203,10 @@ static int make_mode(const unsigned int lmstate) | |||
203 | static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, | 203 | static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, |
204 | const int req) | 204 | const int req) |
205 | { | 205 | { |
206 | u32 lkf = DLM_LKF_VALBLK; | 206 | u32 lkf = 0; |
207 | u32 lkid = gl->gl_lksb.sb_lkid; | 207 | |
208 | if (gl->gl_lksb.sb_lvbptr) | ||
209 | lkf |= DLM_LKF_VALBLK; | ||
208 | 210 | ||
209 | if (gfs_flags & LM_FLAG_TRY) | 211 | if (gfs_flags & LM_FLAG_TRY) |
210 | lkf |= DLM_LKF_NOQUEUE; | 212 | lkf |= DLM_LKF_NOQUEUE; |
@@ -228,7 +230,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, | |||
228 | BUG(); | 230 | BUG(); |
229 | } | 231 | } |
230 | 232 | ||
231 | if (lkid != 0) { | 233 | if (gl->gl_lksb.sb_lkid != 0) { |
232 | lkf |= DLM_LKF_CONVERT; | 234 | lkf |= DLM_LKF_CONVERT; |
233 | if (test_bit(GLF_BLOCKING, &gl->gl_flags)) | 235 | if (test_bit(GLF_BLOCKING, &gl->gl_flags)) |
234 | lkf |= DLM_LKF_QUECVT; | 236 | lkf |= DLM_LKF_QUECVT; |
@@ -289,6 +291,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl) | |||
289 | gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); | 291 | gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); |
290 | gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); | 292 | gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); |
291 | gfs2_update_request_times(gl); | 293 | gfs2_update_request_times(gl); |
294 | |||
295 | /* don't want to skip dlm_unlock writing the lvb when lock is ex */ | ||
296 | if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && | ||
297 | gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { | ||
298 | gfs2_glock_free(gl); | ||
299 | return; | ||
300 | } | ||
301 | |||
292 | error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, | 302 | error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, |
293 | NULL, gl); | 303 | NULL, gl); |
294 | if (error) { | 304 | if (error) { |
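Together with the SDF_SKIP_DLM_UNLOCK flag that gfs2_gl_hash_clear() now sets, this gives unmount a fast path: for LVB-carrying glocks not held in EX, the explicit dlm_unlock() round trip is skipped, since releasing the lockspace will drop the locks anyway. EX-held locks still unlock through DLM because only dlm_unlock(..., DLM_LKF_VALBLK, ...) writes the value block back. A sketch of the test, mirroring the hunk above:

	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
	    gl->gl_lksb.sb_lvbptr && gl->gl_state != LM_ST_EXCLUSIVE) {
		gfs2_glock_free(gl);    /* no network round trip */
		return;
	}
	/* fall through to dlm_unlock() so an EX-held LVB is written
	 * back before the lock is dropped */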
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e443966c8106..0e3554edb8f2 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -278,6 +278,9 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) | |||
278 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | 278 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - |
279 | sizeof(struct gfs2_meta_header)) / | 279 | sizeof(struct gfs2_meta_header)) / |
280 | sizeof(struct gfs2_quota_change); | 280 | sizeof(struct gfs2_quota_change); |
281 | sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize - | ||
282 | sizeof(struct gfs2_meta_header)) | ||
283 | * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */ | ||
281 | 284 | ||
282 | /* Compute maximum reservation required to add an entry to a directory */ | 285 |
283 | 286 | ||
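For a sense of scale (the 4 KiB block size is an assumed example): GFS2_NBBY is the number of blocks described per bitmap byte (4, at two bits each), so

	u32 per_bitmap = (4096 - sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	/* = (4096 - 24) * 4 = 16288 blocks per subsequent bitmap block */

As the added comment notes, the first bitmap block is excluded because it carries the larger gfs2_rgrp header; the new gfs2_rbm_from_block() in rgrp.c compensates for that difference.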
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c5af8e18f27a..ae55e248c3b7 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -816,7 +816,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
816 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; | 816 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; |
817 | 817 | ||
818 | reserved = 1 + (nalloc * (data_blocks + ind_blocks)); | 818 | reserved = 1 + (nalloc * (data_blocks + ind_blocks)); |
819 | error = gfs2_inplace_reserve(ip, reserved); | 819 | error = gfs2_inplace_reserve(ip, reserved, 0); |
820 | if (error) | 820 | if (error) |
821 | goto out_alloc; | 821 | goto out_alloc; |
822 | 822 | ||
@@ -869,7 +869,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) | |||
869 | if (error < 0) | 869 | if (error < 0) |
870 | return error; | 870 | return error; |
871 | 871 | ||
872 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; | 872 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; |
873 | qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); | 873 | qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); |
874 | qlvb->__pad = 0; | 874 | qlvb->__pad = 0; |
875 | qlvb->qb_limit = q.qu_limit; | 875 | qlvb->qb_limit = q.qu_limit; |
@@ -893,7 +893,7 @@ restart: | |||
893 | if (error) | 893 | if (error) |
894 | return error; | 894 | return error; |
895 | 895 | ||
896 | qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; | 896 | qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; |
897 | 897 | ||
898 | if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { | 898 | if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { |
899 | gfs2_glock_dq_uninit(q_gh); | 899 | gfs2_glock_dq_uninit(q_gh); |
@@ -1506,7 +1506,7 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, | |||
1506 | if (error) | 1506 | if (error) |
1507 | goto out; | 1507 | goto out; |
1508 | 1508 | ||
1509 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; | 1509 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; |
1510 | fdq->d_version = FS_DQUOT_VERSION; | 1510 | fdq->d_version = FS_DQUOT_VERSION; |
1511 | fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; | 1511 | fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; |
1512 | fdq->d_id = from_kqid(&init_user_ns, qid); | 1512 | fdq->d_id = from_kqid(&init_user_ns, qid); |
@@ -1605,7 +1605,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, | |||
1605 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), | 1605 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), |
1606 | &data_blocks, &ind_blocks); | 1606 | &data_blocks, &ind_blocks); |
1607 | blocks = 1 + data_blocks + ind_blocks; | 1607 | blocks = 1 + data_blocks + ind_blocks; |
1608 | error = gfs2_inplace_reserve(ip, blocks); | 1608 | error = gfs2_inplace_reserve(ip, blocks, 0); |
1609 | if (error) | 1609 | if (error) |
1610 | goto out_i; | 1610 | goto out_i; |
1611 | blocks += gfs2_rg_blocks(ip, blocks); | 1611 | blocks += gfs2_rg_blocks(ip, blocks); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 38fe18f2f055..37ee061d899e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/prefetch.h> | 16 | #include <linux/prefetch.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/rbtree.h> | 18 | #include <linux/rbtree.h> |
19 | #include <linux/random.h> | ||
19 | 20 | ||
20 | #include "gfs2.h" | 21 | #include "gfs2.h" |
21 | #include "incore.h" | 22 | #include "incore.h" |
@@ -251,22 +252,25 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, | |||
251 | static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) | 252 | static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) |
252 | { | 253 | { |
253 | u64 rblock = block - rbm->rgd->rd_data0; | 254 | u64 rblock = block - rbm->rgd->rd_data0; |
254 | u32 goal = (u32)rblock; | 255 | u32 x; |
255 | int x; | ||
256 | 256 | ||
257 | if (WARN_ON_ONCE(rblock > UINT_MAX)) | 257 | if (WARN_ON_ONCE(rblock > UINT_MAX)) |
258 | return -EINVAL; | 258 | return -EINVAL; |
259 | if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) | 259 | if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) |
260 | return -E2BIG; | 260 | return -E2BIG; |
261 | 261 | ||
262 | for (x = 0; x < rbm->rgd->rd_length; x++) { | 262 | rbm->bi = rbm->rgd->rd_bits; |
263 | rbm->bi = rbm->rgd->rd_bits + x; | 263 | rbm->offset = (u32)(rblock); |
264 | if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { | 264 | /* Check if the block is within the first bitmap block */ |
265 | rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); | 265 | if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) |
266 | break; | 266 | return 0; |
267 | } | ||
268 | } | ||
269 | 267 | ||
268 | /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ | ||
269 | rbm->offset += (sizeof(struct gfs2_rgrp) - | ||
270 | sizeof(struct gfs2_meta_header)) * GFS2_NBBY; | ||
271 | x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; | ||
272 | rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; | ||
273 | rbm->bi += x; | ||
270 | return 0; | 274 | return 0; |
271 | } | 275 | } |
272 | 276 | ||
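The rewrite above turns a per-bitmap linear scan into constant-time arithmetic. The key trick: the first bitmap block carries the larger gfs2_rgrp header and so holds fewer entries than the gfs2_meta_header-only blocks that follow; biasing the offset by that size difference makes every block appear to hold sd_blocks_per_bitmap entries, so one divide/modulo finds the right bitmap. A sketch of the computation (names abbreviated, restating the hunk):

	offset = rblock;                     /* block - rd_data0          */
	if (offset < (bi[0].bi_start + bi[0].bi_len) * GFS2_NBBY)
		return 0;                    /* lives in the first bitmap */
	offset += (sizeof(struct gfs2_rgrp) -
	           sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
	x       = offset / sd_blocks_per_bitmap;  /* which bitmap block   */
	offset -= x * sd_blocks_per_bitmap;       /* offset within it     */
	bi     += x;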
@@ -875,7 +879,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) | |||
875 | goto fail; | 879 | goto fail; |
876 | 880 | ||
877 | rgd->rd_gl->gl_object = rgd; | 881 | rgd->rd_gl->gl_object = rgd; |
878 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; | 882 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; |
879 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | 883 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
880 | if (rgd->rd_data > sdp->sd_max_rg_data) | 884 | if (rgd->rd_data > sdp->sd_max_rg_data) |
881 | sdp->sd_max_rg_data = rgd->rd_data; | 885 | sdp->sd_max_rg_data = rgd->rd_data; |
@@ -1678,13 +1682,105 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
1678 | return; | 1682 | return; |
1679 | } | 1683 | } |
1680 | 1684 | ||
1685 | /** | ||
1686 | * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested | ||
1687 | * @rgd: The rgrp in question | ||
1688 | * @loops: An indication of how picky we can be (0=very, 1=less so) | ||
1689 | * | ||
1690 | * This function uses the recently added glock statistics in order to | ||
1691 | * figure out whether a particular resource group is suffering from | ||
1692 | * contention from multiple nodes. This is done purely on the basis | ||
1693 | * of timings, since this is the only data we have to work with and | ||
1694 | * our aim here is to reject a resource group which is highly contended | ||
1695 | * but (very important) not to do this too often in order to ensure that | ||
1696 | * we do not end up introducing fragmentation by changing resource | ||
1697 | * groups when not actually required. | ||
1698 | * | ||
1699 | * The calculation is fairly simple: we want to know whether the SRTTB | ||
1700 | * (i.e. smoothed round trip time for blocking operations) to acquire | ||
1701 | * the lock for this rgrp's glock is significantly greater than the | ||
1702 | * time taken for resource groups on average. We introduce a margin in | ||
1703 | * the form of the variable @var which is computed as the sum of the two | ||
1704 | * respective variances, and multiplied by a factor depending on @loops | ||
1705 | * and whether we have a lot of data to base the decision on. This is | ||
1706 | * then tested against the square difference of the means in order to | ||
1707 | * decide whether the result is statistically significant or not. | ||
1708 | * | ||
1709 | * Returns: A boolean verdict on the congestion status | ||
1710 | */ | ||
1711 | |||
1712 | static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) | ||
1713 | { | ||
1714 | const struct gfs2_glock *gl = rgd->rd_gl; | ||
1715 | const struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1716 | struct gfs2_lkstats *st; | ||
1717 | s64 r_dcount, l_dcount; | ||
1718 | s64 r_srttb, l_srttb; | ||
1719 | s64 srttb_diff; | ||
1720 | s64 sqr_diff; | ||
1721 | s64 var; | ||
1722 | |||
1723 | preempt_disable(); | ||
1724 | st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; | ||
1725 | r_srttb = st->stats[GFS2_LKS_SRTTB]; | ||
1726 | r_dcount = st->stats[GFS2_LKS_DCOUNT]; | ||
1727 | var = st->stats[GFS2_LKS_SRTTVARB] + | ||
1728 | gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; | ||
1729 | preempt_enable(); | ||
1730 | |||
1731 | l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; | ||
1732 | l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; | ||
1733 | |||
1734 | if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) | ||
1735 | return false; | ||
1736 | |||
1737 | srttb_diff = r_srttb - l_srttb; | ||
1738 | sqr_diff = srttb_diff * srttb_diff; | ||
1739 | |||
1740 | var *= 2; | ||
1741 | if (l_dcount < 8 || r_dcount < 8) | ||
1742 | var *= 2; | ||
1743 | if (loops == 1) | ||
1744 | var *= 2; | ||
1745 | |||
1746 | return ((srttb_diff < 0) && (sqr_diff > var)); | ||
1747 | } | ||
1748 | |||
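The decision above boils down to a one-sided significance test: treat the rgrp as congested only when its own smoothed round-trip time exceeds the per-CPU average for rgrp glocks and the squared gap clears a variance-based margin, a margin that widens when sample counts are low or when we are on a later pass. A hedged userspace restatement of the same arithmetic (simplified types, not the kernel's API):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool rgrp_congested(int64_t l_srttb, int64_t r_srttb,
    			   int64_t l_var, int64_t r_var,
    			   int64_t l_dcount, int64_t r_dcount, int loops)
    {
    	if (l_dcount < 1 || r_dcount < 1 || r_srttb == 0)
    		return false;		/* not enough data: assume fine */

    	int64_t diff = r_srttb - l_srttb;
    	int64_t sqr_diff = diff * diff;
    	int64_t var = (l_var + r_var) * 2;	/* base margin */

    	if (l_dcount < 8 || r_dcount < 8)
    		var *= 2;		/* few samples: widen the margin */
    	if (loops == 1)
    		var *= 2;		/* later pass: be less picky */

    	/* Congested only if this rgrp is slower than average (diff < 0
    	 * means the local SRTT exceeds the mean) and the gap is
    	 * significant relative to the variance margin. */
    	return diff < 0 && sqr_diff > var;
    }

    int main(void)
    {
    	/* local SRTT 900 vs average 500, tight variances: congested */
    	printf("%d\n", rgrp_congested(900, 500, 100, 100, 10, 10, 0));
    	return 0;
    }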
1749 | /** | ||
1750 | * gfs2_rgrp_used_recently - test whether the rgrp glock was used recently | ||
1751 | * @rs: The block reservation with the rgrp to test | ||
1752 | * @msecs: The time limit in milliseconds | ||
1753 | * | ||
1754 | * Returns: True if the rgrp glock has been used within the time limit | ||
1755 | */ | ||
1756 | static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, | ||
1757 | u64 msecs) | ||
1758 | { | ||
1759 | u64 tdiff; | ||
1760 | |||
1761 | tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), | ||
1762 | rs->rs_rbm.rgd->rd_gl->gl_dstamp)); | ||
1763 | |||
1764 | return tdiff > (msecs * 1000 * 1000); | ||
1765 | } | ||
1766 | |||
1767 | static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) | ||
1768 | { | ||
1769 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1770 | u32 skip; | ||
1771 | |||
1772 | get_random_bytes(&skip, sizeof(skip)); | ||
1773 | return skip % sdp->sd_rgrps; | ||
1774 | } | ||
1775 | |||
1681 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) | 1776 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) |
1682 | { | 1777 | { |
1683 | struct gfs2_rgrpd *rgd = *pos; | 1778 | struct gfs2_rgrpd *rgd = *pos; |
1779 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
1684 | 1780 | ||
1685 | rgd = gfs2_rgrpd_get_next(rgd); | 1781 | rgd = gfs2_rgrpd_get_next(rgd); |
1686 | if (rgd == NULL) | 1782 | if (rgd == NULL) |
1687 | rgd = gfs2_rgrpd_get_next(NULL); | 1783 | rgd = gfs2_rgrpd_get_first(sdp); |
1688 | *pos = rgd; | 1784 | *pos = rgd; |
1689 | if (rgd != begin) /* If we didn't wrap */ | 1785 | if (rgd != begin) /* If we didn't wrap */ |
1690 | return true; | 1786 | return true; |
@@ -1699,14 +1795,15 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b | |||
1699 | * Returns: errno | 1795 | * Returns: errno |
1700 | */ | 1796 | */ |
1701 | 1797 | ||
1702 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | 1798 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) |
1703 | { | 1799 | { |
1704 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1800 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1705 | struct gfs2_rgrpd *begin = NULL; | 1801 | struct gfs2_rgrpd *begin = NULL; |
1706 | struct gfs2_blkreserv *rs = ip->i_res; | 1802 | struct gfs2_blkreserv *rs = ip->i_res; |
1707 | int error = 0, rg_locked, flags = LM_FLAG_TRY; | 1803 | int error = 0, rg_locked, flags = 0; |
1708 | u64 last_unlinked = NO_BLOCK; | 1804 | u64 last_unlinked = NO_BLOCK; |
1709 | int loops = 0; | 1805 | int loops = 0; |
1806 | u32 skip = 0; | ||
1710 | 1807 | ||
1711 | if (sdp->sd_args.ar_rgrplvb) | 1808 | if (sdp->sd_args.ar_rgrplvb) |
1712 | flags |= GL_SKIP; | 1809 | flags |= GL_SKIP; |
@@ -1720,6 +1817,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1720 | } else { | 1817 | } else { |
1721 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); | 1818 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); |
1722 | } | 1819 | } |
1820 | if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) | ||
1821 | skip = gfs2_orlov_skip(ip); | ||
1723 | if (rs->rs_rbm.rgd == NULL) | 1822 | if (rs->rs_rbm.rgd == NULL) |
1724 | return -EBADSLT; | 1823 | return -EBADSLT; |
1725 | 1824 | ||
@@ -1728,13 +1827,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1728 | 1827 | ||
1729 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { | 1828 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { |
1730 | rg_locked = 0; | 1829 | rg_locked = 0; |
1830 | if (skip && skip--) | ||
1831 | goto next_rgrp; | ||
1832 | if (!gfs2_rs_active(rs) && (loops < 2) && | ||
1833 | gfs2_rgrp_used_recently(rs, 1000) && | ||
1834 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) | ||
1835 | goto next_rgrp; | ||
1731 | error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, | 1836 | error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, |
1732 | LM_ST_EXCLUSIVE, flags, | 1837 | LM_ST_EXCLUSIVE, flags, |
1733 | &rs->rs_rgd_gh); | 1838 | &rs->rs_rgd_gh); |
1734 | if (error == GLR_TRYFAILED) | ||
1735 | goto next_rgrp; | ||
1736 | if (unlikely(error)) | 1839 | if (unlikely(error)) |
1737 | return error; | 1840 | return error; |
1841 | if (!gfs2_rs_active(rs) && (loops < 2) && | ||
1842 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) | ||
1843 | goto skip_rgrp; | ||
1738 | if (sdp->sd_args.ar_rgrplvb) { | 1844 | if (sdp->sd_args.ar_rgrplvb) { |
1739 | error = update_rgrp_lvb(rs->rs_rbm.rgd); | 1845 | error = update_rgrp_lvb(rs->rs_rbm.rgd); |
1740 | if (unlikely(error)) { | 1846 | if (unlikely(error)) { |
@@ -1781,12 +1887,13 @@ next_rgrp: | |||
1781 | /* Find the next rgrp, and continue looking */ | 1887 | /* Find the next rgrp, and continue looking */ |
1782 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) | 1888 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) |
1783 | continue; | 1889 | continue; |
1890 | if (skip) | ||
1891 | continue; | ||
1784 | 1892 | ||
1785 | /* If we've scanned all the rgrps, but found no free blocks | 1893 | /* If we've scanned all the rgrps, but found no free blocks |
1786 | * then this checks for some less likely conditions before | 1894 | * then this checks for some less likely conditions before |
1787 | * trying again. | 1895 | * trying again. |
1788 | */ | 1896 | */ |
1789 | flags &= ~LM_FLAG_TRY; | ||
1790 | loops++; | 1897 | loops++; |
1791 | /* Check that fs hasn't grown if writing to rindex */ | 1898 | /* Check that fs hasn't grown if writing to rindex */ |
1792 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { | 1899 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { |
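The Orlov handling above leans on a compact idiom: `if (skip && skip--)` passes over exactly `skip` candidate rgrps (the test short-circuits once the counter reaches zero), while the later `if (skip) continue;` keeps `loops` from incrementing when the scan wraps during the skipping phase. The counting idiom in isolation, assuming a plain loop over candidates:

    #include <stdio.h>

    int main(void)
    {
    	unsigned skip = 3;

    	for (int rg = 0; rg < 8; rg++) {
    		if (skip && skip--)
    			continue;	/* candidates 0..2 are passed over */
    		printf("trying rgrp %d\n", rg);	/* first hit: rg == 3 */
    	}
    	return 0;
    }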
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 24077958dcf6..842185853f6b 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -39,7 +39,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); | |||
39 | 39 | ||
40 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); | 40 | extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); |
41 | 41 | ||
42 | extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); | 42 | #define GFS2_AF_ORLOV 1 |
43 | extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); | ||
43 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 44 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
44 | 45 | ||
45 | extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, | 46 | extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, |
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index bbdc78af60ca..2ee13e841e9f 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h | |||
@@ -486,7 +486,7 @@ TRACE_EVENT(gfs2_block_alloc, | |||
486 | ), | 486 | ), |
487 | 487 | ||
488 | TP_fast_assign( | 488 | TP_fast_assign( |
489 | __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev; | 489 | __entry->dev = rgd->rd_gl->gl_sbd->sd_vfs->s_dev; |
490 | __entry->start = block; | 490 | __entry->start = block; |
491 | __entry->inum = ip->i_no_addr; | 491 | __entry->inum = ip->i_no_addr; |
492 | __entry->len = len; | 492 | __entry->len = len; |
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index db330e5518cd..76c144b3c9bb 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
734 | if (error) | 734 | if (error) |
735 | return error; | 735 | return error; |
736 | 736 | ||
737 | error = gfs2_inplace_reserve(ip, blks); | 737 | error = gfs2_inplace_reserve(ip, blks, 0); |
738 | if (error) | 738 | if (error) |
739 | goto out_gunlock_q; | 739 | goto out_gunlock_q; |
740 | 740 | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c5bc355d8243..78bde32ea951 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * hugetlbpage-backed filesystem. Based on ramfs. | 2 | * hugetlbpage-backed filesystem. Based on ramfs. |
3 | * | 3 | * |
4 | * William Irwin, 2002 | 4 | * Nadia Yvette Chambers, 2002 |
5 | * | 5 | * |
6 | * Copyright (C) 2002 Linus Torvalds. | 6 | * Copyright (C) 2002 Linus Torvalds. |
7 | */ | 7 | */ |
@@ -151,8 +151,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
151 | { | 151 | { |
152 | struct mm_struct *mm = current->mm; | 152 | struct mm_struct *mm = current->mm; |
153 | struct vm_area_struct *vma; | 153 | struct vm_area_struct *vma; |
154 | unsigned long start_addr; | ||
155 | struct hstate *h = hstate_file(file); | 154 | struct hstate *h = hstate_file(file); |
155 | struct vm_unmapped_area_info info; | ||
156 | 156 | ||
157 | if (len & ~huge_page_mask(h)) | 157 | if (len & ~huge_page_mask(h)) |
158 | return -EINVAL; | 158 | return -EINVAL; |
@@ -173,39 +173,13 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
173 | return addr; | 173 | return addr; |
174 | } | 174 | } |
175 | 175 | ||
176 | if (len > mm->cached_hole_size) | 176 | info.flags = 0; |
177 | start_addr = mm->free_area_cache; | 177 | info.length = len; |
178 | else { | 178 | info.low_limit = TASK_UNMAPPED_BASE; |
179 | start_addr = TASK_UNMAPPED_BASE; | 179 | info.high_limit = TASK_SIZE; |
180 | mm->cached_hole_size = 0; | 180 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); |
181 | } | 181 | info.align_offset = 0; |
182 | 182 | return vm_unmapped_area(&info); | |
183 | full_search: | ||
184 | addr = ALIGN(start_addr, huge_page_size(h)); | ||
185 | |||
186 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | ||
187 | /* At this point: (!vma || addr < vma->vm_end). */ | ||
188 | if (TASK_SIZE - len < addr) { | ||
189 | /* | ||
190 | * Start a new search - just in case we missed | ||
191 | * some holes. | ||
192 | */ | ||
193 | if (start_addr != TASK_UNMAPPED_BASE) { | ||
194 | start_addr = TASK_UNMAPPED_BASE; | ||
195 | mm->cached_hole_size = 0; | ||
196 | goto full_search; | ||
197 | } | ||
198 | return -ENOMEM; | ||
199 | } | ||
200 | |||
201 | if (!vma || addr + len <= vma->vm_start) { | ||
202 | mm->free_area_cache = addr + len; | ||
203 | return addr; | ||
204 | } | ||
205 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
206 | mm->cached_hole_size = vma->vm_start - addr; | ||
207 | addr = ALIGN(vma->vm_end, huge_page_size(h)); | ||
208 | } | ||
209 | } | 183 | } |
210 | #endif | 184 | #endif |
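The vm_unmapped_area() conversion above expresses the alignment requirement as a mask instead of a manual ALIGN() loop: align_mask selects the address bits that must be clear. Worked through for the common case of 4 KiB base pages and a 2 MiB huge page size (assumed here; the values differ per architecture):

    #include <stdio.h>

    int main(void)
    {
    	unsigned long page_mask = ~((1UL << 12) - 1);	/* PAGE_MASK       */
    	unsigned long huge_mask = ~((1UL << 21) - 1);	/* huge_page_mask() */
    	unsigned long align_mask = page_mask & ~huge_mask;

    	/* 0x1ff000: the bits that must be zero for an address to be
    	 * 2 MiB aligned while still being expressed in whole pages. */
    	printf("align_mask = %#lx\n", align_mask);
    	return 0;
    }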
211 | 185 | ||
@@ -608,11 +582,11 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, | |||
608 | int rc; | 582 | int rc; |
609 | 583 | ||
610 | rc = migrate_huge_page_move_mapping(mapping, newpage, page); | 584 | rc = migrate_huge_page_move_mapping(mapping, newpage, page); |
611 | if (rc) | 585 | if (rc != MIGRATEPAGE_SUCCESS) |
612 | return rc; | 586 | return rc; |
613 | migrate_page_copy(newpage, page); | 587 | migrate_page_copy(newpage, page); |
614 | 588 | ||
615 | return 0; | 589 | return MIGRATEPAGE_SUCCESS; |
616 | } | 590 | } |
617 | 591 | ||
618 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 592 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
@@ -923,7 +897,7 @@ static struct file_system_type hugetlbfs_fs_type = { | |||
923 | .kill_sb = kill_litter_super, | 897 | .kill_sb = kill_litter_super, |
924 | }; | 898 | }; |
925 | 899 | ||
926 | static struct vfsmount *hugetlbfs_vfsmount; | 900 | static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; |
927 | 901 | ||
928 | static int can_do_hugetlb_shm(void) | 902 | static int can_do_hugetlb_shm(void) |
929 | { | 903 | { |
@@ -932,9 +906,22 @@ static int can_do_hugetlb_shm(void) | |||
932 | return capable(CAP_IPC_LOCK) || in_group_p(shm_group); | 906 | return capable(CAP_IPC_LOCK) || in_group_p(shm_group); |
933 | } | 907 | } |
934 | 908 | ||
909 | static int get_hstate_idx(int page_size_log) | ||
910 | { | ||
911 | struct hstate *h; | ||
912 | |||
913 | if (!page_size_log) | ||
914 | return default_hstate_idx; | ||
915 | h = size_to_hstate(1 << page_size_log); | ||
916 | if (!h) | ||
917 | return -1; | ||
918 | return h - hstates; | ||
919 | } | ||
920 | |||
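get_hstate_idx() above interprets page_size_log as the log2 of the requested huge page size, with 0 meaning the default hstate, matching the encoding callers pass down from flags like the SHM_HUGE_* values. A tiny sketch of the convention (the 2 MiB default is an assumption for illustration):

    /* Sketch only: maps page_size_log back to a byte count. */
    unsigned long page_size(int page_size_log)
    {
    	if (!page_size_log)
    		return 2UL << 20;	/* assume a 2 MiB default hstate */
    	return 1UL << page_size_log;	/* e.g. 21 -> 2 MiB, 30 -> 1 GiB */
    }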
935 | struct file *hugetlb_file_setup(const char *name, unsigned long addr, | 921 | struct file *hugetlb_file_setup(const char *name, unsigned long addr, |
936 | size_t size, vm_flags_t acctflag, | 922 | size_t size, vm_flags_t acctflag, |
937 | struct user_struct **user, int creat_flags) | 923 | struct user_struct **user, |
924 | int creat_flags, int page_size_log) | ||
938 | { | 925 | { |
939 | int error = -ENOMEM; | 926 | int error = -ENOMEM; |
940 | struct file *file; | 927 | struct file *file; |
@@ -944,9 +931,14 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, | |||
944 | struct qstr quick_string; | 931 | struct qstr quick_string; |
945 | struct hstate *hstate; | 932 | struct hstate *hstate; |
946 | unsigned long num_pages; | 933 | unsigned long num_pages; |
934 | int hstate_idx; | ||
935 | |||
936 | hstate_idx = get_hstate_idx(page_size_log); | ||
937 | if (hstate_idx < 0) | ||
938 | return ERR_PTR(-ENODEV); | ||
947 | 939 | ||
948 | *user = NULL; | 940 | *user = NULL; |
949 | if (!hugetlbfs_vfsmount) | 941 | if (!hugetlbfs_vfsmount[hstate_idx]) |
950 | return ERR_PTR(-ENOENT); | 942 | return ERR_PTR(-ENOENT); |
951 | 943 | ||
952 | if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { | 944 | if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { |
@@ -963,7 +955,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, | |||
963 | } | 955 | } |
964 | } | 956 | } |
965 | 957 | ||
966 | root = hugetlbfs_vfsmount->mnt_root; | 958 | root = hugetlbfs_vfsmount[hstate_idx]->mnt_root; |
967 | quick_string.name = name; | 959 | quick_string.name = name; |
968 | quick_string.len = strlen(quick_string.name); | 960 | quick_string.len = strlen(quick_string.name); |
969 | quick_string.hash = 0; | 961 | quick_string.hash = 0; |
@@ -971,7 +963,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, | |||
971 | if (!path.dentry) | 963 | if (!path.dentry) |
972 | goto out_shm_unlock; | 964 | goto out_shm_unlock; |
973 | 965 | ||
974 | path.mnt = mntget(hugetlbfs_vfsmount); | 966 | path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); |
975 | error = -ENOSPC; | 967 | error = -ENOSPC; |
976 | inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0); | 968 | inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0); |
977 | if (!inode) | 969 | if (!inode) |
@@ -1011,8 +1003,9 @@ out_shm_unlock: | |||
1011 | 1003 | ||
1012 | static int __init init_hugetlbfs_fs(void) | 1004 | static int __init init_hugetlbfs_fs(void) |
1013 | { | 1005 | { |
1006 | struct hstate *h; | ||
1014 | int error; | 1007 | int error; |
1015 | struct vfsmount *vfsmount; | 1008 | int i; |
1016 | 1009 | ||
1017 | error = bdi_init(&hugetlbfs_backing_dev_info); | 1010 | error = bdi_init(&hugetlbfs_backing_dev_info); |
1018 | if (error) | 1011 | if (error) |
@@ -1029,14 +1022,26 @@ static int __init init_hugetlbfs_fs(void) | |||
1029 | if (error) | 1022 | if (error) |
1030 | goto out; | 1023 | goto out; |
1031 | 1024 | ||
1032 | vfsmount = kern_mount(&hugetlbfs_fs_type); | 1025 | i = 0; |
1026 | for_each_hstate(h) { | ||
1027 | char buf[50]; | ||
1028 | unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); | ||
1033 | 1029 | ||
1034 | if (!IS_ERR(vfsmount)) { | 1030 | snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); |
1035 | hugetlbfs_vfsmount = vfsmount; | 1031 | hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, |
1036 | return 0; | 1032 | buf); |
1037 | } | ||
1038 | 1033 | ||
1039 | error = PTR_ERR(vfsmount); | 1034 | if (IS_ERR(hugetlbfs_vfsmount[i])) { |
1035 | pr_err("hugetlb: Cannot mount internal hugetlbfs for " | ||
1036 | "page size %uK", ps_kb); | ||
1037 | error = PTR_ERR(hugetlbfs_vfsmount[i]); | ||
1038 | hugetlbfs_vfsmount[i] = NULL; | ||
1039 | } | ||
1040 | i++; | ||
1041 | } | ||
1042 | /* Non default hstates are optional */ | ||
1043 | if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) | ||
1044 | return 0; | ||
1040 | 1045 | ||
1041 | out: | 1046 | out: |
1042 | kmem_cache_destroy(hugetlbfs_inode_cachep); | 1047 | kmem_cache_destroy(hugetlbfs_inode_cachep); |
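Each internal mount in the loop above is tagged with a pagesize= option derived from the hstate's order: 1U << (order + PAGE_SHIFT - 10) converts the page size to KiB. Worked through for an assumed 2 MiB hstate on a 4 KiB base page (order 9, PAGE_SHIFT 12):

    #include <stdio.h>

    int main(void)
    {
    	unsigned order = 9, page_shift = 12;	/* assumed 2 MiB hstate */
    	char buf[50];

    	snprintf(buf, sizeof(buf), "pagesize=%uK",
    		 1U << (order + page_shift - 10));
    	printf("%s\n", buf);	/* -> pagesize=2048K */
    	return 0;
    }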
@@ -1047,13 +1052,19 @@ static int __init init_hugetlbfs_fs(void) | |||
1047 | 1052 | ||
1048 | static void __exit exit_hugetlbfs_fs(void) | 1053 | static void __exit exit_hugetlbfs_fs(void) |
1049 | { | 1054 | { |
1055 | struct hstate *h; | ||
1056 | int i; | ||
1057 | |||
1058 | |||
1050 | /* | 1059 | /* |
1051 | * Make sure all delayed rcu free inodes are flushed before we | 1060 | * Make sure all delayed rcu free inodes are flushed before we |
1052 | * destroy cache. | 1061 | * destroy cache. |
1053 | */ | 1062 | */ |
1054 | rcu_barrier(); | 1063 | rcu_barrier(); |
1055 | kmem_cache_destroy(hugetlbfs_inode_cachep); | 1064 | kmem_cache_destroy(hugetlbfs_inode_cachep); |
1056 | kern_unmount(hugetlbfs_vfsmount); | 1065 | i = 0; |
1066 | for_each_hstate(h) | ||
1067 | kern_unmount(hugetlbfs_vfsmount[i++]); | ||
1057 | unregister_filesystem(&hugetlbfs_fs_type); | 1068 | unregister_filesystem(&hugetlbfs_fs_type); |
1058 | bdi_destroy(&hugetlbfs_backing_dev_info); | 1069 | bdi_destroy(&hugetlbfs_backing_dev_info); |
1059 | } | 1070 | } |
diff --git a/fs/inode.c b/fs/inode.c index b03c71957246..14084b72b259 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -165,7 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
165 | mapping->host = inode; | 165 | mapping->host = inode; |
166 | mapping->flags = 0; | 166 | mapping->flags = 0; |
167 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); | 167 | mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); |
168 | mapping->assoc_mapping = NULL; | 168 | mapping->private_data = NULL; |
169 | mapping->backing_dev_info = &default_backing_dev_info; | 169 | mapping->backing_dev_info = &default_backing_dev_info; |
170 | mapping->writeback_index = 0; | 170 | mapping->writeback_index = 0; |
171 | 171 | ||
@@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode) | |||
408 | spin_unlock(&inode->i_sb->s_inode_lru_lock); | 408 | spin_unlock(&inode->i_sb->s_inode_lru_lock); |
409 | } | 409 | } |
410 | 410 | ||
411 | /* | ||
412 | * Add inode to LRU if needed (inode is unused and clean). | ||
413 | * | ||
414 | * Needs inode->i_lock held. | ||
415 | */ | ||
416 | void inode_add_lru(struct inode *inode) | ||
417 | { | ||
418 | if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) && | ||
419 | !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) | ||
420 | inode_lru_list_add(inode); | ||
421 | } | ||
422 | |||
423 | |||
411 | static void inode_lru_list_del(struct inode *inode) | 424 | static void inode_lru_list_del(struct inode *inode) |
412 | { | 425 | { |
413 | spin_lock(&inode->i_sb->s_inode_lru_lock); | 426 | spin_lock(&inode->i_sb->s_inode_lru_lock); |
@@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode) | |||
1390 | 1403 | ||
1391 | if (!drop && (sb->s_flags & MS_ACTIVE)) { | 1404 | if (!drop && (sb->s_flags & MS_ACTIVE)) { |
1392 | inode->i_state |= I_REFERENCED; | 1405 | inode->i_state |= I_REFERENCED; |
1393 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 1406 | inode_add_lru(inode); |
1394 | inode_lru_list_add(inode); | ||
1395 | spin_unlock(&inode->i_lock); | 1407 | spin_unlock(&inode->i_lock); |
1396 | return; | 1408 | return; |
1397 | } | 1409 | } |
diff --git a/fs/internal.h b/fs/internal.h index 916b7cbf3e3e..2f6af7f645eb 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f); | |||
110 | * inode.c | 110 | * inode.c |
111 | */ | 111 | */ |
112 | extern spinlock_t inode_sb_list_lock; | 112 | extern spinlock_t inode_sb_list_lock; |
113 | extern void inode_add_lru(struct inode *inode); | ||
113 | 114 | ||
114 | /* | 115 | /* |
115 | * fs-writeback.c | 116 | * fs-writeback.c |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 78b7f84241d4..071d6905f0dd 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -1259,7 +1259,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1259 | goto not_jbd; | 1259 | goto not_jbd; |
1260 | } | 1260 | } |
1261 | 1261 | ||
1262 | /* keep track of wether or not this transaction modified us */ | 1262 | /* keep track of whether or not this transaction modified us */ |
1263 | was_modified = jh->b_modified; | 1263 | was_modified = jh->b_modified; |
1264 | 1264 | ||
1265 | /* | 1265 | /* |
@@ -1961,7 +1961,9 @@ retry: | |||
1961 | spin_unlock(&journal->j_list_lock); | 1961 | spin_unlock(&journal->j_list_lock); |
1962 | jbd_unlock_bh_state(bh); | 1962 | jbd_unlock_bh_state(bh); |
1963 | spin_unlock(&journal->j_state_lock); | 1963 | spin_unlock(&journal->j_state_lock); |
1964 | unlock_buffer(bh); | ||
1964 | log_wait_commit(journal, tid); | 1965 | log_wait_commit(journal, tid); |
1966 | lock_buffer(bh); | ||
1965 | goto retry; | 1967 | goto retry; |
1966 | } | 1968 | } |
1967 | /* | 1969 | /* |
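The jbd fix above is the classic drop-wait-retake pattern: log_wait_commit() can sleep for a long time, so the buffer lock is released around it and the state is re-validated afterwards through the existing retry label. Stated generically in userspace terms (pthreads standing in for the kernel primitives, purely as a sketch):

    #include <pthread.h>

    void wait_without_lock(pthread_mutex_t *lock, void (*blocking_wait)(void))
    {
    	pthread_mutex_unlock(lock);	/* never sleep with the lock held */
    	blocking_wait();		/* e.g. log_wait_commit() */
    	pthread_mutex_lock(lock);	/* caller must recheck state now */
    }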
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a74ba4659549..d8da40e99d84 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1261,7 +1261,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1261 | goto not_jbd; | 1261 | goto not_jbd; |
1262 | } | 1262 | } |
1263 | 1263 | ||
1264 | /* keep track of wether or not this transaction modified us */ | 1264 | /* keep track of whether or not this transaction modified us */ |
1265 | was_modified = jh->b_modified; | 1265 | was_modified = jh->b_modified; |
1266 | 1266 | ||
1267 | /* | 1267 | /* |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 60ef3fb707ff..1506673c087e 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
138 | struct page *pg; | 138 | struct page *pg; |
139 | struct inode *inode = mapping->host; | 139 | struct inode *inode = mapping->host; |
140 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); | 140 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); |
141 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | ||
142 | struct jffs2_raw_inode ri; | ||
143 | uint32_t alloc_len = 0; | ||
141 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 144 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
142 | uint32_t pageofs = index << PAGE_CACHE_SHIFT; | 145 | uint32_t pageofs = index << PAGE_CACHE_SHIFT; |
143 | int ret = 0; | 146 | int ret = 0; |
144 | 147 | ||
148 | jffs2_dbg(1, "%s()\n", __func__); | ||
149 | |||
150 | if (pageofs > inode->i_size) { | ||
151 | ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, | ||
152 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); | ||
153 | if (ret) | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | mutex_lock(&f->sem); | ||
145 | pg = grab_cache_page_write_begin(mapping, index, flags); | 158 | pg = grab_cache_page_write_begin(mapping, index, flags); |
146 | if (!pg) | 159 | if (!pg) { |
160 | if (alloc_len) | ||
161 | jffs2_complete_reservation(c); | ||
162 | mutex_unlock(&f->sem); | ||
147 | return -ENOMEM; | 163 | return -ENOMEM; |
164 | } | ||
148 | *pagep = pg; | 165 | *pagep = pg; |
149 | 166 | ||
150 | jffs2_dbg(1, "%s()\n", __func__); | 167 | if (alloc_len) { |
151 | |||
152 | if (pageofs > inode->i_size) { | ||
153 | /* Make new hole frag from old EOF to new page */ | 168 | /* Make new hole frag from old EOF to new page */ |
154 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | ||
155 | struct jffs2_raw_inode ri; | ||
156 | struct jffs2_full_dnode *fn; | 169 | struct jffs2_full_dnode *fn; |
157 | uint32_t alloc_len; | ||
158 | 170 | ||
159 | jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", | 171 | jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", |
160 | (unsigned int)inode->i_size, pageofs); | 172 | (unsigned int)inode->i_size, pageofs); |
161 | 173 | ||
162 | ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, | ||
163 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); | ||
164 | if (ret) | ||
165 | goto out_page; | ||
166 | |||
167 | mutex_lock(&f->sem); | ||
168 | memset(&ri, 0, sizeof(ri)); | 174 | memset(&ri, 0, sizeof(ri)); |
169 | 175 | ||
170 | ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); | 176 | ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); |
@@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
191 | if (IS_ERR(fn)) { | 197 | if (IS_ERR(fn)) { |
192 | ret = PTR_ERR(fn); | 198 | ret = PTR_ERR(fn); |
193 | jffs2_complete_reservation(c); | 199 | jffs2_complete_reservation(c); |
194 | mutex_unlock(&f->sem); | ||
195 | goto out_page; | 200 | goto out_page; |
196 | } | 201 | } |
197 | ret = jffs2_add_full_dnode_to_inode(c, f, fn); | 202 | ret = jffs2_add_full_dnode_to_inode(c, f, fn); |
@@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
206 | jffs2_mark_node_obsolete(c, fn->raw); | 211 | jffs2_mark_node_obsolete(c, fn->raw); |
207 | jffs2_free_full_dnode(fn); | 212 | jffs2_free_full_dnode(fn); |
208 | jffs2_complete_reservation(c); | 213 | jffs2_complete_reservation(c); |
209 | mutex_unlock(&f->sem); | ||
210 | goto out_page; | 214 | goto out_page; |
211 | } | 215 | } |
212 | jffs2_complete_reservation(c); | 216 | jffs2_complete_reservation(c); |
213 | inode->i_size = pageofs; | 217 | inode->i_size = pageofs; |
214 | mutex_unlock(&f->sem); | ||
215 | } | 218 | } |
216 | 219 | ||
217 | /* | 220 | /* |
@@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
220 | * case of a short-copy. | 223 | * case of a short-copy. |
221 | */ | 224 | */ |
222 | if (!PageUptodate(pg)) { | 225 | if (!PageUptodate(pg)) { |
223 | mutex_lock(&f->sem); | ||
224 | ret = jffs2_do_readpage_nolock(inode, pg); | 226 | ret = jffs2_do_readpage_nolock(inode, pg); |
225 | mutex_unlock(&f->sem); | ||
226 | if (ret) | 227 | if (ret) |
227 | goto out_page; | 228 | goto out_page; |
228 | } | 229 | } |
230 | mutex_unlock(&f->sem); | ||
229 | jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); | 231 | jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); |
230 | return ret; | 232 | return ret; |
231 | 233 | ||
232 | out_page: | 234 | out_page: |
233 | unlock_page(pg); | 235 | unlock_page(pg); |
234 | page_cache_release(pg); | 236 | page_cache_release(pg); |
237 | mutex_unlock(&f->sem); | ||
235 | return ret; | 238 | return ret; |
236 | } | 239 | } |
237 | 240 | ||
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index adb90116d36b..af49e2d6941a 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -33,7 +33,7 @@ | |||
33 | * are being written out - and waiting for GC to make progress, naturally. | 33 | * are being written out - and waiting for GC to make progress, naturally. |
34 | * | 34 | * |
35 | * So we cannot just call iget() or some variant of it, but first have to check | 35 | * So we cannot just call iget() or some variant of it, but first have to check |
36 | * wether the inode in question might be in I_FREEING state. Therefore we | 36 | * whether the inode in question might be in I_FREEING state. Therefore we |
37 | * maintain our own per-sb list of "almost deleted" inodes and check against | 37 | * maintain our own per-sb list of "almost deleted" inodes and check against |
38 | * that list first. Normally this should be at most 1-2 entries long. | 38 | * that list first. Normally this should be at most 1-2 entries long. |
39 | * | 39 | * |
diff --git a/fs/namei.c b/fs/namei.c index 937f9d50c84b..5f4cdf3ad913 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -2131,6 +2131,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | |||
2131 | if (!len) | 2131 | if (!len) |
2132 | return ERR_PTR(-EACCES); | 2132 | return ERR_PTR(-EACCES); |
2133 | 2133 | ||
2134 | if (unlikely(name[0] == '.')) { | ||
2135 | if (len < 2 || (len == 2 && name[1] == '.')) | ||
2136 | return ERR_PTR(-EACCES); | ||
2137 | } | ||
2138 | |||
2134 | while (len--) { | 2139 | while (len--) { |
2135 | c = *(const unsigned char *)name++; | 2140 | c = *(const unsigned char *)name++; |
2136 | if (c == '/' || c == '\0') | 2141 | if (c == '/' || c == '\0') |
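The new guard in lookup_one_len() above rejects the names "." and ".." outright, since a single-component lookup helper has no business resolving them; len is already known to be non-zero at that point. The condition in isolation, under the same precondition:

    #include <stdbool.h>

    /* Assumes len >= 1, as in lookup_one_len() after its length check. */
    static bool is_dot_or_dotdot(const char *name, int len)
    {
    	return name[0] == '.' &&
    	       (len == 1 || (len == 2 && name[1] == '.'));
    }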
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index be20a7e171a0..63d14a99483d 100644 --- a/fs/ncpfs/mmap.c +++ b/fs/ncpfs/mmap.c | |||
@@ -89,7 +89,7 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, | |||
89 | /* | 89 | /* |
90 | * If I understand ncp_read_kernel() properly, the above always | 90 | * If I understand ncp_read_kernel() properly, the above always |
91 | * fetches from the network, here the analogue of disk. | 91 | * fetches from the network, here the analogue of disk. |
92 | * -- wli | 92 | * -- nyc |
93 | */ | 93 | */ |
94 | count_vm_event(PGMAJFAULT); | 94 | count_vm_event(PGMAJFAULT); |
95 | mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT); | 95 | mem_cgroup_count_vm_event(area->vm_mm, PGMAJFAULT); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ce8cb926526b..b9e66b7e0c14 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -450,7 +450,8 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |||
450 | nfs_refresh_inode(dentry->d_inode, entry->fattr); | 450 | nfs_refresh_inode(dentry->d_inode, entry->fattr); |
451 | goto out; | 451 | goto out; |
452 | } else { | 452 | } else { |
453 | d_drop(dentry); | 453 | if (d_invalidate(dentry) != 0) |
454 | goto out; | ||
454 | dput(dentry); | 455 | dput(dentry); |
455 | } | 456 | } |
456 | } | 457 | } |
@@ -1100,6 +1101,8 @@ out_set_verifier: | |||
1100 | out_zap_parent: | 1101 | out_zap_parent: |
1101 | nfs_zap_caches(dir); | 1102 | nfs_zap_caches(dir); |
1102 | out_bad: | 1103 | out_bad: |
1104 | nfs_free_fattr(fattr); | ||
1105 | nfs_free_fhandle(fhandle); | ||
1103 | nfs_mark_for_revalidate(dir); | 1106 | nfs_mark_for_revalidate(dir); |
1104 | if (inode && S_ISDIR(inode->i_mode)) { | 1107 | if (inode && S_ISDIR(inode->i_mode)) { |
1105 | /* Purge readdir caches. */ | 1108 | /* Purge readdir caches. */ |
@@ -1112,8 +1115,6 @@ out_zap_parent: | |||
1112 | shrink_dcache_parent(dentry); | 1115 | shrink_dcache_parent(dentry); |
1113 | } | 1116 | } |
1114 | d_drop(dentry); | 1117 | d_drop(dentry); |
1115 | nfs_free_fattr(fattr); | ||
1116 | nfs_free_fhandle(fhandle); | ||
1117 | dput(parent); | 1118 | dput(parent); |
1118 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", | 1119 | dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", |
1119 | __func__, dentry->d_parent->d_name.name, | 1120 | __func__, dentry->d_parent->d_name.name, |
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 3e7b2a0dc0c8..07f76db04ec7 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c | |||
@@ -431,7 +431,7 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode, | |||
431 | mapping->host = inode; | 431 | mapping->host = inode; |
432 | mapping->flags = 0; | 432 | mapping->flags = 0; |
433 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 433 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
434 | mapping->assoc_mapping = NULL; | 434 | mapping->private_data = NULL; |
435 | mapping->backing_dev_info = bdi; | 435 | mapping->backing_dev_info = bdi; |
436 | mapping->a_ops = &empty_aops; | 436 | mapping->a_ops = &empty_aops; |
437 | } | 437 | } |
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig index 7dceff005a67..e5f911bd80d2 100644 --- a/fs/notify/fanotify/Kconfig +++ b/fs/notify/fanotify/Kconfig | |||
@@ -4,7 +4,7 @@ config FANOTIFY | |||
4 | select ANON_INODES | 4 | select ANON_INODES |
5 | default n | 5 | default n |
6 | ---help--- | 6 | ---help--- |
7 | Say Y here to enable fanotify suport. fanotify is a file access | 7 | Say Y here to enable fanotify support. fanotify is a file access |
8 | notification system which differs from inotify in that it sends | 8 | notification system which differs from inotify in that it sends |
9 | an open file descriptor to the userspace listener along with | 9 | an open file descriptor to the userspace listener along with |
10 | the event. | 10 | the event. |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 721d692fa8d4..6fcaeb8c902e 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -258,7 +258,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
258 | if (ret) | 258 | if (ret) |
259 | goto out_close_fd; | 259 | goto out_close_fd; |
260 | 260 | ||
261 | fd_install(fd, f); | 261 | if (fd != FAN_NOFD) |
262 | fd_install(fd, f); | ||
262 | return fanotify_event_metadata.event_len; | 263 | return fanotify_event_metadata.event_len; |
263 | 264 | ||
264 | out_close_fd: | 265 | out_close_fd: |
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index c887b1378f7e..48cb994e4922 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -18,7 +18,7 @@ | |||
18 | 18 | ||
19 | /* | 19 | /* |
20 | * Basic idea behind the notification queue: An fsnotify group (like inotify) | 20 | * Basic idea behind the notification queue: An fsnotify group (like inotify) |
21 | * sends the userspace notification about events asyncronously some time after | 21 | * sends the userspace notification about events asynchronously some time after |
22 | * the event happened. When inotify gets an event it will need to add that | 22 | * the event happened. When inotify gets an event it will need to add that |
23 | * event to the group notify queue. Since a single event might need to be on | 23 | * event to the group notify queue. Since a single event might need to be on |
24 | * multiple group's notification queues we can't add the event directly to each | 24 | * multiple group's notification queues we can't add the event directly to each |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5a4ee77cec51..dda089804942 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2513,18 +2513,15 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
2513 | ret = sd.num_spliced; | 2513 | ret = sd.num_spliced; |
2514 | 2514 | ||
2515 | if (ret > 0) { | 2515 | if (ret > 0) { |
2516 | unsigned long nr_pages; | ||
2517 | int err; | 2516 | int err; |
2518 | 2517 | ||
2519 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
2520 | |||
2521 | err = generic_write_sync(out, *ppos, ret); | 2518 | err = generic_write_sync(out, *ppos, ret); |
2522 | if (err) | 2519 | if (err) |
2523 | ret = err; | 2520 | ret = err; |
2524 | else | 2521 | else |
2525 | *ppos += ret; | 2522 | *ppos += ret; |
2526 | 2523 | ||
2527 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 2524 | balance_dirty_pages_ratelimited(mapping); |
2528 | } | 2525 | } |
2529 | 2526 | ||
2530 | return ret; | 2527 | return ret; |
diff --git a/fs/proc/array.c b/fs/proc/array.c index c1c207c36cae..d3696708fc1a 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -438,7 +438,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
438 | 438 | ||
439 | min_flt += sig->min_flt; | 439 | min_flt += sig->min_flt; |
440 | maj_flt += sig->maj_flt; | 440 | maj_flt += sig->maj_flt; |
441 | thread_group_times(task, &utime, &stime); | 441 | thread_group_cputime_adjusted(task, &utime, &stime); |
442 | gtime += sig->gtime; | 442 | gtime += sig->gtime; |
443 | } | 443 | } |
444 | 444 | ||
@@ -454,7 +454,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
454 | if (!whole) { | 454 | if (!whole) { |
455 | min_flt = task->min_flt; | 455 | min_flt = task->min_flt; |
456 | maj_flt = task->maj_flt; | 456 | maj_flt = task->maj_flt; |
457 | task_times(task, &utime, &stime); | 457 | task_cputime_adjusted(task, &utime, &stime); |
458 | gtime = task->gtime; | 458 | gtime = task->gtime; |
459 | } | 459 | } |
460 | 460 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index 144a96732dd7..aa63d25157b8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -873,12 +873,119 @@ static const struct file_operations proc_environ_operations = { | |||
873 | .release = mem_release, | 873 | .release = mem_release, |
874 | }; | 874 | }; |
875 | 875 | ||
876 | static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, | ||
877 | loff_t *ppos) | ||
878 | { | ||
879 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | ||
880 | char buffer[PROC_NUMBUF]; | ||
881 | int oom_adj = OOM_ADJUST_MIN; | ||
882 | size_t len; | ||
883 | unsigned long flags; | ||
884 | |||
885 | if (!task) | ||
886 | return -ESRCH; | ||
887 | if (lock_task_sighand(task, &flags)) { | ||
888 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) | ||
889 | oom_adj = OOM_ADJUST_MAX; | ||
890 | else | ||
891 | oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / | ||
892 | OOM_SCORE_ADJ_MAX; | ||
893 | unlock_task_sighand(task, &flags); | ||
894 | } | ||
895 | put_task_struct(task); | ||
896 | len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); | ||
897 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | ||
898 | } | ||
899 | |||
900 | static ssize_t oom_adj_write(struct file *file, const char __user *buf, | ||
901 | size_t count, loff_t *ppos) | ||
902 | { | ||
903 | struct task_struct *task; | ||
904 | char buffer[PROC_NUMBUF]; | ||
905 | int oom_adj; | ||
906 | unsigned long flags; | ||
907 | int err; | ||
908 | |||
909 | memset(buffer, 0, sizeof(buffer)); | ||
910 | if (count > sizeof(buffer) - 1) | ||
911 | count = sizeof(buffer) - 1; | ||
912 | if (copy_from_user(buffer, buf, count)) { | ||
913 | err = -EFAULT; | ||
914 | goto out; | ||
915 | } | ||
916 | |||
917 | err = kstrtoint(strstrip(buffer), 0, &oom_adj); | ||
918 | if (err) | ||
919 | goto out; | ||
920 | if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && | ||
921 | oom_adj != OOM_DISABLE) { | ||
922 | err = -EINVAL; | ||
923 | goto out; | ||
924 | } | ||
925 | |||
926 | task = get_proc_task(file->f_path.dentry->d_inode); | ||
927 | if (!task) { | ||
928 | err = -ESRCH; | ||
929 | goto out; | ||
930 | } | ||
931 | |||
932 | task_lock(task); | ||
933 | if (!task->mm) { | ||
934 | err = -EINVAL; | ||
935 | goto err_task_lock; | ||
936 | } | ||
937 | |||
938 | if (!lock_task_sighand(task, &flags)) { | ||
939 | err = -ESRCH; | ||
940 | goto err_task_lock; | ||
941 | } | ||
942 | |||
943 | /* | ||
944 | * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum | ||
945 | * value is always attainable. | ||
946 | */ | ||
947 | if (oom_adj == OOM_ADJUST_MAX) | ||
948 | oom_adj = OOM_SCORE_ADJ_MAX; | ||
949 | else | ||
950 | oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; | ||
951 | |||
952 | if (oom_adj < task->signal->oom_score_adj && | ||
953 | !capable(CAP_SYS_RESOURCE)) { | ||
954 | err = -EACCES; | ||
955 | goto err_sighand; | ||
956 | } | ||
957 | |||
958 | /* | ||
959 | * /proc/pid/oom_adj is provided for legacy purposes, ask users to use | ||
960 | * /proc/pid/oom_score_adj instead. | ||
961 | */ | ||
962 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", | ||
963 | current->comm, task_pid_nr(current), task_pid_nr(task), | ||
964 | task_pid_nr(task)); | ||
965 | |||
966 | task->signal->oom_score_adj = oom_adj; | ||
967 | trace_oom_score_adj_update(task); | ||
968 | err_sighand: | ||
969 | unlock_task_sighand(task, &flags); | ||
970 | err_task_lock: | ||
971 | task_unlock(task); | ||
972 | put_task_struct(task); | ||
973 | out: | ||
974 | return err < 0 ? err : count; | ||
975 | } | ||
976 | |||
977 | static const struct file_operations proc_oom_adj_operations = { | ||
978 | .read = oom_adj_read, | ||
979 | .write = oom_adj_write, | ||
980 | .llseek = generic_file_llseek, | ||
981 | }; | ||
982 | |||
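The legacy file above maps between the old oom_adj range (-16..15, with -17 as OOM_DISABLE) and oom_score_adj (-1000..1000) by linear scaling with integer division, so round trips are lossy. Worked through with the historical constants:

    #include <stdio.h>

    int main(void)
    {
    	int oom_adj = 8;
    	int score = oom_adj * 1000 / 17;	/* write path: 470 */
    	int back  = score * 17 / 1000;		/* read path: 7, not 8 */

    	/* Integer truncation makes the mapping lossy, one reason the
    	 * interface is deprecated in favour of oom_score_adj itself. */
    	printf("oom_adj %d -> oom_score_adj %d -> oom_adj %d\n",
    	       oom_adj, score, back);
    	return 0;
    }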
876 | static ssize_t oom_score_adj_read(struct file *file, char __user *buf, | 983 | static ssize_t oom_score_adj_read(struct file *file, char __user *buf, |
877 | size_t count, loff_t *ppos) | 984 | size_t count, loff_t *ppos) |
878 | { | 985 | { |
879 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 986 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
880 | char buffer[PROC_NUMBUF]; | 987 | char buffer[PROC_NUMBUF]; |
881 | int oom_score_adj = OOM_SCORE_ADJ_MIN; | 988 | short oom_score_adj = OOM_SCORE_ADJ_MIN; |
882 | unsigned long flags; | 989 | unsigned long flags; |
883 | size_t len; | 990 | size_t len; |
884 | 991 | ||
@@ -889,7 +996,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, | |||
889 | unlock_task_sighand(task, &flags); | 996 | unlock_task_sighand(task, &flags); |
890 | } | 997 | } |
891 | put_task_struct(task); | 998 | put_task_struct(task); |
892 | len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); | 999 | len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); |
893 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | 1000 | return simple_read_from_buffer(buf, count, ppos, buffer, len); |
894 | } | 1001 | } |
895 | 1002 | ||
@@ -936,15 +1043,15 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
936 | goto err_task_lock; | 1043 | goto err_task_lock; |
937 | } | 1044 | } |
938 | 1045 | ||
939 | if (oom_score_adj < task->signal->oom_score_adj_min && | 1046 | if ((short)oom_score_adj < task->signal->oom_score_adj_min && |
940 | !capable(CAP_SYS_RESOURCE)) { | 1047 | !capable(CAP_SYS_RESOURCE)) { |
941 | err = -EACCES; | 1048 | err = -EACCES; |
942 | goto err_sighand; | 1049 | goto err_sighand; |
943 | } | 1050 | } |
944 | 1051 | ||
945 | task->signal->oom_score_adj = oom_score_adj; | 1052 | task->signal->oom_score_adj = (short)oom_score_adj; |
946 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | 1053 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) |
947 | task->signal->oom_score_adj_min = oom_score_adj; | 1054 | task->signal->oom_score_adj_min = (short)oom_score_adj; |
948 | trace_oom_score_adj_update(task); | 1055 | trace_oom_score_adj_update(task); |
949 | 1056 | ||
950 | err_sighand: | 1057 | err_sighand: |
@@ -1770,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
1770 | if (!vma) | 1877 | if (!vma) |
1771 | goto out_no_vma; | 1878 | goto out_no_vma; |
1772 | 1879 | ||
1773 | result = proc_map_files_instantiate(dir, dentry, task, | 1880 | if (vma->vm_file) |
1774 | (void *)(unsigned long)vma->vm_file->f_mode); | 1881 | result = proc_map_files_instantiate(dir, dentry, task, |
1882 | (void *)(unsigned long)vma->vm_file->f_mode); | ||
1775 | 1883 | ||
1776 | out_no_vma: | 1884 | out_no_vma: |
1777 | up_read(&mm->mmap_sem); | 1885 | up_read(&mm->mmap_sem); |
@@ -2598,6 +2706,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2598 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 2706 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
2599 | #endif | 2707 | #endif |
2600 | INF("oom_score", S_IRUGO, proc_oom_score), | 2708 | INF("oom_score", S_IRUGO, proc_oom_score), |
2709 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), | ||
2601 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 2710 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
2602 | #ifdef CONFIG_AUDITSYSCALL | 2711 | #ifdef CONFIG_AUDITSYSCALL |
2603 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 2712 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
@@ -2964,6 +3073,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2964 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 3073 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
2965 | #endif | 3074 | #endif |
2966 | INF("oom_score", S_IRUGO, proc_oom_score), | 3075 | INF("oom_score", S_IRUGO, proc_oom_score), |
3076 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), | ||
2967 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 3077 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
2968 | #ifdef CONFIG_AUDITSYSCALL | 3078 | #ifdef CONFIG_AUDITSYSCALL |
2969 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 3079 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 86c67eee439f..e96d4f18ca3a 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -249,7 +249,7 @@ static int kcore_update_ram(void) | |||
249 | /* Not initialized....update now */ | 249 | /* Not initialized....update now */ |
250 | /* find out "max pfn" */ | 250 | /* find out "max pfn" */ |
251 | end_pfn = 0; | 251 | end_pfn = 0; |
252 | for_each_node_state(nid, N_HIGH_MEMORY) { | 252 | for_each_node_state(nid, N_MEMORY) { |
253 | unsigned long node_end; | 253 | unsigned long node_end; |
254 | node_end = NODE_DATA(nid)->node_start_pfn + | 254 | node_end = NODE_DATA(nid)->node_start_pfn + |
255 | NODE_DATA(nid)->node_spanned_pages; | 255 | NODE_DATA(nid)->node_spanned_pages; |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a781bdf06694..701580ddfcc3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -378,12 +378,13 @@ static int test_perm(int mode, int op) | |||
378 | return -EACCES; | 378 | return -EACCES; |
379 | } | 379 | } |
380 | 380 | ||
381 | static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) | 381 | static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op) |
382 | { | 382 | { |
383 | struct ctl_table_root *root = head->root; | ||
383 | int mode; | 384 | int mode; |
384 | 385 | ||
385 | if (root->permissions) | 386 | if (root->permissions) |
386 | mode = root->permissions(root, current->nsproxy, table); | 387 | mode = root->permissions(head, table); |
387 | else | 388 | else |
388 | mode = table->mode; | 389 | mode = table->mode; |
389 | 390 | ||
@@ -491,7 +492,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, | |||
491 | * and won't be until we finish. | 492 | * and won't be until we finish. |
492 | */ | 493 | */ |
493 | error = -EPERM; | 494 | error = -EPERM; |
494 | if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) | 495 | if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ)) |
495 | goto out; | 496 | goto out; |
496 | 497 | ||
497 | /* if that can happen at all, it should be -EINVAL, not -EISDIR */ | 498 | /* if that can happen at all, it should be -EINVAL, not -EISDIR */ |
@@ -717,7 +718,7 @@ static int proc_sys_permission(struct inode *inode, int mask) | |||
717 | if (!table) /* global root - r-xr-xr-x */ | 718 | if (!table) /* global root - r-xr-xr-x */ |
718 | error = mask & MAY_WRITE ? -EACCES : 0; | 719 | error = mask & MAY_WRITE ? -EACCES : 0; |
719 | else /* Use the permissions on the sysctl table entry */ | 720 | else /* Use the permissions on the sysctl table entry */ |
720 | error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK); | 721 | error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK); |
721 | 722 | ||
722 | sysctl_head_finish(head); | 723 | sysctl_head_finish(head); |
723 | return error; | 724 | return error; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 90c63f9392a5..48775628abbf 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -643,7 +643,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
643 | spinlock_t *ptl; | 643 | spinlock_t *ptl; |
644 | struct page *page; | 644 | struct page *page; |
645 | 645 | ||
646 | split_huge_page_pmd(walk->mm, pmd); | 646 | split_huge_page_pmd(vma, addr, pmd); |
647 | if (pmd_trans_unstable(pmd)) | 647 | if (pmd_trans_unstable(pmd)) |
648 | return 0; | 648 | return 0; |
649 | 649 | ||
@@ -1126,7 +1126,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, | |||
1126 | return NULL; | 1126 | return NULL; |
1127 | 1127 | ||
1128 | nid = page_to_nid(page); | 1128 | nid = page_to_nid(page); |
1129 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | 1129 | if (!node_isset(nid, node_states[N_MEMORY])) |
1130 | return NULL; | 1130 | return NULL; |
1131 | 1131 | ||
1132 | return page; | 1132 | return page; |
@@ -1279,7 +1279,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1279 | if (md->writeback) | 1279 | if (md->writeback) |
1280 | seq_printf(m, " writeback=%lu", md->writeback); | 1280 | seq_printf(m, " writeback=%lu", md->writeback); |
1281 | 1281 | ||
1282 | for_each_node_state(n, N_HIGH_MEMORY) | 1282 | for_each_node_state(n, N_MEMORY) |
1283 | if (md->node[n]) | 1283 | if (md->node[n]) |
1284 | seq_printf(m, " N%d=%lu", n, md->node[n]); | 1284 | seq_printf(m, " N%d=%lu", n, md->node[n]); |
1285 | out: | 1285 | out: |
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 4ab572e6d277..ed1d8c7212da 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
@@ -49,6 +49,7 @@ struct pstore_private { | |||
49 | struct pstore_info *psi; | 49 | struct pstore_info *psi; |
50 | enum pstore_type_id type; | 50 | enum pstore_type_id type; |
51 | u64 id; | 51 | u64 id; |
52 | int count; | ||
52 | ssize_t size; | 53 | ssize_t size; |
53 | char data[]; | 54 | char data[]; |
54 | }; | 55 | }; |
@@ -175,7 +176,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) | |||
175 | struct pstore_private *p = dentry->d_inode->i_private; | 176 | struct pstore_private *p = dentry->d_inode->i_private; |
176 | 177 | ||
177 | if (p->psi->erase) | 178 | if (p->psi->erase) |
178 | p->psi->erase(p->type, p->id, p->psi); | 179 | p->psi->erase(p->type, p->id, p->count, |
180 | dentry->d_inode->i_ctime, p->psi); | ||
179 | 181 | ||
180 | return simple_unlink(dir, dentry); | 182 | return simple_unlink(dir, dentry); |
181 | } | 183 | } |
@@ -270,7 +272,7 @@ int pstore_is_mounted(void) | |||
270 | * Load it up with "size" bytes of data from "buf". | 272 | * Load it up with "size" bytes of data from "buf". |
271 | * Set the mtime & ctime to the date that this record was originally stored. | 273 | * Set the mtime & ctime to the date that this record was originally stored. |
272 | */ | 274 | */ |
273 | int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, | 275 | int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, |
274 | char *data, size_t size, struct timespec time, | 276 | char *data, size_t size, struct timespec time, |
275 | struct pstore_info *psi) | 277 | struct pstore_info *psi) |
276 | { | 278 | { |
@@ -306,6 +308,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, | |||
306 | goto fail_alloc; | 308 | goto fail_alloc; |
307 | private->type = type; | 309 | private->type = type; |
308 | private->id = id; | 310 | private->id = id; |
311 | private->count = count; | ||
309 | private->psi = psi; | 312 | private->psi = psi; |
310 | 313 | ||
311 | switch (type) { | 314 | switch (type) { |
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 4847f588b7d5..937d820f273c 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h | |||
@@ -50,7 +50,7 @@ extern struct pstore_info *psinfo; | |||
50 | extern void pstore_set_kmsg_bytes(int); | 50 | extern void pstore_set_kmsg_bytes(int); |
51 | extern void pstore_get_records(int); | 51 | extern void pstore_get_records(int); |
52 | extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, | 52 | extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, |
53 | char *data, size_t size, | 53 | int count, char *data, size_t size, |
54 | struct timespec time, struct pstore_info *psi); | 54 | struct timespec time, struct pstore_info *psi); |
55 | extern int pstore_is_mounted(void); | 55 | extern int pstore_is_mounted(void); |
56 | 56 | ||
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 947fbe06c3b1..5ea2e77ff023 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -136,7 +136,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, | |||
136 | break; | 136 | break; |
137 | 137 | ||
138 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, | 138 | ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, |
139 | hsize + len, psinfo); | 139 | oopscount, hsize + len, psinfo); |
140 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) | 140 | if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) |
141 | pstore_new_entry = 1; | 141 | pstore_new_entry = 1; |
142 | 142 | ||
@@ -173,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) | |||
173 | spin_lock_irqsave(&psinfo->buf_lock, flags); | 173 | spin_lock_irqsave(&psinfo->buf_lock, flags); |
174 | } | 174 | } |
175 | memcpy(psinfo->buf, s, c); | 175 | memcpy(psinfo->buf, s, c); |
176 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, c, psinfo); | 176 | psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo); |
177 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); | 177 | spin_unlock_irqrestore(&psinfo->buf_lock, flags); |
178 | s += c; | 178 | s += c; |
179 | c = e - s; | 179 | c = e - s; |
@@ -197,7 +197,7 @@ static void pstore_register_console(void) {} | |||
197 | 197 | ||
198 | static int pstore_write_compat(enum pstore_type_id type, | 198 | static int pstore_write_compat(enum pstore_type_id type, |
199 | enum kmsg_dump_reason reason, | 199 | enum kmsg_dump_reason reason, |
200 | u64 *id, unsigned int part, | 200 | u64 *id, unsigned int part, int count, |
201 | size_t size, struct pstore_info *psi) | 201 | size_t size, struct pstore_info *psi) |
202 | { | 202 | { |
203 | return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); | 203 | return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); |
@@ -267,6 +267,7 @@ void pstore_get_records(int quiet) | |||
267 | char *buf = NULL; | 267 | char *buf = NULL; |
268 | ssize_t size; | 268 | ssize_t size; |
269 | u64 id; | 269 | u64 id; |
270 | int count; | ||
270 | enum pstore_type_id type; | 271 | enum pstore_type_id type; |
271 | struct timespec time; | 272 | struct timespec time; |
272 | int failed = 0, rc; | 273 | int failed = 0, rc; |
@@ -278,9 +279,9 @@ void pstore_get_records(int quiet) | |||
278 | if (psi->open && psi->open(psi)) | 279 | if (psi->open && psi->open(psi)) |
279 | goto out; | 280 | goto out; |
280 | 281 | ||
281 | while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) { | 282 | while ((size = psi->read(&id, &type, &count, &time, &buf, psi)) > 0) { |
282 | rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size, | 283 | rc = pstore_mkfile(type, psi->name, id, count, buf, |
283 | time, psi); | 284 | (size_t)size, time, psi); |
284 | kfree(buf); | 285 | kfree(buf); |
285 | buf = NULL; | 286 | buf = NULL; |
286 | if (rc && (rc != -EEXIST || !quiet)) | 287 | if (rc && (rc != -EEXIST || !quiet)) |
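
The pstore hunks above widen the backend API so that every record carries the number of the dump (oops) it belongs to: read() gains an int *count out-parameter, write()/erase() take the count, and pstore_mkfile() stores it per file. Below is a minimal userspace sketch of that parameter threading; it is an illustration only, not kernel code, and all names in it are made up.

/*
 * Simplified model of the API change above: the backend read callback
 * gains an out-parameter so every record can report which dump "count"
 * it belongs to, and the generic layer threads the value through to
 * the file it creates.
 */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

struct record {
	unsigned long long id;
	int count;		/* new: which dump this record came from */
	const char *data;
};

static struct record store[] = {
	{ 1, 1, "Oops#1 part A" },
	{ 2, 1, "Oops#1 part B" },
	{ 3, 2, "Oops#2 part A" },
};

/* Analogue of psi->read(): returns the size, fills id/count/buf. */
static ssize_t backend_read(unsigned idx, unsigned long long *id,
			    int *count, const char **buf)
{
	if (idx >= sizeof(store) / sizeof(store[0]))
		return 0;		/* no more records */
	*id = store[idx].id;
	*count = store[idx].count;	/* threaded through, as in the patch */
	*buf = store[idx].data;
	return (ssize_t)strlen(store[idx].data);
}

int main(void)
{
	unsigned long long id;
	const char *buf;
	ssize_t size;
	int count;
	unsigned i = 0;

	/* Analogue of the pstore_get_records() loop. */
	while ((size = backend_read(i++, &id, &count, &buf)) > 0)
		printf("mkfile: id=%llu count=%d size=%zd data=%s\n",
		       id, count, size, buf);
	return 0;
}
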
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index dba70e53b72c..f883e7e74305 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
@@ -132,9 +132,8 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, | |||
132 | } | 132 | } |
133 | 133 | ||
134 | static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, | 134 | static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, |
135 | struct timespec *time, | 135 | int *count, struct timespec *time, |
136 | char **buf, | 136 | char **buf, struct pstore_info *psi) |
137 | struct pstore_info *psi) | ||
138 | { | 137 | { |
139 | ssize_t size; | 138 | ssize_t size; |
140 | struct ramoops_context *cxt = psi->data; | 139 | struct ramoops_context *cxt = psi->data; |
@@ -241,8 +240,8 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, | |||
241 | return 0; | 240 | return 0; |
242 | } | 241 | } |
243 | 242 | ||
244 | static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, | 243 | static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, |
245 | struct pstore_info *psi) | 244 | struct timespec time, struct pstore_info *psi) |
246 | { | 245 | { |
247 | struct ramoops_context *cxt = psi->data; | 246 | struct ramoops_context *cxt = psi->data; |
248 | struct persistent_ram_zone *prz; | 247 | struct persistent_ram_zone *prz; |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index f27f01a98aa2..d83736fbc26c 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -1782,8 +1782,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1782 | 1782 | ||
1783 | BUG_ON(!th->t_trans_id); | 1783 | BUG_ON(!th->t_trans_id); |
1784 | 1784 | ||
1785 | dquot_initialize(inode); | 1785 | reiserfs_write_unlock(inode->i_sb); |
1786 | err = dquot_alloc_inode(inode); | 1786 | err = dquot_alloc_inode(inode); |
1787 | reiserfs_write_lock(inode->i_sb); | ||
1787 | if (err) | 1788 | if (err) |
1788 | goto out_end_trans; | 1789 | goto out_end_trans; |
1789 | if (!dir->i_nlink) { | 1790 | if (!dir->i_nlink) { |
@@ -1979,8 +1980,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1979 | 1980 | ||
1980 | out_end_trans: | 1981 | out_end_trans: |
1981 | journal_end(th, th->t_super, th->t_blocks_allocated); | 1982 | journal_end(th, th->t_super, th->t_blocks_allocated); |
1983 | reiserfs_write_unlock(inode->i_sb); | ||
1982 | /* Drop can be outside and it needs more credits so it's better to have it outside */ | 1984 | /* Drop can be outside and it needs more credits so it's better to have it outside */ |
1983 | dquot_drop(inode); | 1985 | dquot_drop(inode); |
1986 | reiserfs_write_lock(inode->i_sb); | ||
1984 | inode->i_flags |= S_NOQUOTA; | 1987 | inode->i_flags |= S_NOQUOTA; |
1985 | make_bad_inode(inode); | 1988 | make_bad_inode(inode); |
1986 | 1989 | ||
@@ -3103,10 +3106,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3103 | /* must be turned off for recursive notify_change calls */ | 3106 | /* must be turned off for recursive notify_change calls */ |
3104 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); | 3107 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); |
3105 | 3108 | ||
3106 | depth = reiserfs_write_lock_once(inode->i_sb); | ||
3107 | if (is_quota_modification(inode, attr)) | 3109 | if (is_quota_modification(inode, attr)) |
3108 | dquot_initialize(inode); | 3110 | dquot_initialize(inode); |
3109 | 3111 | depth = reiserfs_write_lock_once(inode->i_sb); | |
3110 | if (attr->ia_valid & ATTR_SIZE) { | 3112 | if (attr->ia_valid & ATTR_SIZE) { |
3111 | /* version 2 items will be caught by the s_maxbytes check | 3113 | /* version 2 items will be caught by the s_maxbytes check |
3112 | ** done for us in vmtruncate | 3114 | ** done for us in vmtruncate |
@@ -3170,7 +3172,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3170 | error = journal_begin(&th, inode->i_sb, jbegin_count); | 3172 | error = journal_begin(&th, inode->i_sb, jbegin_count); |
3171 | if (error) | 3173 | if (error) |
3172 | goto out; | 3174 | goto out; |
3175 | reiserfs_write_unlock_once(inode->i_sb, depth); | ||
3173 | error = dquot_transfer(inode, attr); | 3176 | error = dquot_transfer(inode, attr); |
3177 | depth = reiserfs_write_lock_once(inode->i_sb); | ||
3174 | if (error) { | 3178 | if (error) { |
3175 | journal_end(&th, inode->i_sb, jbegin_count); | 3179 | journal_end(&th, inode->i_sb, jbegin_count); |
3176 | goto out; | 3180 | goto out; |
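
All of the reiserfs hunks in this series follow one pattern: the per-superblock write lock is dropped before calling into the quota code (dquot_alloc_inode(), dquot_transfer(), and friends) and retaken afterwards, because quota takes its own locks and may re-enter the filesystem, which would otherwise invert the lock order. A minimal pthread sketch of the pattern follows, with illustrative names; the caveat, visible in the real hunks too, is that any state observed before the unlock must be treated as stale after relocking.

/*
 * Plain-pthreads model, not kernel code: release the filesystem-wide
 * write lock across the quota call, then retake it.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t fs_write_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t quota_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * The quota helper acquires its own lock; it must never be entered
 * while fs_write_lock is held, or a thread taking quota_lock then
 * fs_write_lock could deadlock against us.
 */
static int dquot_alloc_space_model(long bytes)
{
	pthread_mutex_lock(&quota_lock);
	printf("quota: charging %ld bytes\n", bytes);
	pthread_mutex_unlock(&quota_lock);
	return 0;
}

static int paste_into_item_model(long bytes)
{
	int ret;

	pthread_mutex_lock(&fs_write_lock);
	/* ... tree manipulation under the write lock ... */

	pthread_mutex_unlock(&fs_write_lock);	/* drop before the quota call */
	ret = dquot_alloc_space_model(bytes);
	pthread_mutex_lock(&fs_write_lock);	/* retake afterwards */

	/* ... continue, revalidating anything that may have changed ... */
	pthread_mutex_unlock(&fs_write_lock);
	return ret;
}

int main(void)
{
	return paste_into_item_model(4096);
}
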
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index f8afa4b162b8..2f40a4c70a4d 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c | |||
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree | |||
1968 | key2type(&(key->on_disk_key))); | 1968 | key2type(&(key->on_disk_key))); |
1969 | #endif | 1969 | #endif |
1970 | 1970 | ||
1971 | reiserfs_write_unlock(inode->i_sb); | ||
1971 | retval = dquot_alloc_space_nodirty(inode, pasted_size); | 1972 | retval = dquot_alloc_space_nodirty(inode, pasted_size); |
1973 | reiserfs_write_lock(inode->i_sb); | ||
1972 | if (retval) { | 1974 | if (retval) { |
1973 | pathrelse(search_path); | 1975 | pathrelse(search_path); |
1974 | return retval; | 1976 | return retval; |
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2061 | "reiserquota insert_item(): allocating %u id=%u type=%c", | 2063 | "reiserquota insert_item(): allocating %u id=%u type=%c", |
2062 | quota_bytes, inode->i_uid, head2type(ih)); | 2064 | quota_bytes, inode->i_uid, head2type(ih)); |
2063 | #endif | 2065 | #endif |
2066 | reiserfs_write_unlock(inode->i_sb); | ||
2064 | /* We can't dirty inode here. It would be immediately written but | 2067 | /* We can't dirty inode here. It would be immediately written but |
2065 | * appropriate stat item isn't inserted yet... */ | 2068 | * appropriate stat item isn't inserted yet... */ |
2066 | retval = dquot_alloc_space_nodirty(inode, quota_bytes); | 2069 | retval = dquot_alloc_space_nodirty(inode, quota_bytes); |
2070 | reiserfs_write_lock(inode->i_sb); | ||
2067 | if (retval) { | 2071 | if (retval) { |
2068 | pathrelse(path); | 2072 | pathrelse(path); |
2069 | return retval; | 2073 | return retval; |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1078ae179993..418bdc3a57da 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -298,7 +298,9 @@ static int finish_unfinished(struct super_block *s) | |||
298 | retval = remove_save_link_only(s, &save_link_key, 0); | 298 | retval = remove_save_link_only(s, &save_link_key, 0); |
299 | continue; | 299 | continue; |
300 | } | 300 | } |
301 | reiserfs_write_unlock(s); | ||
301 | dquot_initialize(inode); | 302 | dquot_initialize(inode); |
303 | reiserfs_write_lock(s); | ||
302 | 304 | ||
303 | if (truncate && S_ISDIR(inode->i_mode)) { | 305 | if (truncate && S_ISDIR(inode->i_mode)) { |
304 | /* We got a truncate request for a dir which is impossible. | 306 | /* We got a truncate request for a dir which is impossible. |
@@ -1335,7 +1337,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1335 | kfree(qf_names[i]); | 1337 | kfree(qf_names[i]); |
1336 | #endif | 1338 | #endif |
1337 | err = -EINVAL; | 1339 | err = -EINVAL; |
1338 | goto out_err; | 1340 | goto out_unlock; |
1339 | } | 1341 | } |
1340 | #ifdef CONFIG_QUOTA | 1342 | #ifdef CONFIG_QUOTA |
1341 | handle_quota_files(s, qf_names, &qfmt); | 1343 | handle_quota_files(s, qf_names, &qfmt); |
@@ -1379,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1379 | if (blocks) { | 1381 | if (blocks) { |
1380 | err = reiserfs_resize(s, blocks); | 1382 | err = reiserfs_resize(s, blocks); |
1381 | if (err != 0) | 1383 | if (err != 0) |
1382 | goto out_err; | 1384 | goto out_unlock; |
1383 | } | 1385 | } |
1384 | 1386 | ||
1385 | if (*mount_flags & MS_RDONLY) { | 1387 | if (*mount_flags & MS_RDONLY) { |
@@ -1389,9 +1391,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1389 | /* it is read-only already */ | 1391 | /* it is read-only already */ |
1390 | goto out_ok; | 1392 | goto out_ok; |
1391 | 1393 | ||
1394 | /* | ||
1395 | * Drop write lock. Quota will retake it when needed and lock | ||
1396 | * ordering requires calling dquot_suspend() without it. | ||
1397 | */ | ||
1398 | reiserfs_write_unlock(s); | ||
1392 | err = dquot_suspend(s, -1); | 1399 | err = dquot_suspend(s, -1); |
1393 | if (err < 0) | 1400 | if (err < 0) |
1394 | goto out_err; | 1401 | goto out_err; |
1402 | reiserfs_write_lock(s); | ||
1395 | 1403 | ||
1396 | /* try to remount file system with read-only permissions */ | 1404 | /* try to remount file system with read-only permissions */ |
1397 | if (sb_umount_state(rs) == REISERFS_VALID_FS | 1405 | if (sb_umount_state(rs) == REISERFS_VALID_FS |
@@ -1401,7 +1409,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1401 | 1409 | ||
1402 | err = journal_begin(&th, s, 10); | 1410 | err = journal_begin(&th, s, 10); |
1403 | if (err) | 1411 | if (err) |
1404 | goto out_err; | 1412 | goto out_unlock; |
1405 | 1413 | ||
1406 | /* Mounting a rw partition read-only. */ | 1414 | /* Mounting a rw partition read-only. */ |
1407 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); | 1415 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); |
@@ -1416,7 +1424,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1416 | 1424 | ||
1417 | if (reiserfs_is_journal_aborted(journal)) { | 1425 | if (reiserfs_is_journal_aborted(journal)) { |
1418 | err = journal->j_errno; | 1426 | err = journal->j_errno; |
1419 | goto out_err; | 1427 | goto out_unlock; |
1420 | } | 1428 | } |
1421 | 1429 | ||
1422 | handle_data_mode(s, mount_options); | 1430 | handle_data_mode(s, mount_options); |
@@ -1425,7 +1433,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1425 | s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ | 1433 | s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ |
1426 | err = journal_begin(&th, s, 10); | 1434 | err = journal_begin(&th, s, 10); |
1427 | if (err) | 1435 | if (err) |
1428 | goto out_err; | 1436 | goto out_unlock; |
1429 | 1437 | ||
1430 | /* Mount a partition which is read-only, read-write */ | 1438 | /* Mount a partition which is read-only, read-write */ |
1431 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); | 1439 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); |
@@ -1442,10 +1450,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1442 | SB_JOURNAL(s)->j_must_wait = 1; | 1450 | SB_JOURNAL(s)->j_must_wait = 1; |
1443 | err = journal_end(&th, s, 10); | 1451 | err = journal_end(&th, s, 10); |
1444 | if (err) | 1452 | if (err) |
1445 | goto out_err; | 1453 | goto out_unlock; |
1446 | 1454 | ||
1447 | if (!(*mount_flags & MS_RDONLY)) { | 1455 | if (!(*mount_flags & MS_RDONLY)) { |
1456 | /* | ||
1457 | * Drop write lock. Quota will retake it when needed and lock | ||
1458 | * ordering requires calling dquot_resume() without it. | ||
1459 | */ | ||
1460 | reiserfs_write_unlock(s); | ||
1448 | dquot_resume(s, -1); | 1461 | dquot_resume(s, -1); |
1462 | reiserfs_write_lock(s); | ||
1449 | finish_unfinished(s); | 1463 | finish_unfinished(s); |
1450 | reiserfs_xattr_init(s, *mount_flags); | 1464 | reiserfs_xattr_init(s, *mount_flags); |
1451 | } | 1465 | } |
@@ -1455,9 +1469,10 @@ out_ok: | |||
1455 | reiserfs_write_unlock(s); | 1469 | reiserfs_write_unlock(s); |
1456 | return 0; | 1470 | return 0; |
1457 | 1471 | ||
1472 | out_unlock: | ||
1473 | reiserfs_write_unlock(s); | ||
1458 | out_err: | 1474 | out_err: |
1459 | kfree(new_opts); | 1475 | kfree(new_opts); |
1460 | reiserfs_write_unlock(s); | ||
1461 | return err; | 1476 | return err; |
1462 | } | 1477 | } |
1463 | 1478 | ||
@@ -2095,13 +2110,15 @@ static int reiserfs_write_dquot(struct dquot *dquot) | |||
2095 | REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); | 2110 | REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); |
2096 | if (ret) | 2111 | if (ret) |
2097 | goto out; | 2112 | goto out; |
2113 | reiserfs_write_unlock(dquot->dq_sb); | ||
2098 | ret = dquot_commit(dquot); | 2114 | ret = dquot_commit(dquot); |
2115 | reiserfs_write_lock(dquot->dq_sb); | ||
2099 | err = | 2116 | err = |
2100 | journal_end(&th, dquot->dq_sb, | 2117 | journal_end(&th, dquot->dq_sb, |
2101 | REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); | 2118 | REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); |
2102 | if (!ret && err) | 2119 | if (!ret && err) |
2103 | ret = err; | 2120 | ret = err; |
2104 | out: | 2121 | out: |
2105 | reiserfs_write_unlock(dquot->dq_sb); | 2122 | reiserfs_write_unlock(dquot->dq_sb); |
2106 | return ret; | 2123 | return ret; |
2107 | } | 2124 | } |
@@ -2117,13 +2134,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot) | |||
2117 | REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); | 2134 | REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); |
2118 | if (ret) | 2135 | if (ret) |
2119 | goto out; | 2136 | goto out; |
2137 | reiserfs_write_unlock(dquot->dq_sb); | ||
2120 | ret = dquot_acquire(dquot); | 2138 | ret = dquot_acquire(dquot); |
2139 | reiserfs_write_lock(dquot->dq_sb); | ||
2121 | err = | 2140 | err = |
2122 | journal_end(&th, dquot->dq_sb, | 2141 | journal_end(&th, dquot->dq_sb, |
2123 | REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); | 2142 | REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); |
2124 | if (!ret && err) | 2143 | if (!ret && err) |
2125 | ret = err; | 2144 | ret = err; |
2126 | out: | 2145 | out: |
2127 | reiserfs_write_unlock(dquot->dq_sb); | 2146 | reiserfs_write_unlock(dquot->dq_sb); |
2128 | return ret; | 2147 | return ret; |
2129 | } | 2148 | } |
@@ -2137,19 +2156,21 @@ static int reiserfs_release_dquot(struct dquot *dquot) | |||
2137 | ret = | 2156 | ret = |
2138 | journal_begin(&th, dquot->dq_sb, | 2157 | journal_begin(&th, dquot->dq_sb, |
2139 | REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); | 2158 | REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); |
2159 | reiserfs_write_unlock(dquot->dq_sb); | ||
2140 | if (ret) { | 2160 | if (ret) { |
2141 | /* Release dquot anyway to avoid endless cycle in dqput() */ | 2161 | /* Release dquot anyway to avoid endless cycle in dqput() */ |
2142 | dquot_release(dquot); | 2162 | dquot_release(dquot); |
2143 | goto out; | 2163 | goto out; |
2144 | } | 2164 | } |
2145 | ret = dquot_release(dquot); | 2165 | ret = dquot_release(dquot); |
2166 | reiserfs_write_lock(dquot->dq_sb); | ||
2146 | err = | 2167 | err = |
2147 | journal_end(&th, dquot->dq_sb, | 2168 | journal_end(&th, dquot->dq_sb, |
2148 | REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); | 2169 | REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); |
2149 | if (!ret && err) | 2170 | if (!ret && err) |
2150 | ret = err; | 2171 | ret = err; |
2151 | out: | ||
2152 | reiserfs_write_unlock(dquot->dq_sb); | 2172 | reiserfs_write_unlock(dquot->dq_sb); |
2173 | out: | ||
2153 | return ret; | 2174 | return ret; |
2154 | } | 2175 | } |
2155 | 2176 | ||
@@ -2174,11 +2195,13 @@ static int reiserfs_write_info(struct super_block *sb, int type) | |||
2174 | ret = journal_begin(&th, sb, 2); | 2195 | ret = journal_begin(&th, sb, 2); |
2175 | if (ret) | 2196 | if (ret) |
2176 | goto out; | 2197 | goto out; |
2198 | reiserfs_write_unlock(sb); | ||
2177 | ret = dquot_commit_info(sb, type); | 2199 | ret = dquot_commit_info(sb, type); |
2200 | reiserfs_write_lock(sb); | ||
2178 | err = journal_end(&th, sb, 2); | 2201 | err = journal_end(&th, sb, 2); |
2179 | if (!ret && err) | 2202 | if (!ret && err) |
2180 | ret = err; | 2203 | ret = err; |
2181 | out: | 2204 | out: |
2182 | reiserfs_write_unlock(sb); | 2205 | reiserfs_write_unlock(sb); |
2183 | return ret; | 2206 | return ret; |
2184 | } | 2207 | } |
@@ -2203,8 +2226,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2203 | struct reiserfs_transaction_handle th; | 2226 | struct reiserfs_transaction_handle th; |
2204 | int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; | 2227 | int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; |
2205 | 2228 | ||
2206 | if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) | 2229 | reiserfs_write_lock(sb); |
2207 | return -EINVAL; | 2230 | if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) { |
2231 | err = -EINVAL; | ||
2232 | goto out; | ||
2233 | } | ||
2208 | 2234 | ||
2209 | /* Quotafile not on the same filesystem? */ | 2235 | /* Quotafile not on the same filesystem? */ |
2210 | if (path->dentry->d_sb != sb) { | 2236 | if (path->dentry->d_sb != sb) { |
@@ -2246,8 +2272,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2246 | if (err) | 2272 | if (err) |
2247 | goto out; | 2273 | goto out; |
2248 | } | 2274 | } |
2249 | err = dquot_quota_on(sb, type, format_id, path); | 2275 | reiserfs_write_unlock(sb); |
2276 | return dquot_quota_on(sb, type, format_id, path); | ||
2250 | out: | 2277 | out: |
2278 | reiserfs_write_unlock(sb); | ||
2251 | return err; | 2279 | return err; |
2252 | } | 2280 | } |
2253 | 2281 | ||
@@ -2320,7 +2348,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | |||
2320 | tocopy = sb->s_blocksize - offset < towrite ? | 2348 | tocopy = sb->s_blocksize - offset < towrite ? |
2321 | sb->s_blocksize - offset : towrite; | 2349 | sb->s_blocksize - offset : towrite; |
2322 | tmp_bh.b_state = 0; | 2350 | tmp_bh.b_state = 0; |
2351 | reiserfs_write_lock(sb); | ||
2323 | err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); | 2352 | err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); |
2353 | reiserfs_write_unlock(sb); | ||
2324 | if (err) | 2354 | if (err) |
2325 | goto out; | 2355 | goto out; |
2326 | if (offset || tocopy != sb->s_blocksize) | 2356 | if (offset || tocopy != sb->s_blocksize) |
@@ -2336,10 +2366,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | |||
2336 | flush_dcache_page(bh->b_page); | 2366 | flush_dcache_page(bh->b_page); |
2337 | set_buffer_uptodate(bh); | 2367 | set_buffer_uptodate(bh); |
2338 | unlock_buffer(bh); | 2368 | unlock_buffer(bh); |
2369 | reiserfs_write_lock(sb); | ||
2339 | reiserfs_prepare_for_journal(sb, bh, 1); | 2370 | reiserfs_prepare_for_journal(sb, bh, 1); |
2340 | journal_mark_dirty(current->journal_info, sb, bh); | 2371 | journal_mark_dirty(current->journal_info, sb, bh); |
2341 | if (!journal_quota) | 2372 | if (!journal_quota) |
2342 | reiserfs_add_ordered_list(inode, bh); | 2373 | reiserfs_add_ordered_list(inode, bh); |
2374 | reiserfs_write_unlock(sb); | ||
2343 | brelse(bh); | 2375 | brelse(bh); |
2344 | offset = 0; | 2376 | offset = 0; |
2345 | towrite -= tocopy; | 2377 | towrite -= tocopy; |
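
The reiserfs_remount() changes above also split the error unwind into two labels: paths that still hold the write lock jump to out_unlock, while paths that have already dropped it (around dquot_suspend()) jump straight to out_err, so the lock is neither leaked nor released twice. A compact sketch of that two-label unwind, with invented names and error values:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER;

static int remount_model(int fail_early, int fail_unlocked)
{
	char *new_opts = malloc(32);
	int err = 0;

	pthread_mutex_lock(&s_lock);

	if (fail_early) {		/* lock held: use out_unlock */
		err = -22;		/* stand-in for -EINVAL */
		goto out_unlock;
	}

	pthread_mutex_unlock(&s_lock);	/* quota call wants the lock free */
	if (fail_unlocked) {		/* lock NOT held: use out_err */
		err = -5;		/* stand-in for -EIO */
		goto out_err;
	}
	pthread_mutex_lock(&s_lock);

out_unlock:
	pthread_mutex_unlock(&s_lock);
out_err:
	free(new_opts);
	return err;
}

int main(void)
{
	return remount_model(0, 0);
}
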
diff --git a/fs/splice.c b/fs/splice.c index 13e5b4776e7a..8890604e3fcd 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1024,17 +1024,14 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1024 | ret = sd.num_spliced; | 1024 | ret = sd.num_spliced; |
1025 | 1025 | ||
1026 | if (ret > 0) { | 1026 | if (ret > 0) { |
1027 | unsigned long nr_pages; | ||
1028 | int err; | 1027 | int err; |
1029 | 1028 | ||
1030 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
1031 | |||
1032 | err = generic_write_sync(out, *ppos, ret); | 1029 | err = generic_write_sync(out, *ppos, ret); |
1033 | if (err) | 1030 | if (err) |
1034 | ret = err; | 1031 | ret = err; |
1035 | else | 1032 | else |
1036 | *ppos += ret; | 1033 | *ppos += ret; |
1037 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 1034 | balance_dirty_pages_ratelimited(mapping); |
1038 | } | 1035 | } |
1039 | sb_end_write(inode->i_sb); | 1036 | sb_end_write(inode->i_sb); |
1040 | 1037 | ||
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 00012e31829d..602f56db0442 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -485,8 +485,8 @@ const struct file_operations sysfs_file_operations = { | |||
485 | .poll = sysfs_poll, | 485 | .poll = sysfs_poll, |
486 | }; | 486 | }; |
487 | 487 | ||
488 | int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, | 488 | static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, |
489 | const void **pns) | 489 | const void **pns) |
490 | { | 490 | { |
491 | struct sysfs_dirent *dir_sd = kobj->sd; | 491 | struct sysfs_dirent *dir_sd = kobj->sd; |
492 | const struct sysfs_ops *ops; | 492 | const struct sysfs_ops *ops; |
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 28ec13af28d9..2dcf3d473fec 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c) | |||
681 | if (!lprops) { | 681 | if (!lprops) { |
682 | lprops = ubifs_fast_find_freeable(c); | 682 | lprops = ubifs_fast_find_freeable(c); |
683 | if (!lprops) { | 683 | if (!lprops) { |
684 | ubifs_assert(c->freeable_cnt == 0); | 684 | /* |
685 | if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { | 685 | * The first condition means the following: go scan the |
686 | * LPT if there are uncategorized lprops, which means | ||
687 | * there may be freeable LEBs there (UBIFS does not | ||
688 | * store the information about freeable LEBs in the | ||
689 | * master node). | ||
690 | */ | ||
691 | if (c->in_a_category_cnt != c->main_lebs || | ||
692 | c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { | ||
693 | ubifs_assert(c->freeable_cnt == 0); | ||
686 | lprops = scan_for_leb_for_idx(c); | 694 | lprops = scan_for_leb_for_idx(c); |
687 | if (IS_ERR(lprops)) { | 695 | if (IS_ERR(lprops)) { |
688 | err = PTR_ERR(lprops); | 696 | err = PTR_ERR(lprops); |
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index e5a2a35a46dc..46190a7c42a6 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, | |||
300 | default: | 300 | default: |
301 | ubifs_assert(0); | 301 | ubifs_assert(0); |
302 | } | 302 | } |
303 | |||
303 | lprops->flags &= ~LPROPS_CAT_MASK; | 304 | lprops->flags &= ~LPROPS_CAT_MASK; |
304 | lprops->flags |= cat; | 305 | lprops->flags |= cat; |
306 | c->in_a_category_cnt += 1; | ||
307 | ubifs_assert(c->in_a_category_cnt <= c->main_lebs); | ||
305 | } | 308 | } |
306 | 309 | ||
307 | /** | 310 | /** |
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c, | |||
334 | default: | 337 | default: |
335 | ubifs_assert(0); | 338 | ubifs_assert(0); |
336 | } | 339 | } |
340 | |||
341 | c->in_a_category_cnt -= 1; | ||
342 | ubifs_assert(c->in_a_category_cnt >= 0); | ||
337 | } | 343 | } |
338 | 344 | ||
339 | /** | 345 | /** |
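
The new @in_a_category_cnt field gives ubifs_find_free_leb_for_idx() a cheap way to decide whether scanning the LPT can still turn up freeable LEBs: if every lprops is already categorized and there are no spare empty LEBs, a scan cannot find anything new. A simplified model of the bookkeeping and the resulting decision (illustrative code, not UBIFS):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct info {
	int main_lebs;		/* total number of LEBs */
	int in_a_category_cnt;	/* how many lprops are categorized */
};

static void add_to_cat(struct info *c)
{
	c->in_a_category_cnt += 1;
	assert(c->in_a_category_cnt <= c->main_lebs);
}

static void remove_from_cat(struct info *c)
{
	c->in_a_category_cnt -= 1;
	assert(c->in_a_category_cnt >= 0);
}

/* Mirrors the new condition in ubifs_find_free_leb_for_idx(). */
static bool worth_scanning_lpt(const struct info *c, int spare_empty_lebs)
{
	return c->in_a_category_cnt != c->main_lebs || spare_empty_lebs > 0;
}

int main(void)
{
	struct info c = { .main_lebs = 4, .in_a_category_cnt = 0 };

	add_to_cat(&c);
	add_to_cat(&c);
	printf("scan? %d\n", worth_scanning_lpt(&c, 0));	/* 1: only 2 of 4 known */
	add_to_cat(&c);
	add_to_cat(&c);
	printf("scan? %d\n", worth_scanning_lpt(&c, 0));	/* 0: all categorized */
	remove_from_cat(&c);
	return 0;
}
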
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 5486346d0a3f..d133c276fe05 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -1183,6 +1183,8 @@ struct ubifs_debug_info; | |||
1183 | * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) | 1183 | * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) |
1184 | * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) | 1184 | * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) |
1185 | * @freeable_cnt: number of freeable LEBs in @freeable_list | 1185 | * @freeable_cnt: number of freeable LEBs in @freeable_list |
1186 | * @in_a_category_cnt: count of lprops which are in a certain category, which | ||
1187 | * basically means that they were loaded from the flash | ||

1186 | * | 1188 | * |
1187 | * @ltab_lnum: LEB number of LPT's own lprops table | 1189 | * @ltab_lnum: LEB number of LPT's own lprops table |
1188 | * @ltab_offs: offset of LPT's own lprops table | 1190 | * @ltab_offs: offset of LPT's own lprops table |
@@ -1412,6 +1414,7 @@ struct ubifs_info { | |||
1412 | struct list_head freeable_list; | 1414 | struct list_head freeable_list; |
1413 | struct list_head frdi_idx_list; | 1415 | struct list_head frdi_idx_list; |
1414 | int freeable_cnt; | 1416 | int freeable_cnt; |
1417 | int in_a_category_cnt; | ||
1415 | 1418 | ||
1416 | int ltab_lnum; | 1419 | int ltab_lnum; |
1417 | int ltab_offs; | 1420 | int ltab_offs; |
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 6100ec0fa1d4..5a7ffe54f5d5 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig | |||
@@ -2,6 +2,7 @@ config XFS_FS | |||
2 | tristate "XFS filesystem support" | 2 | tristate "XFS filesystem support" |
3 | depends on BLOCK | 3 | depends on BLOCK |
4 | select EXPORTFS | 4 | select EXPORTFS |
5 | select LIBCRC32C | ||
5 | help | 6 | help |
6 | XFS is a high performance journaling filesystem which originated | 7 | XFS is a high performance journaling filesystem which originated |
7 | on the SGI IRIX platform. It is completely multi-threaded, can | 8 | on the SGI IRIX platform. It is completely multi-threaded, can |
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d2bf974b1a2f..d02201df855b 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -37,9 +37,8 @@ xfs-y += xfs_aops.o \ | |||
37 | xfs_file.o \ | 37 | xfs_file.o \ |
38 | xfs_filestream.o \ | 38 | xfs_filestream.o \ |
39 | xfs_fsops.o \ | 39 | xfs_fsops.o \ |
40 | xfs_fs_subr.o \ | ||
41 | xfs_globals.o \ | 40 | xfs_globals.o \ |
42 | xfs_iget.o \ | 41 | xfs_icache.o \ |
43 | xfs_ioctl.o \ | 42 | xfs_ioctl.o \ |
44 | xfs_iomap.o \ | 43 | xfs_iomap.o \ |
45 | xfs_iops.o \ | 44 | xfs_iops.o \ |
@@ -47,7 +46,6 @@ xfs-y += xfs_aops.o \ | |||
47 | xfs_message.o \ | 46 | xfs_message.o \ |
48 | xfs_mru_cache.o \ | 47 | xfs_mru_cache.o \ |
49 | xfs_super.o \ | 48 | xfs_super.o \ |
50 | xfs_sync.o \ | ||
51 | xfs_xattr.o \ | 49 | xfs_xattr.o \ |
52 | xfs_rename.o \ | 50 | xfs_rename.o \ |
53 | xfs_utils.o \ | 51 | xfs_utils.o \ |
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h index 4732d71262cc..104db0f3bed6 100644 --- a/fs/xfs/uuid.h +++ b/fs/xfs/uuid.h | |||
@@ -26,4 +26,10 @@ extern int uuid_is_nil(uuid_t *uuid); | |||
26 | extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); | 26 | extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); |
27 | extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); | 27 | extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); |
28 | 28 | ||
29 | static inline void | ||
30 | uuid_copy(uuid_t *dst, uuid_t *src) | ||
31 | { | ||
32 | memcpy(dst, src, sizeof(uuid_t)); | ||
33 | } | ||
34 | |||
29 | #endif /* __XFS_SUPPORT_UUID_H__ */ | 35 | #endif /* __XFS_SUPPORT_UUID_H__ */ |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 44d65c1533c0..f2aeedb6a579 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -108,6 +108,8 @@ typedef struct xfs_agf { | |||
108 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, | 108 | extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, |
109 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); | 109 | xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); |
110 | 110 | ||
111 | extern const struct xfs_buf_ops xfs_agf_buf_ops; | ||
112 | |||
111 | /* | 113 | /* |
112 | * Size of the unlinked inode hash table in the agi. | 114 | * Size of the unlinked inode hash table in the agi. |
113 | */ | 115 | */ |
@@ -161,6 +163,8 @@ typedef struct xfs_agi { | |||
161 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, | 163 | extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, |
162 | xfs_agnumber_t agno, struct xfs_buf **bpp); | 164 | xfs_agnumber_t agno, struct xfs_buf **bpp); |
163 | 165 | ||
166 | extern const struct xfs_buf_ops xfs_agi_buf_ops; | ||
167 | |||
164 | /* | 168 | /* |
165 | * The third a.g. block contains the a.g. freelist, an array | 169 | * The third a.g. block contains the a.g. freelist, an array |
166 | * of block pointers to blocks owned by the allocation btree code. | 170 | * of block pointers to blocks owned by the allocation btree code. |
@@ -233,6 +237,7 @@ typedef struct xfs_perag { | |||
233 | #define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup | 237 | #define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup |
234 | in xfs_inode_ag_iterator */ | 238 | in xfs_inode_ag_iterator */ |
235 | #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ | 239 | #define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ |
240 | #define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */ | ||
236 | 241 | ||
237 | #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) | 242 | #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) |
238 | #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ | 243 | #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 335206a9c698..393055fe3aef 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -430,6 +430,60 @@ xfs_alloc_fixup_trees( | |||
430 | return 0; | 430 | return 0; |
431 | } | 431 | } |
432 | 432 | ||
433 | static void | ||
434 | xfs_agfl_verify( | ||
435 | struct xfs_buf *bp) | ||
436 | { | ||
437 | #ifdef WHEN_CRCS_COME_ALONG | ||
438 | /* | ||
439 | * we cannot actually do any verification of the AGFL because mkfs does | ||
440 | * not initialise the AGFL to zero or NULL. Hence the only valid part of | ||
441 | * the AGFL is what the AGF says is active. We can't get to the AGF, so | ||
442 | * we can't verify just those entries are valid. | ||
443 | * | ||
444 | * This problem goes away when the CRC format change comes along as that | ||
445 | * requires the AGFL to be initialised by mkfs. At that point, we can | ||
446 | * verify the blocks in the agfl -active or not- lie within the bounds | ||
447 | * of the AG. Until then, just leave this check ifdef'd out. | ||
448 | */ | ||
449 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
450 | struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); | ||
451 | int agfl_ok = 1; | ||
452 | |||
453 | int i; | ||
454 | |||
455 | for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { | ||
456 | if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK || | ||
457 | be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) | ||
458 | agfl_ok = 0; | ||
459 | } | ||
460 | |||
461 | if (!agfl_ok) { | ||
462 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl); | ||
463 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
464 | } | ||
465 | #endif | ||
466 | } | ||
467 | |||
468 | static void | ||
469 | xfs_agfl_write_verify( | ||
470 | struct xfs_buf *bp) | ||
471 | { | ||
472 | xfs_agfl_verify(bp); | ||
473 | } | ||
474 | |||
475 | static void | ||
476 | xfs_agfl_read_verify( | ||
477 | struct xfs_buf *bp) | ||
478 | { | ||
479 | xfs_agfl_verify(bp); | ||
480 | } | ||
481 | |||
482 | const struct xfs_buf_ops xfs_agfl_buf_ops = { | ||
483 | .verify_read = xfs_agfl_read_verify, | ||
484 | .verify_write = xfs_agfl_write_verify, | ||
485 | }; | ||
486 | |||
433 | /* | 487 | /* |
434 | * Read in the allocation group free block array. | 488 | * Read in the allocation group free block array. |
435 | */ | 489 | */ |
@@ -447,7 +501,7 @@ xfs_alloc_read_agfl( | |||
447 | error = xfs_trans_read_buf( | 501 | error = xfs_trans_read_buf( |
448 | mp, tp, mp->m_ddev_targp, | 502 | mp, tp, mp->m_ddev_targp, |
449 | XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), | 503 | XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), |
450 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 504 | XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); |
451 | if (error) | 505 | if (error) |
452 | return error; | 506 | return error; |
453 | ASSERT(!xfs_buf_geterror(bp)); | 507 | ASSERT(!xfs_buf_geterror(bp)); |
@@ -2091,6 +2145,63 @@ xfs_alloc_put_freelist( | |||
2091 | return 0; | 2145 | return 0; |
2092 | } | 2146 | } |
2093 | 2147 | ||
2148 | static void | ||
2149 | xfs_agf_verify( | ||
2150 | struct xfs_buf *bp) | ||
2151 | { | ||
2152 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
2153 | struct xfs_agf *agf; | ||
2154 | int agf_ok; | ||
2155 | |||
2156 | agf = XFS_BUF_TO_AGF(bp); | ||
2157 | |||
2158 | agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && | ||
2159 | XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && | ||
2160 | be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && | ||
2161 | be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && | ||
2162 | be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && | ||
2163 | be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); | ||
2164 | |||
2165 | /* | ||
2166 | * during growfs operations, the perag is not fully initialised, | ||
2167 | * so we can't use it for any useful checking. growfs ensures we can't | ||
2168 | * use it by using uncached buffers that don't have the perag attached | ||
2169 | * so we can detect and avoid this problem. | ||
2170 | */ | ||
2171 | if (bp->b_pag) | ||
2172 | agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) == | ||
2173 | bp->b_pag->pag_agno; | ||
2174 | |||
2175 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) | ||
2176 | agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= | ||
2177 | be32_to_cpu(agf->agf_length); | ||
2178 | |||
2179 | if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, | ||
2180 | XFS_RANDOM_ALLOC_READ_AGF))) { | ||
2181 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf); | ||
2182 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
2183 | } | ||
2184 | } | ||
2185 | |||
2186 | static void | ||
2187 | xfs_agf_read_verify( | ||
2188 | struct xfs_buf *bp) | ||
2189 | { | ||
2190 | xfs_agf_verify(bp); | ||
2191 | } | ||
2192 | |||
2193 | static void | ||
2194 | xfs_agf_write_verify( | ||
2195 | struct xfs_buf *bp) | ||
2196 | { | ||
2197 | xfs_agf_verify(bp); | ||
2198 | } | ||
2199 | |||
2200 | const struct xfs_buf_ops xfs_agf_buf_ops = { | ||
2201 | .verify_read = xfs_agf_read_verify, | ||
2202 | .verify_write = xfs_agf_write_verify, | ||
2203 | }; | ||
2204 | |||
2094 | /* | 2205 | /* |
2095 | * Read in the allocation group header (free/alloc section). | 2206 | * Read in the allocation group header (free/alloc section). |
2096 | */ | 2207 | */ |
@@ -2102,44 +2213,19 @@ xfs_read_agf( | |||
2102 | int flags, /* XFS_BUF_ */ | 2213 | int flags, /* XFS_BUF_ */ |
2103 | struct xfs_buf **bpp) /* buffer for the ag freelist header */ | 2214 | struct xfs_buf **bpp) /* buffer for the ag freelist header */ |
2104 | { | 2215 | { |
2105 | struct xfs_agf *agf; /* ag freelist header */ | ||
2106 | int agf_ok; /* set if agf is consistent */ | ||
2107 | int error; | 2216 | int error; |
2108 | 2217 | ||
2109 | ASSERT(agno != NULLAGNUMBER); | 2218 | ASSERT(agno != NULLAGNUMBER); |
2110 | error = xfs_trans_read_buf( | 2219 | error = xfs_trans_read_buf( |
2111 | mp, tp, mp->m_ddev_targp, | 2220 | mp, tp, mp->m_ddev_targp, |
2112 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), | 2221 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), |
2113 | XFS_FSS_TO_BB(mp, 1), flags, bpp); | 2222 | XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); |
2114 | if (error) | 2223 | if (error) |
2115 | return error; | 2224 | return error; |
2116 | if (!*bpp) | 2225 | if (!*bpp) |
2117 | return 0; | 2226 | return 0; |
2118 | 2227 | ||
2119 | ASSERT(!(*bpp)->b_error); | 2228 | ASSERT(!(*bpp)->b_error); |
2120 | agf = XFS_BUF_TO_AGF(*bpp); | ||
2121 | |||
2122 | /* | ||
2123 | * Validate the magic number of the agf block. | ||
2124 | */ | ||
2125 | agf_ok = | ||
2126 | agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && | ||
2127 | XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && | ||
2128 | be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && | ||
2129 | be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && | ||
2130 | be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && | ||
2131 | be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) && | ||
2132 | be32_to_cpu(agf->agf_seqno) == agno; | ||
2133 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) | ||
2134 | agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= | ||
2135 | be32_to_cpu(agf->agf_length); | ||
2136 | if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, | ||
2137 | XFS_RANDOM_ALLOC_READ_AGF))) { | ||
2138 | XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", | ||
2139 | XFS_ERRLEVEL_LOW, mp, agf); | ||
2140 | xfs_trans_brelse(tp, *bpp); | ||
2141 | return XFS_ERROR(EFSCORRUPTED); | ||
2142 | } | ||
2143 | xfs_buf_set_ref(*bpp, XFS_AGF_REF); | 2229 | xfs_buf_set_ref(*bpp, XFS_AGF_REF); |
2144 | return 0; | 2230 | return 0; |
2145 | } | 2231 | } |
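
These XFS hunks are part of a series that moves metadata validation out of the read paths (compare the deleted checks in xfs_read_agf()) and into per-buffer-type xfs_buf_ops tables whose verify_read/verify_write callbacks run on every I/O. A compact userspace sketch of the shape of that pattern, with simplified types and an invented error value:

#include <stdint.h>
#include <stdio.h>

#define AGF_MAGIC 0x58414746u	/* "XAGF" */

struct buf_ops;

struct buf {
	uint32_t magic;
	int error;
	const struct buf_ops *ops;
};

struct buf_ops {
	void (*verify_read)(struct buf *bp);
	void (*verify_write)(struct buf *bp);
};

static void agf_verify(struct buf *bp)
{
	if (bp->magic != AGF_MAGIC)
		bp->error = -117;	/* stand-in for -EFSCORRUPTED */
}

static const struct buf_ops agf_buf_ops = {
	.verify_read  = agf_verify,
	.verify_write = agf_verify,
};

/* Generic read path: verification happens here, once, for all callers. */
static int read_buf(struct buf *bp, uint32_t on_disk_magic,
		    const struct buf_ops *ops)
{
	bp->magic = on_disk_magic;	/* pretend we read it from disk */
	bp->error = 0;
	bp->ops = ops;			/* kept for later write verification */
	if (ops && ops->verify_read)
		ops->verify_read(bp);
	return bp->error;
}

int main(void)
{
	struct buf bp;

	printf("good: %d\n", read_buf(&bp, AGF_MAGIC, &agf_buf_ops));
	printf("bad:  %d\n", read_buf(&bp, 0xdeadbeef, &agf_buf_ops));
	return 0;
}
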
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index feacb061bab7..99d0a6101558 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -231,4 +231,7 @@ xfs_alloc_get_rec( | |||
231 | xfs_extlen_t *len, /* output: length of extent */ | 231 | xfs_extlen_t *len, /* output: length of extent */ |
232 | int *stat); /* output: success/failure */ | 232 | int *stat); /* output: success/failure */ |
233 | 233 | ||
234 | extern const struct xfs_buf_ops xfs_agf_buf_ops; | ||
235 | extern const struct xfs_buf_ops xfs_agfl_buf_ops; | ||
236 | |||
234 | #endif /* __XFS_ALLOC_H__ */ | 237 | #endif /* __XFS_ALLOC_H__ */ |
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index f7876c6d6165..b1ddef6b2689 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -272,6 +272,82 @@ xfs_allocbt_key_diff( | |||
272 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; | 272 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; |
273 | } | 273 | } |
274 | 274 | ||
275 | static void | ||
276 | xfs_allocbt_verify( | ||
277 | struct xfs_buf *bp) | ||
278 | { | ||
279 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
280 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
281 | struct xfs_perag *pag = bp->b_pag; | ||
282 | unsigned int level; | ||
283 | int sblock_ok; /* block passes checks */ | ||
284 | |||
285 | /* | ||
286 | * magic number and level verification | ||
287 | * | ||
288 | * During growfs operations, we can't verify the exact level as the | ||
289 | * perag is not fully initialised and hence not attached to the buffer. | ||
290 | * In this case, check against the maximum tree depth. | ||
291 | */ | ||
292 | level = be16_to_cpu(block->bb_level); | ||
293 | switch (block->bb_magic) { | ||
294 | case cpu_to_be32(XFS_ABTB_MAGIC): | ||
295 | if (pag) | ||
296 | sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi]; | ||
297 | else | ||
298 | sblock_ok = level < mp->m_ag_maxlevels; | ||
299 | break; | ||
300 | case cpu_to_be32(XFS_ABTC_MAGIC): | ||
301 | if (pag) | ||
302 | sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi]; | ||
303 | else | ||
304 | sblock_ok = level < mp->m_ag_maxlevels; | ||
305 | break; | ||
306 | default: | ||
307 | sblock_ok = 0; | ||
308 | break; | ||
309 | } | ||
310 | |||
311 | /* numrecs verification */ | ||
312 | sblock_ok = sblock_ok && | ||
313 | be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0]; | ||
314 | |||
315 | /* sibling pointer verification */ | ||
316 | sblock_ok = sblock_ok && | ||
317 | (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || | ||
318 | be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && | ||
319 | block->bb_u.s.bb_leftsib && | ||
320 | (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || | ||
321 | be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && | ||
322 | block->bb_u.s.bb_rightsib; | ||
323 | |||
324 | if (!sblock_ok) { | ||
325 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
326 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); | ||
327 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void | ||
332 | xfs_allocbt_read_verify( | ||
333 | struct xfs_buf *bp) | ||
334 | { | ||
335 | xfs_allocbt_verify(bp); | ||
336 | } | ||
337 | |||
338 | static void | ||
339 | xfs_allocbt_write_verify( | ||
340 | struct xfs_buf *bp) | ||
341 | { | ||
342 | xfs_allocbt_verify(bp); | ||
343 | } | ||
344 | |||
345 | const struct xfs_buf_ops xfs_allocbt_buf_ops = { | ||
346 | .verify_read = xfs_allocbt_read_verify, | ||
347 | .verify_write = xfs_allocbt_write_verify, | ||
348 | }; | ||
349 | |||
350 | |||
275 | #ifdef DEBUG | 351 | #ifdef DEBUG |
276 | STATIC int | 352 | STATIC int |
277 | xfs_allocbt_keys_inorder( | 353 | xfs_allocbt_keys_inorder( |
@@ -327,6 +403,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { | |||
327 | .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, | 403 | .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, |
328 | .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, | 404 | .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, |
329 | .key_diff = xfs_allocbt_key_diff, | 405 | .key_diff = xfs_allocbt_key_diff, |
406 | .buf_ops = &xfs_allocbt_buf_ops, | ||
330 | #ifdef DEBUG | 407 | #ifdef DEBUG |
331 | .keys_inorder = xfs_allocbt_keys_inorder, | 408 | .keys_inorder = xfs_allocbt_keys_inorder, |
332 | .recs_inorder = xfs_allocbt_recs_inorder, | 409 | .recs_inorder = xfs_allocbt_recs_inorder, |
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 359fb86ed876..7e89a2b429dd 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h | |||
@@ -93,4 +93,6 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, | |||
93 | xfs_agnumber_t, xfs_btnum_t); | 93 | xfs_agnumber_t, xfs_btnum_t); |
94 | extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); | 94 | extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); |
95 | 95 | ||
96 | extern const struct xfs_buf_ops xfs_allocbt_buf_ops; | ||
97 | |||
96 | #endif /* __XFS_ALLOC_BTREE_H__ */ | 98 | #endif /* __XFS_ALLOC_BTREE_H__ */ |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e562dd43f41f..4111a40ebe1a 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -124,7 +124,7 @@ xfs_setfilesize_trans_alloc( | |||
124 | ioend->io_append_trans = tp; | 124 | ioend->io_append_trans = tp; |
125 | 125 | ||
126 | /* | 126 | /* |
127 | * We will pass freeze protection with a transaction. So tell lockdep | 127 | * We may pass freeze protection with a transaction. So tell lockdep |
128 | * we released it. | 128 | * we released it. |
129 | */ | 129 | */ |
130 | rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | 130 | rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], |
@@ -149,11 +149,13 @@ xfs_setfilesize( | |||
149 | xfs_fsize_t isize; | 149 | xfs_fsize_t isize; |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * The transaction was allocated in the I/O submission thread, | 152 | * The transaction may have been allocated in the I/O submission thread, |
153 | * thus we need to mark ourselves as beeing in a transaction | 153 | * thus we need to mark ourselves as beeing in a transaction manually. |
154 | * manually. | 154 | * Similarly for freeze protection. |
155 | */ | 155 | */ |
156 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | 156 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); |
157 | rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
158 | 0, 1, _THIS_IP_); | ||
157 | 159 | ||
158 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 160 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
159 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); | 161 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); |
@@ -187,7 +189,8 @@ xfs_finish_ioend( | |||
187 | 189 | ||
188 | if (ioend->io_type == XFS_IO_UNWRITTEN) | 190 | if (ioend->io_type == XFS_IO_UNWRITTEN) |
189 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); | 191 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); |
190 | else if (ioend->io_append_trans) | 192 | else if (ioend->io_append_trans || |
193 | (ioend->io_isdirect && xfs_ioend_is_append(ioend))) | ||
191 | queue_work(mp->m_data_workqueue, &ioend->io_work); | 194 | queue_work(mp->m_data_workqueue, &ioend->io_work); |
192 | else | 195 | else |
193 | xfs_destroy_ioend(ioend); | 196 | xfs_destroy_ioend(ioend); |
@@ -205,15 +208,6 @@ xfs_end_io( | |||
205 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | 208 | struct xfs_inode *ip = XFS_I(ioend->io_inode); |
206 | int error = 0; | 209 | int error = 0; |
207 | 210 | ||
208 | if (ioend->io_append_trans) { | ||
209 | /* | ||
210 | * We've got freeze protection passed with the transaction. | ||
211 | * Tell lockdep about it. | ||
212 | */ | ||
213 | rwsem_acquire_read( | ||
214 | &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
215 | 0, 1, _THIS_IP_); | ||
216 | } | ||
217 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 211 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
218 | ioend->io_error = -EIO; | 212 | ioend->io_error = -EIO; |
219 | goto done; | 213 | goto done; |
@@ -226,35 +220,31 @@ xfs_end_io( | |||
226 | * range to normal written extents after the data I/O has finished. | 220 | * range to normal written extents after the data I/O has finished. |
227 | */ | 221 | */ |
228 | if (ioend->io_type == XFS_IO_UNWRITTEN) { | 222 | if (ioend->io_type == XFS_IO_UNWRITTEN) { |
223 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | ||
224 | ioend->io_size); | ||
225 | } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { | ||
229 | /* | 226 | /* |
230 | * For buffered I/O we never preallocate a transaction when | 227 | * For direct I/O we do not know if we need to allocate blocks |
231 | * doing the unwritten extent conversion, but for direct I/O | 228 | * or not so we can't preallocate an append transaction as that |
232 | * we do not know if we are converting an unwritten extent | 229 | * results in nested reservations and log space deadlocks. Hence |
233 | * or not at the point where we preallocate the transaction. | 230 | * allocate the transaction here. While this is sub-optimal and |
231 | * can block IO completion for some time, we're stuck with doing | ||
232 | * it this way until we can pass the ioend to the direct IO | ||
233 | * allocation callbacks and avoid nesting that way. | ||
234 | */ | 234 | */ |
235 | if (ioend->io_append_trans) { | 235 | error = xfs_setfilesize_trans_alloc(ioend); |
236 | ASSERT(ioend->io_isdirect); | 236 | if (error) |
237 | |||
238 | current_set_flags_nested( | ||
239 | &ioend->io_append_trans->t_pflags, PF_FSTRANS); | ||
240 | xfs_trans_cancel(ioend->io_append_trans, 0); | ||
241 | } | ||
242 | |||
243 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | ||
244 | ioend->io_size); | ||
245 | if (error) { | ||
246 | ioend->io_error = -error; | ||
247 | goto done; | 237 | goto done; |
248 | } | 238 | error = xfs_setfilesize(ioend); |
249 | } else if (ioend->io_append_trans) { | 239 | } else if (ioend->io_append_trans) { |
250 | error = xfs_setfilesize(ioend); | 240 | error = xfs_setfilesize(ioend); |
251 | if (error) | ||
252 | ioend->io_error = -error; | ||
253 | } else { | 241 | } else { |
254 | ASSERT(!xfs_ioend_is_append(ioend)); | 242 | ASSERT(!xfs_ioend_is_append(ioend)); |
255 | } | 243 | } |
256 | 244 | ||
257 | done: | 245 | done: |
246 | if (error) | ||
247 | ioend->io_error = -error; | ||
258 | xfs_destroy_ioend(ioend); | 248 | xfs_destroy_ioend(ioend); |
259 | } | 249 | } |
260 | 250 | ||
@@ -481,11 +471,17 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) | |||
481 | * | 471 | * |
482 | * The fix is two passes across the ioend list - one to start writeback on the | 472 | * The fix is two passes across the ioend list - one to start writeback on the |
483 | * buffer_heads, and then submit them for I/O on the second pass. | 473 | * buffer_heads, and then submit them for I/O on the second pass. |
474 | * | ||
475 | * If @fail is non-zero, it means that we have a situation where some part of | ||
476 | * the submission process has failed after we have marked paged for writeback | ||
477 | * and unlocked them. In this situation, we need to fail the ioend chain rather | ||
478 | * than submit it to IO. This typically only happens on a filesystem shutdown. | ||
484 | */ | 479 | */ |
485 | STATIC void | 480 | STATIC void |
486 | xfs_submit_ioend( | 481 | xfs_submit_ioend( |
487 | struct writeback_control *wbc, | 482 | struct writeback_control *wbc, |
488 | xfs_ioend_t *ioend) | 483 | xfs_ioend_t *ioend, |
484 | int fail) | ||
489 | { | 485 | { |
490 | xfs_ioend_t *head = ioend; | 486 | xfs_ioend_t *head = ioend; |
491 | xfs_ioend_t *next; | 487 | xfs_ioend_t *next; |
@@ -506,6 +502,18 @@ xfs_submit_ioend( | |||
506 | next = ioend->io_list; | 502 | next = ioend->io_list; |
507 | bio = NULL; | 503 | bio = NULL; |
508 | 504 | ||
505 | /* | ||
506 | * If we are failing the IO now, just mark the ioend with an | ||
507 | * error and finish it. This will run IO completion immediately | ||
508 | * as there is only one reference to the ioend at this point in | ||
509 | * time. | ||
510 | */ | ||
511 | if (fail) { | ||
512 | ioend->io_error = -fail; | ||
513 | xfs_finish_ioend(ioend); | ||
514 | continue; | ||
515 | } | ||
516 | |||
509 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { | 517 | for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { |
510 | 518 | ||
511 | if (!bio) { | 519 | if (!bio) { |
@@ -1060,7 +1068,18 @@ xfs_vm_writepage( | |||
1060 | 1068 | ||
1061 | xfs_start_page_writeback(page, 1, count); | 1069 | xfs_start_page_writeback(page, 1, count); |
1062 | 1070 | ||
1063 | if (ioend && imap_valid) { | 1071 | /* if there is no IO to be submitted for this page, we are done */ |
1072 | if (!ioend) | ||
1073 | return 0; | ||
1074 | |||
1075 | ASSERT(iohead); | ||
1076 | |||
1077 | /* | ||
1078 | * Any errors from this point onwards need to be reported through the IO | ||
1079 | * completion path as we have marked the initial page as under writeback | ||
1080 | * and unlocked it. | ||
1081 | */ | ||
1082 | if (imap_valid) { | ||
1064 | xfs_off_t end_index; | 1083 | xfs_off_t end_index; |
1065 | 1084 | ||
1066 | end_index = imap.br_startoff + imap.br_blockcount; | 1085 | end_index = imap.br_startoff + imap.br_blockcount; |
@@ -1079,20 +1098,15 @@ xfs_vm_writepage( | |||
1079 | wbc, end_index); | 1098 | wbc, end_index); |
1080 | } | 1099 | } |
1081 | 1100 | ||
1082 | if (iohead) { | ||
1083 | /* | ||
1084 | * Reserve log space if we might write beyond the on-disk | ||
1085 | * inode size. | ||
1086 | */ | ||
1087 | if (ioend->io_type != XFS_IO_UNWRITTEN && | ||
1088 | xfs_ioend_is_append(ioend)) { | ||
1089 | err = xfs_setfilesize_trans_alloc(ioend); | ||
1090 | if (err) | ||
1091 | goto error; | ||
1092 | } | ||
1093 | 1101 | ||
1094 | xfs_submit_ioend(wbc, iohead); | 1102 | /* |
1095 | } | 1103 | * Reserve log space if we might write beyond the on-disk inode size. |
1104 | */ | ||
1105 | err = 0; | ||
1106 | if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) | ||
1107 | err = xfs_setfilesize_trans_alloc(ioend); | ||
1108 | |||
1109 | xfs_submit_ioend(wbc, iohead, err); | ||
1096 | 1110 | ||
1097 | return 0; | 1111 | return 0; |
1098 | 1112 | ||
@@ -1408,25 +1422,21 @@ xfs_vm_direct_IO( | |||
1408 | size_t size = iov_length(iov, nr_segs); | 1422 | size_t size = iov_length(iov, nr_segs); |
1409 | 1423 | ||
1410 | /* | 1424 | /* |
1411 | * We need to preallocate a transaction for a size update | 1425 | * We cannot preallocate a size update transaction here as we |
1412 | * here. In the case that this write both updates the size | 1426 | * don't know whether allocation is necessary or not. Hence we |
1413 | * and converts at least on unwritten extent we will cancel | 1427 | * can only tell IO completion that one is necessary if we are |
1414 | * the still clean transaction after the I/O has finished. | 1428 | * not doing unwritten extent conversion. |
1415 | */ | 1429 | */ |
1416 | iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); | 1430 | iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); |
1417 | if (offset + size > XFS_I(inode)->i_d.di_size) { | 1431 | if (offset + size > XFS_I(inode)->i_d.di_size) |
1418 | ret = xfs_setfilesize_trans_alloc(ioend); | ||
1419 | if (ret) | ||
1420 | goto out_destroy_ioend; | ||
1421 | ioend->io_isdirect = 1; | 1432 | ioend->io_isdirect = 1; |
1422 | } | ||
1423 | 1433 | ||
1424 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1434 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1425 | offset, nr_segs, | 1435 | offset, nr_segs, |
1426 | xfs_get_blocks_direct, | 1436 | xfs_get_blocks_direct, |
1427 | xfs_end_io_direct_write, NULL, 0); | 1437 | xfs_end_io_direct_write, NULL, 0); |
1428 | if (ret != -EIOCBQUEUED && iocb->private) | 1438 | if (ret != -EIOCBQUEUED && iocb->private) |
1429 | goto out_trans_cancel; | 1439 | goto out_destroy_ioend; |
1430 | } else { | 1440 | } else { |
1431 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1441 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1432 | offset, nr_segs, | 1442 | offset, nr_segs, |
@@ -1436,15 +1446,6 @@ xfs_vm_direct_IO( | |||
1436 | 1446 | ||
1437 | return ret; | 1447 | return ret; |
1438 | 1448 | ||
1439 | out_trans_cancel: | ||
1440 | if (ioend->io_append_trans) { | ||
1441 | current_set_flags_nested(&ioend->io_append_trans->t_pflags, | ||
1442 | PF_FSTRANS); | ||
1443 | rwsem_acquire_read( | ||
1444 | &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
1445 | 0, 1, _THIS_IP_); | ||
1446 | xfs_trans_cancel(ioend->io_append_trans, 0); | ||
1447 | } | ||
1448 | out_destroy_ioend: | 1449 | out_destroy_ioend: |
1449 | xfs_destroy_ioend(ioend); | 1450 | xfs_destroy_ioend(ioend); |
1450 | return ret; | 1451 | return ret; |
@@ -1617,7 +1618,7 @@ xfs_vm_bmap( | |||
1617 | 1618 | ||
1618 | trace_xfs_vm_bmap(XFS_I(inode)); | 1619 | trace_xfs_vm_bmap(XFS_I(inode)); |
1619 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 1620 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
1620 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); | 1621 | filemap_write_and_wait(mapping); |
1621 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 1622 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
1622 | return generic_block_bmap(mapping, block, xfs_get_blocks); | 1623 | return generic_block_bmap(mapping, block, xfs_get_blocks); |
1623 | } | 1624 | } |
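
The xfs_submit_ioend() change above reflects a general constraint: once pages are marked for writeback and unlocked, a later setup failure (for example the log reservation for the size update) can no longer be returned to the caller, so it has to travel through IO completion instead. A sketch of that chain-failing logic, not the kernel function:

#include <stddef.h>
#include <stdio.h>

struct ioend {
	struct ioend *next;
	int error;
};

static void finish_ioend(struct ioend *io)
{
	printf("complete ioend, error=%d\n", io->error);
}

static void submit_io(struct ioend *io)
{
	printf("submit ioend %p for IO\n", (void *)io);
}

static void submit_ioend_chain(struct ioend *head, int fail)
{
	struct ioend *io, *next;

	for (io = head; io; io = next) {
		next = io->next;
		if (fail) {
			/* Report through IO completion, not a return code. */
			io->error = -fail;
			finish_ioend(io);
			continue;
		}
		submit_io(io);
	}
}

int main(void)
{
	struct ioend b = { NULL, 0 }, a = { &b, 0 };

	submit_ioend_chain(&a, 0);	/* normal submission */
	submit_ioend_chain(&a, 5);	/* e.g. reservation failed: EIO-style fail */
	return 0;
}
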
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 0ca1f0be62d2..aaf472532b3c 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -903,11 +903,9 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
903 | */ | 903 | */ |
904 | dp = args->dp; | 904 | dp = args->dp; |
905 | args->blkno = 0; | 905 | args->blkno = 0; |
906 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, | 906 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); |
907 | XFS_ATTR_FORK); | ||
908 | if (error) | 907 | if (error) |
909 | return(error); | 908 | return error; |
910 | ASSERT(bp != NULL); | ||
911 | 909 | ||
912 | /* | 910 | /* |
913 | * Look up the given attribute in the leaf block. Figure out if | 911 | * Look up the given attribute in the leaf block. Figure out if |
@@ -1031,12 +1029,12 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) | |||
1031 | * Read in the block containing the "old" attr, then | 1029 | * Read in the block containing the "old" attr, then |
1032 | * remove the "old" attr from that block (neat, huh!) | 1030 | * remove the "old" attr from that block (neat, huh!) |
1033 | */ | 1031 | */ |
1034 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, | 1032 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, |
1035 | &bp, XFS_ATTR_FORK); | 1033 | -1, &bp); |
1036 | if (error) | 1034 | if (error) |
1037 | return(error); | 1035 | return error; |
1038 | ASSERT(bp != NULL); | 1036 | |
1039 | (void)xfs_attr_leaf_remove(bp, args); | 1037 | xfs_attr_leaf_remove(bp, args); |
1040 | 1038 | ||
1041 | /* | 1039 | /* |
1042 | * If the result is small enough, shrink it all into the inode. | 1040 | * If the result is small enough, shrink it all into the inode. |
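
From the call sites' point of view, these attr conversions replace the xfs_da_read_buf() plus magic-number ASSERT pairs with a single xfs_attr_leaf_read() that verifies as it reads, turning silent debug-only checks into real error returns. A tiny illustration of that wrapper shape (invented names, with EIO standing in for EFSCORRUPTED):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define ATTR_LEAF_MAGIC 0xfbee

struct leaf_buf {
	uint16_t magic;
};

static int raw_read(struct leaf_buf *bp, uint16_t on_disk)
{
	bp->magic = on_disk;	/* stand-in for the disk read */
	return 0;
}

/* The helper: one place that knows what a valid attr leaf looks like. */
static int attr_leaf_read(struct leaf_buf *bp, uint16_t on_disk)
{
	int error = raw_read(bp, on_disk);

	if (error)
		return error;
	if (bp->magic != ATTR_LEAF_MAGIC)
		return -EIO;	/* caller gets an error, not an ASSERT */
	return 0;
}

int main(void)
{
	struct leaf_buf bp;

	printf("%d\n", attr_leaf_read(&bp, ATTR_LEAF_MAGIC));	/* 0 */
	printf("%d\n", attr_leaf_read(&bp, 0x1234));		/* -EIO */
	return 0;
}
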
@@ -1100,20 +1098,17 @@ xfs_attr_leaf_removename(xfs_da_args_t *args) | |||
1100 | */ | 1098 | */ |
1101 | dp = args->dp; | 1099 | dp = args->dp; |
1102 | args->blkno = 0; | 1100 | args->blkno = 0; |
1103 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, | 1101 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); |
1104 | XFS_ATTR_FORK); | 1102 | if (error) |
1105 | if (error) { | 1103 | return error; |
1106 | return(error); | ||
1107 | } | ||
1108 | 1104 | ||
1109 | ASSERT(bp != NULL); | ||
1110 | error = xfs_attr_leaf_lookup_int(bp, args); | 1105 | error = xfs_attr_leaf_lookup_int(bp, args); |
1111 | if (error == ENOATTR) { | 1106 | if (error == ENOATTR) { |
1112 | xfs_trans_brelse(args->trans, bp); | 1107 | xfs_trans_brelse(args->trans, bp); |
1113 | return(error); | 1108 | return(error); |
1114 | } | 1109 | } |
1115 | 1110 | ||
1116 | (void)xfs_attr_leaf_remove(bp, args); | 1111 | xfs_attr_leaf_remove(bp, args); |
1117 | 1112 | ||
1118 | /* | 1113 | /* |
1119 | * If the result is small enough, shrink it all into the inode. | 1114 | * If the result is small enough, shrink it all into the inode. |
@@ -1155,12 +1150,12 @@ xfs_attr_leaf_get(xfs_da_args_t *args) | |||
1155 | struct xfs_buf *bp; | 1150 | struct xfs_buf *bp; |
1156 | int error; | 1151 | int error; |
1157 | 1152 | ||
1153 | trace_xfs_attr_leaf_get(args); | ||
1154 | |||
1158 | args->blkno = 0; | 1155 | args->blkno = 0; |
1159 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, | 1156 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); |
1160 | XFS_ATTR_FORK); | ||
1161 | if (error) | 1157 | if (error) |
1162 | return(error); | 1158 | return error; |
1163 | ASSERT(bp != NULL); | ||
1164 | 1159 | ||
1165 | error = xfs_attr_leaf_lookup_int(bp, args); | 1160 | error = xfs_attr_leaf_lookup_int(bp, args); |
1166 | if (error != EEXIST) { | 1161 | if (error != EEXIST) { |
@@ -1181,22 +1176,15 @@ xfs_attr_leaf_get(xfs_da_args_t *args) | |||
1181 | STATIC int | 1176 | STATIC int |
1182 | xfs_attr_leaf_list(xfs_attr_list_context_t *context) | 1177 | xfs_attr_leaf_list(xfs_attr_list_context_t *context) |
1183 | { | 1178 | { |
1184 | xfs_attr_leafblock_t *leaf; | ||
1185 | int error; | 1179 | int error; |
1186 | struct xfs_buf *bp; | 1180 | struct xfs_buf *bp; |
1187 | 1181 | ||
1182 | trace_xfs_attr_leaf_list(context); | ||
1183 | |||
1188 | context->cursor->blkno = 0; | 1184 | context->cursor->blkno = 0; |
1189 | error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK); | 1185 | error = xfs_attr_leaf_read(NULL, context->dp, 0, -1, &bp); |
1190 | if (error) | 1186 | if (error) |
1191 | return XFS_ERROR(error); | 1187 | return XFS_ERROR(error); |
1192 | ASSERT(bp != NULL); | ||
1193 | leaf = bp->b_addr; | ||
1194 | if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { | ||
1195 | XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, | ||
1196 | context->dp->i_mount, leaf); | ||
1197 | xfs_trans_brelse(NULL, bp); | ||
1198 | return XFS_ERROR(EFSCORRUPTED); | ||
1199 | } | ||
1200 | 1188 | ||
1201 | error = xfs_attr_leaf_list_int(bp, context); | 1189 | error = xfs_attr_leaf_list_int(bp, context); |
1202 | xfs_trans_brelse(NULL, bp); | 1190 | xfs_trans_brelse(NULL, bp); |
@@ -1600,12 +1588,9 @@ xfs_attr_node_removename(xfs_da_args_t *args) | |||
1600 | ASSERT(state->path.blk[0].bp); | 1588 | ASSERT(state->path.blk[0].bp); |
1601 | state->path.blk[0].bp = NULL; | 1589 | state->path.blk[0].bp = NULL; |
1602 | 1590 | ||
1603 | error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp, | 1591 | error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp); |
1604 | XFS_ATTR_FORK); | ||
1605 | if (error) | 1592 | if (error) |
1606 | goto out; | 1593 | goto out; |
1607 | ASSERT((((xfs_attr_leafblock_t *)bp->b_addr)->hdr.info.magic) == | ||
1608 | cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
1609 | 1594 | ||
1610 | if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { | 1595 | if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { |
1611 | xfs_bmap_init(args->flist, args->firstblock); | 1596 | xfs_bmap_init(args->flist, args->firstblock); |
@@ -1653,6 +1638,8 @@ xfs_attr_fillstate(xfs_da_state_t *state) | |||
1653 | xfs_da_state_blk_t *blk; | 1638 | xfs_da_state_blk_t *blk; |
1654 | int level; | 1639 | int level; |
1655 | 1640 | ||
1641 | trace_xfs_attr_fillstate(state->args); | ||
1642 | |||
1656 | /* | 1643 | /* |
1657 | * Roll down the "path" in the state structure, storing the on-disk | 1644 | * Roll down the "path" in the state structure, storing the on-disk |
1658 | * block number for those buffers in the "path". | 1645 | * block number for those buffers in the "path". |
@@ -1699,6 +1686,8 @@ xfs_attr_refillstate(xfs_da_state_t *state) | |||
1699 | xfs_da_state_blk_t *blk; | 1686 | xfs_da_state_blk_t *blk; |
1700 | int level, error; | 1687 | int level, error; |
1701 | 1688 | ||
1689 | trace_xfs_attr_refillstate(state->args); | ||
1690 | |||
1702 | /* | 1691 | /* |
1703 | * Roll down the "path" in the state structure, storing the on-disk | 1692 | * Roll down the "path" in the state structure, storing the on-disk |
1704 | * block number for those buffers in the "path". | 1693 | * block number for those buffers in the "path". |
@@ -1707,7 +1696,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) | |||
1707 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); | 1696 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); |
1708 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { | 1697 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { |
1709 | if (blk->disk_blkno) { | 1698 | if (blk->disk_blkno) { |
1710 | error = xfs_da_read_buf(state->args->trans, | 1699 | error = xfs_da_node_read(state->args->trans, |
1711 | state->args->dp, | 1700 | state->args->dp, |
1712 | blk->blkno, blk->disk_blkno, | 1701 | blk->blkno, blk->disk_blkno, |
1713 | &blk->bp, XFS_ATTR_FORK); | 1702 | &blk->bp, XFS_ATTR_FORK); |
@@ -1726,7 +1715,7 @@ xfs_attr_refillstate(xfs_da_state_t *state) | |||
1726 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); | 1715 | ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); |
1727 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { | 1716 | for (blk = path->blk, level = 0; level < path->active; blk++, level++) { |
1728 | if (blk->disk_blkno) { | 1717 | if (blk->disk_blkno) { |
1729 | error = xfs_da_read_buf(state->args->trans, | 1718 | error = xfs_da_node_read(state->args->trans, |
1730 | state->args->dp, | 1719 | state->args->dp, |
1731 | blk->blkno, blk->disk_blkno, | 1720 | blk->blkno, blk->disk_blkno, |
1732 | &blk->bp, XFS_ATTR_FORK); | 1721 | &blk->bp, XFS_ATTR_FORK); |
@@ -1755,6 +1744,8 @@ xfs_attr_node_get(xfs_da_args_t *args) | |||
1755 | int error, retval; | 1744 | int error, retval; |
1756 | int i; | 1745 | int i; |
1757 | 1746 | ||
1747 | trace_xfs_attr_node_get(args); | ||
1748 | |||
1758 | state = xfs_da_state_alloc(); | 1749 | state = xfs_da_state_alloc(); |
1759 | state->args = args; | 1750 | state->args = args; |
1760 | state->mp = args->dp->i_mount; | 1751 | state->mp = args->dp->i_mount; |
@@ -1804,6 +1795,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
1804 | int error, i; | 1795 | int error, i; |
1805 | struct xfs_buf *bp; | 1796 | struct xfs_buf *bp; |
1806 | 1797 | ||
1798 | trace_xfs_attr_node_list(context); | ||
1799 | |||
1807 | cursor = context->cursor; | 1800 | cursor = context->cursor; |
1808 | cursor->initted = 1; | 1801 | cursor->initted = 1; |
1809 | 1802 | ||
@@ -1814,7 +1807,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
1814 | */ | 1807 | */ |
1815 | bp = NULL; | 1808 | bp = NULL; |
1816 | if (cursor->blkno > 0) { | 1809 | if (cursor->blkno > 0) { |
1817 | error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1, | 1810 | error = xfs_da_node_read(NULL, context->dp, cursor->blkno, -1, |
1818 | &bp, XFS_ATTR_FORK); | 1811 | &bp, XFS_ATTR_FORK); |
1819 | if ((error != 0) && (error != EFSCORRUPTED)) | 1812 | if ((error != 0) && (error != EFSCORRUPTED)) |
1820 | return(error); | 1813 | return(error); |
@@ -1856,17 +1849,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
1856 | if (bp == NULL) { | 1849 | if (bp == NULL) { |
1857 | cursor->blkno = 0; | 1850 | cursor->blkno = 0; |
1858 | for (;;) { | 1851 | for (;;) { |
1859 | error = xfs_da_read_buf(NULL, context->dp, | 1852 | error = xfs_da_node_read(NULL, context->dp, |
1860 | cursor->blkno, -1, &bp, | 1853 | cursor->blkno, -1, &bp, |
1861 | XFS_ATTR_FORK); | 1854 | XFS_ATTR_FORK); |
1862 | if (error) | 1855 | if (error) |
1863 | return(error); | 1856 | return(error); |
1864 | if (unlikely(bp == NULL)) { | ||
1865 | XFS_ERROR_REPORT("xfs_attr_node_list(2)", | ||
1866 | XFS_ERRLEVEL_LOW, | ||
1867 | context->dp->i_mount); | ||
1868 | return(XFS_ERROR(EFSCORRUPTED)); | ||
1869 | } | ||
1870 | node = bp->b_addr; | 1857 | node = bp->b_addr; |
1871 | if (node->hdr.info.magic == | 1858 | if (node->hdr.info.magic == |
1872 | cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) | 1859 | cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) |
@@ -1907,14 +1894,6 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
1907 | */ | 1894 | */ |
1908 | for (;;) { | 1895 | for (;;) { |
1909 | leaf = bp->b_addr; | 1896 | leaf = bp->b_addr; |
1910 | if (unlikely(leaf->hdr.info.magic != | ||
1911 | cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) { | ||
1912 | XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", | ||
1913 | XFS_ERRLEVEL_LOW, | ||
1914 | context->dp->i_mount, leaf); | ||
1915 | xfs_trans_brelse(NULL, bp); | ||
1916 | return(XFS_ERROR(EFSCORRUPTED)); | ||
1917 | } | ||
1918 | error = xfs_attr_leaf_list_int(bp, context); | 1897 | error = xfs_attr_leaf_list_int(bp, context); |
1919 | if (error) { | 1898 | if (error) { |
1920 | xfs_trans_brelse(NULL, bp); | 1899 | xfs_trans_brelse(NULL, bp); |
@@ -1924,16 +1903,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) | |||
1924 | break; | 1903 | break; |
1925 | cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); | 1904 | cursor->blkno = be32_to_cpu(leaf->hdr.info.forw); |
1926 | xfs_trans_brelse(NULL, bp); | 1905 | xfs_trans_brelse(NULL, bp); |
1927 | error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1, | 1906 | error = xfs_attr_leaf_read(NULL, context->dp, cursor->blkno, -1, |
1928 | &bp, XFS_ATTR_FORK); | 1907 | &bp); |
1929 | if (error) | 1908 | if (error) |
1930 | return(error); | 1909 | return error; |
1931 | if (unlikely((bp == NULL))) { | ||
1932 | XFS_ERROR_REPORT("xfs_attr_node_list(5)", | ||
1933 | XFS_ERRLEVEL_LOW, | ||
1934 | context->dp->i_mount); | ||
1935 | return(XFS_ERROR(EFSCORRUPTED)); | ||
1936 | } | ||
1937 | } | 1910 | } |
1938 | xfs_trans_brelse(NULL, bp); | 1911 | xfs_trans_brelse(NULL, bp); |
1939 | return(0); | 1912 | return(0); |
@@ -1959,6 +1932,8 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) | |||
1959 | int nmap, error, tmp, valuelen, blkcnt, i; | 1932 | int nmap, error, tmp, valuelen, blkcnt, i; |
1960 | xfs_dablk_t lblkno; | 1933 | xfs_dablk_t lblkno; |
1961 | 1934 | ||
1935 | trace_xfs_attr_rmtval_get(args); | ||
1936 | |||
1962 | ASSERT(!(args->flags & ATTR_KERNOVAL)); | 1937 | ASSERT(!(args->flags & ATTR_KERNOVAL)); |
1963 | 1938 | ||
1964 | mp = args->dp->i_mount; | 1939 | mp = args->dp->i_mount; |
@@ -1980,7 +1955,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) | |||
1980 | dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); | 1955 | dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); |
1981 | blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); | 1956 | blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); |
1982 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, | 1957 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, |
1983 | dblkno, blkcnt, 0, &bp); | 1958 | dblkno, blkcnt, 0, &bp, NULL); |
1984 | if (error) | 1959 | if (error) |
1985 | return(error); | 1960 | return(error); |
1986 | 1961 | ||
@@ -2014,6 +1989,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2014 | xfs_dablk_t lblkno; | 1989 | xfs_dablk_t lblkno; |
2015 | int blkcnt, valuelen, nmap, error, tmp, committed; | 1990 | int blkcnt, valuelen, nmap, error, tmp, committed; |
2016 | 1991 | ||
1992 | trace_xfs_attr_rmtval_set(args); | ||
1993 | |||
2017 | dp = args->dp; | 1994 | dp = args->dp; |
2018 | mp = dp->i_mount; | 1995 | mp = dp->i_mount; |
2019 | src = args->value; | 1996 | src = args->value; |
@@ -2143,6 +2120,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) | |||
2143 | xfs_dablk_t lblkno; | 2120 | xfs_dablk_t lblkno; |
2144 | int valuelen, blkcnt, nmap, error, done, committed; | 2121 | int valuelen, blkcnt, nmap, error, done, committed; |
2145 | 2122 | ||
2123 | trace_xfs_attr_rmtval_remove(args); | ||
2124 | |||
2146 | mp = args->dp->i_mount; | 2125 | mp = args->dp->i_mount; |
2147 | 2126 | ||
2148 | /* | 2127 | /* |
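The xfs_attr.c changes above are all instances of one mechanical conversion; a before/after sketch, with names exactly as in the patch and error handling trimmed to the minimum:

	/* before: open-coded read, each caller checks the result by hand */
	error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
				&bp, XFS_ATTR_FORK);
	if (error)
		return error;
	ASSERT(bp != NULL);	/* plus per-site magic-number checks */

	/*
	 * after: the helper attaches the leaf verifier, so a block with a
	 * bad magic number comes back as EFSCORRUPTED from the read itself
	 */
	error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
	if (error)
		return error;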
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index d330111ca738..ee24993c7d12 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -57,7 +57,8 @@ STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block, | |||
57 | struct xfs_buf **bpp); | 57 | struct xfs_buf **bpp); |
58 | STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer, | 58 | STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer, |
59 | xfs_da_args_t *args, int freemap_index); | 59 | xfs_da_args_t *args, int freemap_index); |
60 | STATIC void xfs_attr_leaf_compact(xfs_trans_t *tp, struct xfs_buf *leaf_buffer); | 60 | STATIC void xfs_attr_leaf_compact(struct xfs_da_args *args, |
61 | struct xfs_buf *leaf_buffer); | ||
61 | STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state, | 62 | STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state, |
62 | xfs_da_state_blk_t *blk1, | 63 | xfs_da_state_blk_t *blk1, |
63 | xfs_da_state_blk_t *blk2); | 64 | xfs_da_state_blk_t *blk2); |
@@ -87,6 +88,52 @@ STATIC void xfs_attr_leaf_moveents(xfs_attr_leafblock_t *src_leaf, | |||
87 | xfs_mount_t *mp); | 88 | xfs_mount_t *mp); |
88 | STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); | 89 | STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); |
89 | 90 | ||
91 | static void | ||
92 | xfs_attr_leaf_verify( | ||
93 | struct xfs_buf *bp) | ||
94 | { | ||
95 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
96 | struct xfs_attr_leaf_hdr *hdr = bp->b_addr; | ||
97 | int block_ok = 0; | ||
98 | |||
99 | block_ok = hdr->info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC); | ||
100 | if (!block_ok) { | ||
101 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
102 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
103 | } | ||
104 | } | ||
105 | |||
106 | static void | ||
107 | xfs_attr_leaf_read_verify( | ||
108 | struct xfs_buf *bp) | ||
109 | { | ||
110 | xfs_attr_leaf_verify(bp); | ||
111 | } | ||
112 | |||
113 | static void | ||
114 | xfs_attr_leaf_write_verify( | ||
115 | struct xfs_buf *bp) | ||
116 | { | ||
117 | xfs_attr_leaf_verify(bp); | ||
118 | } | ||
119 | |||
120 | const struct xfs_buf_ops xfs_attr_leaf_buf_ops = { | ||
121 | .verify_read = xfs_attr_leaf_read_verify, | ||
122 | .verify_write = xfs_attr_leaf_write_verify, | ||
123 | }; | ||
124 | |||
125 | int | ||
126 | xfs_attr_leaf_read( | ||
127 | struct xfs_trans *tp, | ||
128 | struct xfs_inode *dp, | ||
129 | xfs_dablk_t bno, | ||
130 | xfs_daddr_t mappedbno, | ||
131 | struct xfs_buf **bpp) | ||
132 | { | ||
133 | return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, | ||
134 | XFS_ATTR_FORK, &xfs_attr_leaf_buf_ops); | ||
135 | } | ||
136 | |||
90 | /*======================================================================== | 137 | /*======================================================================== |
91 | * Namespace helper routines | 138 | * Namespace helper routines |
92 | *========================================================================*/ | 139 | *========================================================================*/ |
@@ -869,17 +916,16 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args) | |||
869 | error = xfs_da_grow_inode(args, &blkno); | 916 | error = xfs_da_grow_inode(args, &blkno); |
870 | if (error) | 917 | if (error) |
871 | goto out; | 918 | goto out; |
872 | error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1, | 919 | error = xfs_attr_leaf_read(args->trans, args->dp, 0, -1, &bp1); |
873 | XFS_ATTR_FORK); | ||
874 | if (error) | 920 | if (error) |
875 | goto out; | 921 | goto out; |
876 | ASSERT(bp1 != NULL); | 922 | |
877 | bp2 = NULL; | 923 | bp2 = NULL; |
878 | error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2, | 924 | error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp2, |
879 | XFS_ATTR_FORK); | 925 | XFS_ATTR_FORK); |
880 | if (error) | 926 | if (error) |
881 | goto out; | 927 | goto out; |
882 | ASSERT(bp2 != NULL); | 928 | bp2->b_ops = bp1->b_ops; |
883 | memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount)); | 929 | memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount)); |
884 | bp1 = NULL; | 930 | bp1 = NULL; |
885 | xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); | 931 | xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1); |
@@ -933,7 +979,7 @@ xfs_attr_leaf_create( | |||
933 | XFS_ATTR_FORK); | 979 | XFS_ATTR_FORK); |
934 | if (error) | 980 | if (error) |
935 | return(error); | 981 | return(error); |
936 | ASSERT(bp != NULL); | 982 | bp->b_ops = &xfs_attr_leaf_buf_ops; |
937 | leaf = bp->b_addr; | 983 | leaf = bp->b_addr; |
938 | memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); | 984 | memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount)); |
939 | hdr = &leaf->hdr; | 985 | hdr = &leaf->hdr; |
@@ -1071,7 +1117,7 @@ xfs_attr_leaf_add( | |||
1071 | * Compact the entries to coalesce free space. | 1117 | * Compact the entries to coalesce free space. |
1072 | * This may change the hdr->count via dropping INCOMPLETE entries. | 1118 | * This may change the hdr->count via dropping INCOMPLETE entries. |
1073 | */ | 1119 | */ |
1074 | xfs_attr_leaf_compact(args->trans, bp); | 1120 | xfs_attr_leaf_compact(args, bp); |
1075 | 1121 | ||
1076 | /* | 1122 | /* |
1077 | * After compaction, the block is guaranteed to have only one | 1123 | * After compaction, the block is guaranteed to have only one |
@@ -1102,6 +1148,8 @@ xfs_attr_leaf_add_work( | |||
1102 | xfs_mount_t *mp; | 1148 | xfs_mount_t *mp; |
1103 | int tmp, i; | 1149 | int tmp, i; |
1104 | 1150 | ||
1151 | trace_xfs_attr_leaf_add_work(args); | ||
1152 | |||
1105 | leaf = bp->b_addr; | 1153 | leaf = bp->b_addr; |
1106 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | 1154 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); |
1107 | hdr = &leaf->hdr; | 1155 | hdr = &leaf->hdr; |
@@ -1214,15 +1262,17 @@ xfs_attr_leaf_add_work( | |||
1214 | */ | 1262 | */ |
1215 | STATIC void | 1263 | STATIC void |
1216 | xfs_attr_leaf_compact( | 1264 | xfs_attr_leaf_compact( |
1217 | struct xfs_trans *trans, | 1265 | struct xfs_da_args *args, |
1218 | struct xfs_buf *bp) | 1266 | struct xfs_buf *bp) |
1219 | { | 1267 | { |
1220 | xfs_attr_leafblock_t *leaf_s, *leaf_d; | 1268 | xfs_attr_leafblock_t *leaf_s, *leaf_d; |
1221 | xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; | 1269 | xfs_attr_leaf_hdr_t *hdr_s, *hdr_d; |
1222 | xfs_mount_t *mp; | 1270 | struct xfs_trans *trans = args->trans; |
1223 | char *tmpbuffer; | 1271 | struct xfs_mount *mp = trans->t_mountp; |
1272 | char *tmpbuffer; | ||
1273 | |||
1274 | trace_xfs_attr_leaf_compact(args); | ||
1224 | 1275 | ||
1225 | mp = trans->t_mountp; | ||
1226 | tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); | 1276 | tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); |
1227 | ASSERT(tmpbuffer != NULL); | 1277 | ASSERT(tmpbuffer != NULL); |
1228 | memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); | 1278 | memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); |
@@ -1291,6 +1341,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
1291 | leaf2 = blk2->bp->b_addr; | 1341 | leaf2 = blk2->bp->b_addr; |
1292 | ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | 1342 | ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); |
1293 | ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | 1343 | ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); |
1344 | ASSERT(leaf2->hdr.count == 0); | ||
1294 | args = state->args; | 1345 | args = state->args; |
1295 | 1346 | ||
1296 | trace_xfs_attr_leaf_rebalance(args); | 1347 | trace_xfs_attr_leaf_rebalance(args); |
@@ -1344,9 +1395,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
1344 | max = be16_to_cpu(hdr2->firstused) | 1395 | max = be16_to_cpu(hdr2->firstused) |
1345 | - sizeof(xfs_attr_leaf_hdr_t); | 1396 | - sizeof(xfs_attr_leaf_hdr_t); |
1346 | max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t); | 1397 | max -= be16_to_cpu(hdr2->count) * sizeof(xfs_attr_leaf_entry_t); |
1347 | if (space > max) { | 1398 | if (space > max) |
1348 | xfs_attr_leaf_compact(args->trans, blk2->bp); | 1399 | xfs_attr_leaf_compact(args, blk2->bp); |
1349 | } | ||
1350 | 1400 | ||
1351 | /* | 1401 | /* |
1352 | * Move high entries from leaf1 to low end of leaf2. | 1402 | * Move high entries from leaf1 to low end of leaf2. |
@@ -1361,6 +1411,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
1361 | * I assert that since all callers pass in an empty | 1411 | * I assert that since all callers pass in an empty |
1362 | * second buffer, this code should never execute. | 1412 | * second buffer, this code should never execute. |
1363 | */ | 1413 | */ |
1414 | ASSERT(0); | ||
1364 | 1415 | ||
1365 | /* | 1416 | /* |
1366 | * Figure the total bytes to be added to the destination leaf. | 1417 | * Figure the total bytes to be added to the destination leaf. |
@@ -1376,9 +1427,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
1376 | max = be16_to_cpu(hdr1->firstused) | 1427 | max = be16_to_cpu(hdr1->firstused) |
1377 | - sizeof(xfs_attr_leaf_hdr_t); | 1428 | - sizeof(xfs_attr_leaf_hdr_t); |
1378 | max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t); | 1429 | max -= be16_to_cpu(hdr1->count) * sizeof(xfs_attr_leaf_entry_t); |
1379 | if (space > max) { | 1430 | if (space > max) |
1380 | xfs_attr_leaf_compact(args->trans, blk1->bp); | 1431 | xfs_attr_leaf_compact(args, blk1->bp); |
1381 | } | ||
1382 | 1432 | ||
1383 | /* | 1433 | /* |
1384 | * Move low entries from leaf2 to high end of leaf1. | 1434 | * Move low entries from leaf2 to high end of leaf1. |
@@ -1422,10 +1472,24 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
1422 | args->index2 = 0; | 1472 | args->index2 = 0; |
1423 | args->blkno2 = blk2->blkno; | 1473 | args->blkno2 = blk2->blkno; |
1424 | } else { | 1474 | } else { |
1475 | /* | ||
1476 | * On a double leaf split, the original attr location | ||
1477 | * is already stored in blkno2/index2, so don't | ||
1478 | * overwrite it, otherwise we corrupt the tree. | ||
1479 | */ | ||
1425 | blk2->index = blk1->index | 1480 | blk2->index = blk1->index |
1426 | - be16_to_cpu(leaf1->hdr.count); | 1481 | - be16_to_cpu(leaf1->hdr.count); |
1427 | args->index = args->index2 = blk2->index; | 1482 | args->index = blk2->index; |
1428 | args->blkno = args->blkno2 = blk2->blkno; | 1483 | args->blkno = blk2->blkno; |
1484 | if (!state->extravalid) { | ||
1485 | /* | ||
1486 | * set the new attr location to match the old | ||
1487 | * one and let the higher level split code | ||
1488 | * decide where in the leaf to place it. | ||
1489 | */ | ||
1490 | args->index2 = blk2->index; | ||
1491 | args->blkno2 = blk2->blkno; | ||
1492 | } | ||
1429 | } | 1493 | } |
1430 | } else { | 1494 | } else { |
1431 | ASSERT(state->inleaf == 1); | 1495 | ASSERT(state->inleaf == 1); |
@@ -1561,6 +1625,8 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) | |||
1561 | xfs_dablk_t blkno; | 1625 | xfs_dablk_t blkno; |
1562 | struct xfs_buf *bp; | 1626 | struct xfs_buf *bp; |
1563 | 1627 | ||
1628 | trace_xfs_attr_leaf_toosmall(state->args); | ||
1629 | |||
1564 | /* | 1630 | /* |
1565 | * Check for the degenerate case of the block being over 50% full. | 1631 | * Check for the degenerate case of the block being over 50% full. |
1566 | * If so, it's not worth even looking to see if we might be able | 1632 | * If so, it's not worth even looking to see if we might be able |
@@ -1620,18 +1686,16 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action) | |||
1620 | blkno = be32_to_cpu(info->back); | 1686 | blkno = be32_to_cpu(info->back); |
1621 | if (blkno == 0) | 1687 | if (blkno == 0) |
1622 | continue; | 1688 | continue; |
1623 | error = xfs_da_read_buf(state->args->trans, state->args->dp, | 1689 | error = xfs_attr_leaf_read(state->args->trans, state->args->dp, |
1624 | blkno, -1, &bp, XFS_ATTR_FORK); | 1690 | blkno, -1, &bp); |
1625 | if (error) | 1691 | if (error) |
1626 | return(error); | 1692 | return(error); |
1627 | ASSERT(bp != NULL); | ||
1628 | 1693 | ||
1629 | leaf = (xfs_attr_leafblock_t *)info; | 1694 | leaf = (xfs_attr_leafblock_t *)info; |
1630 | count = be16_to_cpu(leaf->hdr.count); | 1695 | count = be16_to_cpu(leaf->hdr.count); |
1631 | bytes = state->blocksize - (state->blocksize>>2); | 1696 | bytes = state->blocksize - (state->blocksize>>2); |
1632 | bytes -= be16_to_cpu(leaf->hdr.usedbytes); | 1697 | bytes -= be16_to_cpu(leaf->hdr.usedbytes); |
1633 | leaf = bp->b_addr; | 1698 | leaf = bp->b_addr; |
1634 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
1635 | count += be16_to_cpu(leaf->hdr.count); | 1699 | count += be16_to_cpu(leaf->hdr.count); |
1636 | bytes -= be16_to_cpu(leaf->hdr.usedbytes); | 1700 | bytes -= be16_to_cpu(leaf->hdr.usedbytes); |
1637 | bytes -= count * sizeof(xfs_attr_leaf_entry_t); | 1701 | bytes -= count * sizeof(xfs_attr_leaf_entry_t); |
@@ -1686,6 +1750,8 @@ xfs_attr_leaf_remove( | |||
1686 | int tablesize, tmp, i; | 1750 | int tablesize, tmp, i; |
1687 | xfs_mount_t *mp; | 1751 | xfs_mount_t *mp; |
1688 | 1752 | ||
1753 | trace_xfs_attr_leaf_remove(args); | ||
1754 | |||
1689 | leaf = bp->b_addr; | 1755 | leaf = bp->b_addr; |
1690 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | 1756 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); |
1691 | hdr = &leaf->hdr; | 1757 | hdr = &leaf->hdr; |
@@ -2495,15 +2561,11 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) | |||
2495 | /* | 2561 | /* |
2496 | * Set up the operation. | 2562 | * Set up the operation. |
2497 | */ | 2563 | */ |
2498 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, | 2564 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); |
2499 | XFS_ATTR_FORK); | 2565 | if (error) |
2500 | if (error) { | ||
2501 | return(error); | 2566 | return(error); |
2502 | } | ||
2503 | ASSERT(bp != NULL); | ||
2504 | 2567 | ||
2505 | leaf = bp->b_addr; | 2568 | leaf = bp->b_addr; |
2506 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
2507 | ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); | 2569 | ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); |
2508 | ASSERT(args->index >= 0); | 2570 | ASSERT(args->index >= 0); |
2509 | entry = &leaf->entries[ args->index ]; | 2571 | entry = &leaf->entries[ args->index ]; |
@@ -2560,15 +2622,11 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) | |||
2560 | /* | 2622 | /* |
2561 | * Set up the operation. | 2623 | * Set up the operation. |
2562 | */ | 2624 | */ |
2563 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp, | 2625 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); |
2564 | XFS_ATTR_FORK); | 2626 | if (error) |
2565 | if (error) { | ||
2566 | return(error); | 2627 | return(error); |
2567 | } | ||
2568 | ASSERT(bp != NULL); | ||
2569 | 2628 | ||
2570 | leaf = bp->b_addr; | 2629 | leaf = bp->b_addr; |
2571 | ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
2572 | ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); | 2630 | ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); |
2573 | ASSERT(args->index >= 0); | 2631 | ASSERT(args->index >= 0); |
2574 | entry = &leaf->entries[ args->index ]; | 2632 | entry = &leaf->entries[ args->index ]; |
@@ -2617,35 +2675,28 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) | |||
2617 | /* | 2675 | /* |
2618 | * Read the block containing the "old" attr | 2676 | * Read the block containing the "old" attr |
2619 | */ | 2677 | */ |
2620 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp1, | 2678 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1); |
2621 | XFS_ATTR_FORK); | 2679 | if (error) |
2622 | if (error) { | 2680 | return error; |
2623 | return(error); | ||
2624 | } | ||
2625 | ASSERT(bp1 != NULL); | ||
2626 | 2681 | ||
2627 | /* | 2682 | /* |
2628 | * Read the block containing the "new" attr, if it is different | 2683 | * Read the block containing the "new" attr, if it is different |
2629 | */ | 2684 | */ |
2630 | if (args->blkno2 != args->blkno) { | 2685 | if (args->blkno2 != args->blkno) { |
2631 | error = xfs_da_read_buf(args->trans, args->dp, args->blkno2, | 2686 | error = xfs_attr_leaf_read(args->trans, args->dp, args->blkno2, |
2632 | -1, &bp2, XFS_ATTR_FORK); | 2687 | -1, &bp2); |
2633 | if (error) { | 2688 | if (error) |
2634 | return(error); | 2689 | return error; |
2635 | } | ||
2636 | ASSERT(bp2 != NULL); | ||
2637 | } else { | 2690 | } else { |
2638 | bp2 = bp1; | 2691 | bp2 = bp1; |
2639 | } | 2692 | } |
2640 | 2693 | ||
2641 | leaf1 = bp1->b_addr; | 2694 | leaf1 = bp1->b_addr; |
2642 | ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
2643 | ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); | 2695 | ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); |
2644 | ASSERT(args->index >= 0); | 2696 | ASSERT(args->index >= 0); |
2645 | entry1 = &leaf1->entries[ args->index ]; | 2697 | entry1 = &leaf1->entries[ args->index ]; |
2646 | 2698 | ||
2647 | leaf2 = bp2->b_addr; | 2699 | leaf2 = bp2->b_addr; |
2648 | ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); | ||
2649 | ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); | 2700 | ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); |
2650 | ASSERT(args->index2 >= 0); | 2701 | ASSERT(args->index2 >= 0); |
2651 | entry2 = &leaf2->entries[ args->index2 ]; | 2702 | entry2 = &leaf2->entries[ args->index2 ]; |
@@ -2730,7 +2781,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) | |||
2730 | * the extents in reverse order the extent containing | 2781 | * the extents in reverse order the extent containing |
2731 | * block 0 must still be there. | 2782 | * block 0 must still be there. |
2732 | */ | 2783 | */ |
2733 | error = xfs_da_read_buf(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); | 2784 | error = xfs_da_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); |
2734 | if (error) | 2785 | if (error) |
2735 | return(error); | 2786 | return(error); |
2736 | blkno = XFS_BUF_ADDR(bp); | 2787 | blkno = XFS_BUF_ADDR(bp); |
@@ -2815,7 +2866,7 @@ xfs_attr_node_inactive( | |||
2815 | * traversal of the tree so we may deal with many blocks | 2866 | * traversal of the tree so we may deal with many blocks |
2816 | * before we come back to this one. | 2867 | * before we come back to this one. |
2817 | */ | 2868 | */ |
2818 | error = xfs_da_read_buf(*trans, dp, child_fsb, -2, &child_bp, | 2869 | error = xfs_da_node_read(*trans, dp, child_fsb, -2, &child_bp, |
2819 | XFS_ATTR_FORK); | 2870 | XFS_ATTR_FORK); |
2820 | if (error) | 2871 | if (error) |
2821 | return(error); | 2872 | return(error); |
@@ -2856,8 +2907,8 @@ xfs_attr_node_inactive( | |||
2856 | * child block number. | 2907 | * child block number. |
2857 | */ | 2908 | */ |
2858 | if ((i+1) < count) { | 2909 | if ((i+1) < count) { |
2859 | error = xfs_da_read_buf(*trans, dp, 0, parent_blkno, | 2910 | error = xfs_da_node_read(*trans, dp, 0, parent_blkno, |
2860 | &bp, XFS_ATTR_FORK); | 2911 | &bp, XFS_ATTR_FORK); |
2861 | if (error) | 2912 | if (error) |
2862 | return(error); | 2913 | return(error); |
2863 | child_fsb = be32_to_cpu(node->btree[i+1].before); | 2914 | child_fsb = be32_to_cpu(node->btree[i+1].before); |
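Two read helpers are in play in the attr code after this patch, one per on-disk block format, so each buffer is checked by the matching verifier; the signatures below are as they appear in the hunks above:

	/* leaf-format attr block: fork is implied, verifier is leaf-specific */
	error = xfs_attr_leaf_read(args->trans, args->dp, blkno, -1, &bp);

	/* node-format (da btree) block: keeps the xfs_da_read_buf signature */
	error = xfs_da_node_read(args->trans, args->dp, blkno, -1, &bp,
				 XFS_ATTR_FORK);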
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index dea17722945e..77de139a58f0 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h | |||
@@ -261,4 +261,10 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, | |||
261 | struct xfs_buf *leaf2_bp); | 261 | struct xfs_buf *leaf2_bp); |
262 | int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, | 262 | int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, |
263 | int *local); | 263 | int *local); |
264 | int xfs_attr_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
265 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | ||
266 | struct xfs_buf **bpp); | ||
267 | |||
268 | extern const struct xfs_buf_ops xfs_attr_leaf_buf_ops; | ||
269 | |||
264 | #endif /* __XFS_ATTR_LEAF_H__ */ | 270 | #endif /* __XFS_ATTR_LEAF_H__ */ |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 83d0cf3df930..0e92d12765d2 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -2662,8 +2662,9 @@ xfs_bmap_btree_to_extents( | |||
2662 | if ((error = xfs_btree_check_lptr(cur, cbno, 1))) | 2662 | if ((error = xfs_btree_check_lptr(cur, cbno, 1))) |
2663 | return error; | 2663 | return error; |
2664 | #endif | 2664 | #endif |
2665 | if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, | 2665 | error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, |
2666 | XFS_BMAP_BTREE_REF))) | 2666 | &xfs_bmbt_buf_ops); |
2667 | if (error) | ||
2667 | return error; | 2668 | return error; |
2668 | cblock = XFS_BUF_TO_BLOCK(cbp); | 2669 | cblock = XFS_BUF_TO_BLOCK(cbp); |
2669 | if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) | 2670 | if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) |
@@ -3123,6 +3124,7 @@ xfs_bmap_extents_to_btree( | |||
3123 | /* | 3124 | /* |
3124 | * Fill in the child block. | 3125 | * Fill in the child block. |
3125 | */ | 3126 | */ |
3127 | abp->b_ops = &xfs_bmbt_buf_ops; | ||
3126 | ablock = XFS_BUF_TO_BLOCK(abp); | 3128 | ablock = XFS_BUF_TO_BLOCK(abp); |
3127 | ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); | 3129 | ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); |
3128 | ablock->bb_level = 0; | 3130 | ablock->bb_level = 0; |
@@ -3269,6 +3271,7 @@ xfs_bmap_local_to_extents( | |||
3269 | ASSERT(args.len == 1); | 3271 | ASSERT(args.len == 1); |
3270 | *firstblock = args.fsbno; | 3272 | *firstblock = args.fsbno; |
3271 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); | 3273 | bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); |
3274 | bp->b_ops = &xfs_bmbt_buf_ops; | ||
3272 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); | 3275 | memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); |
3273 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); | 3276 | xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); |
3274 | xfs_bmap_forkoff_reset(args.mp, ip, whichfork); | 3277 | xfs_bmap_forkoff_reset(args.mp, ip, whichfork); |
@@ -4078,8 +4081,9 @@ xfs_bmap_read_extents( | |||
4078 | * pointer (leftmost) at each level. | 4081 | * pointer (leftmost) at each level. |
4079 | */ | 4082 | */ |
4080 | while (level-- > 0) { | 4083 | while (level-- > 0) { |
4081 | if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, | 4084 | error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, |
4082 | XFS_BMAP_BTREE_REF))) | 4085 | XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); |
4086 | if (error) | ||
4083 | return error; | 4087 | return error; |
4084 | block = XFS_BUF_TO_BLOCK(bp); | 4088 | block = XFS_BUF_TO_BLOCK(bp); |
4085 | XFS_WANT_CORRUPTED_GOTO( | 4089 | XFS_WANT_CORRUPTED_GOTO( |
@@ -4124,7 +4128,8 @@ xfs_bmap_read_extents( | |||
4124 | */ | 4128 | */ |
4125 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); | 4129 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); |
4126 | if (nextbno != NULLFSBLOCK) | 4130 | if (nextbno != NULLFSBLOCK) |
4127 | xfs_btree_reada_bufl(mp, nextbno, 1); | 4131 | xfs_btree_reada_bufl(mp, nextbno, 1, |
4132 | &xfs_bmbt_buf_ops); | ||
4128 | /* | 4133 | /* |
4129 | * Copy records into the extent records. | 4134 | * Copy records into the extent records. |
4130 | */ | 4135 | */ |
@@ -4156,8 +4161,9 @@ xfs_bmap_read_extents( | |||
4156 | */ | 4161 | */ |
4157 | if (bno == NULLFSBLOCK) | 4162 | if (bno == NULLFSBLOCK) |
4158 | break; | 4163 | break; |
4159 | if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, | 4164 | error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, |
4160 | XFS_BMAP_BTREE_REF))) | 4165 | XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); |
4166 | if (error) | ||
4161 | return error; | 4167 | return error; |
4162 | block = XFS_BUF_TO_BLOCK(bp); | 4168 | block = XFS_BUF_TO_BLOCK(bp); |
4163 | } | 4169 | } |
@@ -5599,7 +5605,7 @@ xfs_getbmap( | |||
5599 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 5605 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
5600 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { | 5606 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { |
5601 | if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { | 5607 | if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { |
5602 | error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF); | 5608 | error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); |
5603 | if (error) | 5609 | if (error) |
5604 | goto out_unlock_iolock; | 5610 | goto out_unlock_iolock; |
5605 | } | 5611 | } |
@@ -5868,15 +5874,16 @@ xfs_bmap_check_leaf_extents( | |||
5868 | */ | 5874 | */ |
5869 | while (level-- > 0) { | 5875 | while (level-- > 0) { |
5870 | /* See if buf is in cur first */ | 5876 | /* See if buf is in cur first */ |
5877 | bp_release = 0; | ||
5871 | bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); | 5878 | bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); |
5872 | if (bp) { | 5879 | if (!bp) { |
5873 | bp_release = 0; | ||
5874 | } else { | ||
5875 | bp_release = 1; | 5880 | bp_release = 1; |
5881 | error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, | ||
5882 | XFS_BMAP_BTREE_REF, | ||
5883 | &xfs_bmbt_buf_ops); | ||
5884 | if (error) | ||
5885 | goto error_norelse; | ||
5876 | } | 5886 | } |
5877 | if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, | ||
5878 | XFS_BMAP_BTREE_REF))) | ||
5879 | goto error_norelse; | ||
5880 | block = XFS_BUF_TO_BLOCK(bp); | 5887 | block = XFS_BUF_TO_BLOCK(bp); |
5881 | XFS_WANT_CORRUPTED_GOTO( | 5888 | XFS_WANT_CORRUPTED_GOTO( |
5882 | xfs_bmap_sanity_check(mp, bp, level), | 5889 | xfs_bmap_sanity_check(mp, bp, level), |
@@ -5953,15 +5960,16 @@ xfs_bmap_check_leaf_extents( | |||
5953 | if (bno == NULLFSBLOCK) | 5960 | if (bno == NULLFSBLOCK) |
5954 | break; | 5961 | break; |
5955 | 5962 | ||
5963 | bp_release = 0; | ||
5956 | bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); | 5964 | bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); |
5957 | if (bp) { | 5965 | if (!bp) { |
5958 | bp_release = 0; | ||
5959 | } else { | ||
5960 | bp_release = 1; | 5966 | bp_release = 1; |
5967 | error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, | ||
5968 | XFS_BMAP_BTREE_REF, | ||
5969 | &xfs_bmbt_buf_ops); | ||
5970 | if (error) | ||
5971 | goto error_norelse; | ||
5961 | } | 5972 | } |
5962 | if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, | ||
5963 | XFS_BMAP_BTREE_REF))) | ||
5964 | goto error_norelse; | ||
5965 | block = XFS_BUF_TO_BLOCK(bp); | 5973 | block = XFS_BUF_TO_BLOCK(bp); |
5966 | } | 5974 | } |
5967 | if (bp_release) { | 5975 | if (bp_release) { |
@@ -6052,7 +6060,9 @@ xfs_bmap_count_tree( | |||
6052 | struct xfs_btree_block *block, *nextblock; | 6060 | struct xfs_btree_block *block, *nextblock; |
6053 | int numrecs; | 6061 | int numrecs; |
6054 | 6062 | ||
6055 | if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) | 6063 | error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, |
6064 | &xfs_bmbt_buf_ops); | ||
6065 | if (error) | ||
6056 | return error; | 6066 | return error; |
6057 | *count += 1; | 6067 | *count += 1; |
6058 | block = XFS_BUF_TO_BLOCK(bp); | 6068 | block = XFS_BUF_TO_BLOCK(bp); |
@@ -6061,8 +6071,10 @@ xfs_bmap_count_tree( | |||
6061 | /* Not at node above leaves, count this level of nodes */ | 6071 | /* Not at node above leaves, count this level of nodes */ |
6062 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); | 6072 | nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); |
6063 | while (nextbno != NULLFSBLOCK) { | 6073 | while (nextbno != NULLFSBLOCK) { |
6064 | if ((error = xfs_btree_read_bufl(mp, tp, nextbno, | 6074 | error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, |
6065 | 0, &nbp, XFS_BMAP_BTREE_REF))) | 6075 | XFS_BMAP_BTREE_REF, |
6076 | &xfs_bmbt_buf_ops); | ||
6077 | if (error) | ||
6066 | return error; | 6078 | return error; |
6067 | *count += 1; | 6079 | *count += 1; |
6068 | nextblock = XFS_BUF_TO_BLOCK(nbp); | 6080 | nextblock = XFS_BUF_TO_BLOCK(nbp); |
@@ -6091,8 +6103,10 @@ xfs_bmap_count_tree( | |||
6091 | if (nextbno == NULLFSBLOCK) | 6103 | if (nextbno == NULLFSBLOCK) |
6092 | break; | 6104 | break; |
6093 | bno = nextbno; | 6105 | bno = nextbno; |
6094 | if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, | 6106 | error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, |
6095 | XFS_BMAP_BTREE_REF))) | 6107 | XFS_BMAP_BTREE_REF, |
6108 | &xfs_bmbt_buf_ops); | ||
6109 | if (error) | ||
6096 | return error; | 6110 | return error; |
6097 | *count += 1; | 6111 | *count += 1; |
6098 | block = XFS_BUF_TO_BLOCK(bp); | 6112 | block = XFS_BUF_TO_BLOCK(bp); |
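All the xfs_bmap.c hunks thread the same verifier through the long-form btree helpers; a condensed sketch of the two cases (read path versus freshly built buffers), with names as in the patch:

	/* reads and readahead now carry the bmbt verifier */
	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
				    &xfs_bmbt_buf_ops);
	if (error)
		return error;
	xfs_btree_reada_bufl(mp, nextbno, 1, &xfs_bmbt_buf_ops);

	/*
	 * blocks constructed in memory must have the ops attached by hand,
	 * since no read verifier ever ran on them
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;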
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 862084a47a7e..061b45cbe614 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "xfs_bmap.h" | 36 | #include "xfs_bmap.h" |
37 | #include "xfs_error.h" | 37 | #include "xfs_error.h" |
38 | #include "xfs_quota.h" | 38 | #include "xfs_quota.h" |
39 | #include "xfs_trace.h" | ||
39 | 40 | ||
40 | /* | 41 | /* |
41 | * Determine the extent state. | 42 | * Determine the extent state. |
@@ -707,6 +708,67 @@ xfs_bmbt_key_diff( | |||
707 | cur->bc_rec.b.br_startoff; | 708 | cur->bc_rec.b.br_startoff; |
708 | } | 709 | } |
709 | 710 | ||
711 | static void | ||
712 | xfs_bmbt_verify( | ||
713 | struct xfs_buf *bp) | ||
714 | { | ||
715 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
716 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
717 | unsigned int level; | ||
718 | int lblock_ok; /* block passes checks */ | ||
719 | |||
720 | /* magic number and level verification. | ||
721 | * | ||
722 | * We don't know what fork we belong to, so just verify that the level | ||
723 | * is less than the maximum of the two. Later checks will be more | ||
724 | * precise. | ||
725 | */ | ||
726 | level = be16_to_cpu(block->bb_level); | ||
727 | lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) && | ||
728 | level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]); | ||
729 | |||
730 | /* numrecs verification */ | ||
731 | lblock_ok = lblock_ok && | ||
732 | be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0]; | ||
733 | |||
734 | /* sibling pointer verification */ | ||
735 | lblock_ok = lblock_ok && | ||
736 | block->bb_u.l.bb_leftsib && | ||
737 | (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || | ||
738 | XFS_FSB_SANITY_CHECK(mp, | ||
739 | be64_to_cpu(block->bb_u.l.bb_leftsib))) && | ||
740 | block->bb_u.l.bb_rightsib && | ||
741 | (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || | ||
742 | XFS_FSB_SANITY_CHECK(mp, | ||
743 | be64_to_cpu(block->bb_u.l.bb_rightsib))); | ||
744 | |||
745 | if (!lblock_ok) { | ||
746 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
747 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); | ||
748 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
749 | } | ||
750 | } | ||
751 | |||
752 | static void | ||
753 | xfs_bmbt_read_verify( | ||
754 | struct xfs_buf *bp) | ||
755 | { | ||
756 | xfs_bmbt_verify(bp); | ||
757 | } | ||
758 | |||
759 | static void | ||
760 | xfs_bmbt_write_verify( | ||
761 | struct xfs_buf *bp) | ||
762 | { | ||
763 | xfs_bmbt_verify(bp); | ||
764 | } | ||
765 | |||
766 | const struct xfs_buf_ops xfs_bmbt_buf_ops = { | ||
767 | .verify_read = xfs_bmbt_read_verify, | ||
768 | .verify_write = xfs_bmbt_write_verify, | ||
769 | }; | ||
770 | |||
771 | |||
710 | #ifdef DEBUG | 772 | #ifdef DEBUG |
711 | STATIC int | 773 | STATIC int |
712 | xfs_bmbt_keys_inorder( | 774 | xfs_bmbt_keys_inorder( |
@@ -746,6 +808,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { | |||
746 | .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, | 808 | .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, |
747 | .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, | 809 | .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, |
748 | .key_diff = xfs_bmbt_key_diff, | 810 | .key_diff = xfs_bmbt_key_diff, |
811 | .buf_ops = &xfs_bmbt_buf_ops, | ||
749 | #ifdef DEBUG | 812 | #ifdef DEBUG |
750 | .keys_inorder = xfs_bmbt_keys_inorder, | 813 | .keys_inorder = xfs_bmbt_keys_inorder, |
751 | .recs_inorder = xfs_bmbt_recs_inorder, | 814 | .recs_inorder = xfs_bmbt_recs_inorder, |
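The verifier plumbing pattern, reduced to its skeleton (this is the shape the patch introduces; only the bmbt instance is shown):

	const struct xfs_buf_ops xfs_bmbt_buf_ops = {
		.verify_read	= xfs_bmbt_read_verify,	 /* after I/O completes */
		.verify_write	= xfs_bmbt_write_verify, /* before I/O is issued */
	};

	static const struct xfs_btree_ops xfs_bmbt_ops = {
		/* ...existing callouts... */
		.buf_ops	= &xfs_bmbt_buf_ops,	 /* generic code finds it here */
	};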
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 0e66c4ea0f85..88469ca08696 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h | |||
@@ -236,5 +236,6 @@ extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); | |||
236 | extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, | 236 | extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, |
237 | struct xfs_trans *, struct xfs_inode *, int); | 237 | struct xfs_trans *, struct xfs_inode *, int); |
238 | 238 | ||
239 | extern const struct xfs_buf_ops xfs_bmbt_buf_ops; | ||
239 | 240 | ||
240 | #endif /* __XFS_BMAP_BTREE_H__ */ | 241 | #endif /* __XFS_BMAP_BTREE_H__ */ |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e53e317b1582..db010408d701 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -266,9 +266,13 @@ xfs_btree_dup_cursor( | |||
266 | for (i = 0; i < new->bc_nlevels; i++) { | 266 | for (i = 0; i < new->bc_nlevels; i++) { |
267 | new->bc_ptrs[i] = cur->bc_ptrs[i]; | 267 | new->bc_ptrs[i] = cur->bc_ptrs[i]; |
268 | new->bc_ra[i] = cur->bc_ra[i]; | 268 | new->bc_ra[i] = cur->bc_ra[i]; |
269 | if ((bp = cur->bc_bufs[i])) { | 269 | bp = cur->bc_bufs[i]; |
270 | if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 270 | if (bp) { |
271 | XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) { | 271 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
272 | XFS_BUF_ADDR(bp), mp->m_bsize, | ||
273 | 0, &bp, | ||
274 | cur->bc_ops->buf_ops); | ||
275 | if (error) { | ||
272 | xfs_btree_del_cursor(new, error); | 276 | xfs_btree_del_cursor(new, error); |
273 | *ncur = NULL; | 277 | *ncur = NULL; |
274 | return error; | 278 | return error; |
@@ -609,25 +613,26 @@ xfs_btree_offsets( | |||
609 | * Get a buffer for the block, return it read in. | 613 | * Get a buffer for the block, return it read in. |
610 | * Long-form addressing. | 614 | * Long-form addressing. |
611 | */ | 615 | */ |
612 | int /* error */ | 616 | int |
613 | xfs_btree_read_bufl( | 617 | xfs_btree_read_bufl( |
614 | xfs_mount_t *mp, /* file system mount point */ | 618 | struct xfs_mount *mp, /* file system mount point */ |
615 | xfs_trans_t *tp, /* transaction pointer */ | 619 | struct xfs_trans *tp, /* transaction pointer */ |
616 | xfs_fsblock_t fsbno, /* file system block number */ | 620 | xfs_fsblock_t fsbno, /* file system block number */ |
617 | uint lock, /* lock flags for read_buf */ | 621 | uint lock, /* lock flags for read_buf */ |
618 | xfs_buf_t **bpp, /* buffer for fsbno */ | 622 | struct xfs_buf **bpp, /* buffer for fsbno */ |
619 | int refval) /* ref count value for buffer */ | 623 | int refval, /* ref count value for buffer */ |
620 | { | 624 | const struct xfs_buf_ops *ops) |
621 | xfs_buf_t *bp; /* return value */ | 625 | { |
626 | struct xfs_buf *bp; /* return value */ | ||
622 | xfs_daddr_t d; /* real disk block address */ | 627 | xfs_daddr_t d; /* real disk block address */ |
623 | int error; | 628 | int error; |
624 | 629 | ||
625 | ASSERT(fsbno != NULLFSBLOCK); | 630 | ASSERT(fsbno != NULLFSBLOCK); |
626 | d = XFS_FSB_TO_DADDR(mp, fsbno); | 631 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
627 | if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, | 632 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, |
628 | mp->m_bsize, lock, &bp))) { | 633 | mp->m_bsize, lock, &bp, ops); |
634 | if (error) | ||
629 | return error; | 635 | return error; |
630 | } | ||
631 | ASSERT(!xfs_buf_geterror(bp)); | 636 | ASSERT(!xfs_buf_geterror(bp)); |
632 | if (bp) | 637 | if (bp) |
633 | xfs_buf_set_ref(bp, refval); | 638 | xfs_buf_set_ref(bp, refval); |
@@ -642,15 +647,16 @@ xfs_btree_read_bufl( | |||
642 | /* ARGSUSED */ | 647 | /* ARGSUSED */ |
643 | void | 648 | void |
644 | xfs_btree_reada_bufl( | 649 | xfs_btree_reada_bufl( |
645 | xfs_mount_t *mp, /* file system mount point */ | 650 | struct xfs_mount *mp, /* file system mount point */ |
646 | xfs_fsblock_t fsbno, /* file system block number */ | 651 | xfs_fsblock_t fsbno, /* file system block number */ |
647 | xfs_extlen_t count) /* count of filesystem blocks */ | 652 | xfs_extlen_t count, /* count of filesystem blocks */ |
653 | const struct xfs_buf_ops *ops) | ||
648 | { | 654 | { |
649 | xfs_daddr_t d; | 655 | xfs_daddr_t d; |
650 | 656 | ||
651 | ASSERT(fsbno != NULLFSBLOCK); | 657 | ASSERT(fsbno != NULLFSBLOCK); |
652 | d = XFS_FSB_TO_DADDR(mp, fsbno); | 658 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
653 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); | 659 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); |
654 | } | 660 | } |
655 | 661 | ||
656 | /* | 662 | /* |
@@ -660,17 +666,18 @@ xfs_btree_reada_bufl( | |||
660 | /* ARGSUSED */ | 666 | /* ARGSUSED */ |
661 | void | 667 | void |
662 | xfs_btree_reada_bufs( | 668 | xfs_btree_reada_bufs( |
663 | xfs_mount_t *mp, /* file system mount point */ | 669 | struct xfs_mount *mp, /* file system mount point */ |
664 | xfs_agnumber_t agno, /* allocation group number */ | 670 | xfs_agnumber_t agno, /* allocation group number */ |
665 | xfs_agblock_t agbno, /* allocation group block number */ | 671 | xfs_agblock_t agbno, /* allocation group block number */ |
666 | xfs_extlen_t count) /* count of filesystem blocks */ | 672 | xfs_extlen_t count, /* count of filesystem blocks */ |
673 | const struct xfs_buf_ops *ops) | ||
667 | { | 674 | { |
668 | xfs_daddr_t d; | 675 | xfs_daddr_t d; |
669 | 676 | ||
670 | ASSERT(agno != NULLAGNUMBER); | 677 | ASSERT(agno != NULLAGNUMBER); |
671 | ASSERT(agbno != NULLAGBLOCK); | 678 | ASSERT(agbno != NULLAGBLOCK); |
672 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | 679 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); |
673 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); | 680 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); |
674 | } | 681 | } |
675 | 682 | ||
676 | STATIC int | 683 | STATIC int |
@@ -684,12 +691,14 @@ xfs_btree_readahead_lblock( | |||
684 | xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); | 691 | xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); |
685 | 692 | ||
686 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { | 693 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { |
687 | xfs_btree_reada_bufl(cur->bc_mp, left, 1); | 694 | xfs_btree_reada_bufl(cur->bc_mp, left, 1, |
695 | cur->bc_ops->buf_ops); | ||
688 | rval++; | 696 | rval++; |
689 | } | 697 | } |
690 | 698 | ||
691 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { | 699 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { |
692 | xfs_btree_reada_bufl(cur->bc_mp, right, 1); | 700 | xfs_btree_reada_bufl(cur->bc_mp, right, 1, |
701 | cur->bc_ops->buf_ops); | ||
693 | rval++; | 702 | rval++; |
694 | } | 703 | } |
695 | 704 | ||
@@ -709,13 +718,13 @@ xfs_btree_readahead_sblock( | |||
709 | 718 | ||
710 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { | 719 | if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { |
711 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, | 720 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, |
712 | left, 1); | 721 | left, 1, cur->bc_ops->buf_ops); |
713 | rval++; | 722 | rval++; |
714 | } | 723 | } |
715 | 724 | ||
716 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { | 725 | if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { |
717 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, | 726 | xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, |
718 | right, 1); | 727 | right, 1, cur->bc_ops->buf_ops); |
719 | rval++; | 728 | rval++; |
720 | } | 729 | } |
721 | 730 | ||
@@ -853,18 +862,22 @@ xfs_btree_set_sibling( | |||
853 | } | 862 | } |
854 | } | 863 | } |
855 | 864 | ||
856 | STATIC void | 865 | void |
857 | xfs_btree_init_block( | 866 | xfs_btree_init_block( |
858 | struct xfs_btree_cur *cur, | 867 | struct xfs_mount *mp, |
859 | int level, | 868 | struct xfs_buf *bp, |
860 | int numrecs, | 869 | __u32 magic, |
861 | struct xfs_btree_block *new) /* new block */ | 870 | __u16 level, |
871 | __u16 numrecs, | ||
872 | unsigned int flags) | ||
862 | { | 873 | { |
863 | new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); | 874 | struct xfs_btree_block *new = XFS_BUF_TO_BLOCK(bp); |
875 | |||
876 | new->bb_magic = cpu_to_be32(magic); | ||
864 | new->bb_level = cpu_to_be16(level); | 877 | new->bb_level = cpu_to_be16(level); |
865 | new->bb_numrecs = cpu_to_be16(numrecs); | 878 | new->bb_numrecs = cpu_to_be16(numrecs); |
866 | 879 | ||
867 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | 880 | if (flags & XFS_BTREE_LONG_PTRS) { |
868 | new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); | 881 | new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); |
869 | new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); | 882 | new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); |
870 | } else { | 883 | } else { |
@@ -873,6 +886,17 @@ xfs_btree_init_block( | |||
873 | } | 886 | } |
874 | } | 887 | } |
875 | 888 | ||
889 | STATIC void | ||
890 | xfs_btree_init_block_cur( | ||
891 | struct xfs_btree_cur *cur, | ||
892 | int level, | ||
893 | int numrecs, | ||
894 | struct xfs_buf *bp) | ||
895 | { | ||
896 | xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum], | ||
897 | level, numrecs, cur->bc_flags); | ||
898 | } | ||
899 | |||
876 | /* | 900 | /* |
877 | * Return true if ptr is the last record in the btree and | 901 | * Return true if ptr is the last record in the btree and |
878 | * we need to track updates to this record. The decision | 902 | * we need to track updates to this record. The decision |
@@ -972,6 +996,7 @@ xfs_btree_get_buf_block( | |||
972 | if (!*bpp) | 996 | if (!*bpp) |
973 | return ENOMEM; | 997 | return ENOMEM; |
974 | 998 | ||
999 | (*bpp)->b_ops = cur->bc_ops->buf_ops; | ||
975 | *block = XFS_BUF_TO_BLOCK(*bpp); | 1000 | *block = XFS_BUF_TO_BLOCK(*bpp); |
976 | return 0; | 1001 | return 0; |
977 | } | 1002 | } |
@@ -998,19 +1023,15 @@ xfs_btree_read_buf_block( | |||
998 | 1023 | ||
999 | d = xfs_btree_ptr_to_daddr(cur, ptr); | 1024 | d = xfs_btree_ptr_to_daddr(cur, ptr); |
1000 | error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, | 1025 | error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, |
1001 | mp->m_bsize, flags, bpp); | 1026 | mp->m_bsize, flags, bpp, |
1027 | cur->bc_ops->buf_ops); | ||
1002 | if (error) | 1028 | if (error) |
1003 | return error; | 1029 | return error; |
1004 | 1030 | ||
1005 | ASSERT(!xfs_buf_geterror(*bpp)); | 1031 | ASSERT(!xfs_buf_geterror(*bpp)); |
1006 | |||
1007 | xfs_btree_set_refs(cur, *bpp); | 1032 | xfs_btree_set_refs(cur, *bpp); |
1008 | *block = XFS_BUF_TO_BLOCK(*bpp); | 1033 | *block = XFS_BUF_TO_BLOCK(*bpp); |
1009 | 1034 | return 0; | |
1010 | error = xfs_btree_check_block(cur, *block, level, *bpp); | ||
1011 | if (error) | ||
1012 | xfs_trans_brelse(cur->bc_tp, *bpp); | ||
1013 | return error; | ||
1014 | } | 1035 | } |
1015 | 1036 | ||
1016 | /* | 1037 | /* |
@@ -2183,7 +2204,7 @@ xfs_btree_split( | |||
2183 | goto error0; | 2204 | goto error0; |
2184 | 2205 | ||
2185 | /* Fill in the btree header for the new right block. */ | 2206 | /* Fill in the btree header for the new right block. */ |
2186 | xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right); | 2207 | xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp); |
2187 | 2208 | ||
2188 | /* | 2209 | /* |
2189 | * Split the entries between the old and the new block evenly. | 2210 | * Split the entries between the old and the new block evenly. |
@@ -2492,7 +2513,7 @@ xfs_btree_new_root( | |||
2492 | nptr = 2; | 2513 | nptr = 2; |
2493 | } | 2514 | } |
2494 | /* Fill in the new block's btree header and log it. */ | 2515 | /* Fill in the new block's btree header and log it. */ |
2495 | xfs_btree_init_block(cur, cur->bc_nlevels, 2, new); | 2516 | xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp); |
2496 | xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); | 2517 | xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); |
2497 | ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && | 2518 | ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && |
2498 | !xfs_btree_ptr_is_null(cur, &rptr)); | 2519 | !xfs_btree_ptr_is_null(cur, &rptr)); |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 5b240de104c0..f932897194eb 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -188,6 +188,8 @@ struct xfs_btree_ops { | |||
188 | __int64_t (*key_diff)(struct xfs_btree_cur *cur, | 188 | __int64_t (*key_diff)(struct xfs_btree_cur *cur, |
189 | union xfs_btree_key *key); | 189 | union xfs_btree_key *key); |
190 | 190 | ||
191 | const struct xfs_buf_ops *buf_ops; | ||
192 | |||
191 | #ifdef DEBUG | 193 | #ifdef DEBUG |
192 | /* check that k1 is lower than k2 */ | 194 | /* check that k1 is lower than k2 */ |
193 | int (*keys_inorder)(struct xfs_btree_cur *cur, | 195 | int (*keys_inorder)(struct xfs_btree_cur *cur, |
@@ -355,7 +357,8 @@ xfs_btree_read_bufl( | |||
355 | xfs_fsblock_t fsbno, /* file system block number */ | 357 | xfs_fsblock_t fsbno, /* file system block number */ |
356 | uint lock, /* lock flags for read_buf */ | 358 | uint lock, /* lock flags for read_buf */ |
357 | struct xfs_buf **bpp, /* buffer for fsbno */ | 359 | struct xfs_buf **bpp, /* buffer for fsbno */ |
358 | int refval);/* ref count value for buffer */ | 360 | int refval, /* ref count value for buffer */ |
361 | const struct xfs_buf_ops *ops); | ||
359 | 362 | ||
360 | /* | 363 | /* |
361 | * Read-ahead the block, don't wait for it, don't return a buffer. | 364 | * Read-ahead the block, don't wait for it, don't return a buffer. |
@@ -365,7 +368,8 @@ void /* error */ | |||
365 | xfs_btree_reada_bufl( | 368 | xfs_btree_reada_bufl( |
366 | struct xfs_mount *mp, /* file system mount point */ | 369 | struct xfs_mount *mp, /* file system mount point */ |
367 | xfs_fsblock_t fsbno, /* file system block number */ | 370 | xfs_fsblock_t fsbno, /* file system block number */ |
368 | xfs_extlen_t count); /* count of filesystem blocks */ | 371 | xfs_extlen_t count, /* count of filesystem blocks */ |
372 | const struct xfs_buf_ops *ops); | ||
369 | 373 | ||
370 | /* | 374 | /* |
371 | * Read-ahead the block, don't wait for it, don't return a buffer. | 375 | * Read-ahead the block, don't wait for it, don't return a buffer. |
@@ -376,8 +380,20 @@ xfs_btree_reada_bufs( | |||
376 | struct xfs_mount *mp, /* file system mount point */ | 380 | struct xfs_mount *mp, /* file system mount point */ |
377 | xfs_agnumber_t agno, /* allocation group number */ | 381 | xfs_agnumber_t agno, /* allocation group number */ |
378 | xfs_agblock_t agbno, /* allocation group block number */ | 382 | xfs_agblock_t agbno, /* allocation group block number */ |
379 | xfs_extlen_t count); /* count of filesystem blocks */ | 383 | xfs_extlen_t count, /* count of filesystem blocks */ |
384 | const struct xfs_buf_ops *ops); | ||
380 | 385 | ||
386 | /* | ||
387 | * Initialise a new btree block header | ||
388 | */ | ||
389 | void | ||
390 | xfs_btree_init_block( | ||
391 | struct xfs_mount *mp, | ||
392 | struct xfs_buf *bp, | ||
393 | __u32 magic, | ||
394 | __u16 level, | ||
395 | __u16 numrecs, | ||
396 | unsigned int flags); | ||
381 | 397 | ||
382 | /* | 398 | /* |
383 | * Common btree core entry points. | 399 | * Common btree core entry points. |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 933b7930b863..26673a0b20e7 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -569,7 +569,9 @@ found: | |||
569 | */ | 569 | */ |
570 | if (bp->b_flags & XBF_STALE) { | 570 | if (bp->b_flags & XBF_STALE) { |
571 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); | 571 | ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); |
572 | ASSERT(bp->b_iodone == NULL); | ||
572 | bp->b_flags &= _XBF_KMEM | _XBF_PAGES; | 573 | bp->b_flags &= _XBF_KMEM | _XBF_PAGES; |
574 | bp->b_ops = NULL; | ||
573 | } | 575 | } |
574 | 576 | ||
575 | trace_xfs_buf_find(bp, flags, _RET_IP_); | 577 | trace_xfs_buf_find(bp, flags, _RET_IP_); |
@@ -654,7 +656,8 @@ xfs_buf_read_map( | |||
654 | struct xfs_buftarg *target, | 656 | struct xfs_buftarg *target, |
655 | struct xfs_buf_map *map, | 657 | struct xfs_buf_map *map, |
656 | int nmaps, | 658 | int nmaps, |
657 | xfs_buf_flags_t flags) | 659 | xfs_buf_flags_t flags, |
660 | const struct xfs_buf_ops *ops) | ||
658 | { | 661 | { |
659 | struct xfs_buf *bp; | 662 | struct xfs_buf *bp; |
660 | 663 | ||
@@ -666,6 +669,7 @@ xfs_buf_read_map( | |||
666 | 669 | ||
667 | if (!XFS_BUF_ISDONE(bp)) { | 670 | if (!XFS_BUF_ISDONE(bp)) { |
668 | XFS_STATS_INC(xb_get_read); | 671 | XFS_STATS_INC(xb_get_read); |
672 | bp->b_ops = ops; | ||
669 | _xfs_buf_read(bp, flags); | 673 | _xfs_buf_read(bp, flags); |
670 | } else if (flags & XBF_ASYNC) { | 674 | } else if (flags & XBF_ASYNC) { |
671 | /* | 675 | /* |
@@ -691,13 +695,14 @@ void | |||
691 | xfs_buf_readahead_map( | 695 | xfs_buf_readahead_map( |
692 | struct xfs_buftarg *target, | 696 | struct xfs_buftarg *target, |
693 | struct xfs_buf_map *map, | 697 | struct xfs_buf_map *map, |
694 | int nmaps) | 698 | int nmaps, |
699 | const struct xfs_buf_ops *ops) | ||
695 | { | 700 | { |
696 | if (bdi_read_congested(target->bt_bdi)) | 701 | if (bdi_read_congested(target->bt_bdi)) |
697 | return; | 702 | return; |
698 | 703 | ||
699 | xfs_buf_read_map(target, map, nmaps, | 704 | xfs_buf_read_map(target, map, nmaps, |
700 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); | 705 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops); |
701 | } | 706 | } |
702 | 707 | ||
703 | /* | 708 | /* |
@@ -709,10 +714,10 @@ xfs_buf_read_uncached( | |||
709 | struct xfs_buftarg *target, | 714 | struct xfs_buftarg *target, |
710 | xfs_daddr_t daddr, | 715 | xfs_daddr_t daddr, |
711 | size_t numblks, | 716 | size_t numblks, |
712 | int flags) | 717 | int flags, |
718 | const struct xfs_buf_ops *ops) | ||
713 | { | 719 | { |
714 | xfs_buf_t *bp; | 720 | struct xfs_buf *bp; |
715 | int error; | ||
716 | 721 | ||
717 | bp = xfs_buf_get_uncached(target, numblks, flags); | 722 | bp = xfs_buf_get_uncached(target, numblks, flags); |
718 | if (!bp) | 723 | if (!bp) |
@@ -723,13 +728,10 @@ xfs_buf_read_uncached( | |||
723 | bp->b_bn = daddr; | 728 | bp->b_bn = daddr; |
724 | bp->b_maps[0].bm_bn = daddr; | 729 | bp->b_maps[0].bm_bn = daddr; |
725 | bp->b_flags |= XBF_READ; | 730 | bp->b_flags |= XBF_READ; |
731 | bp->b_ops = ops; | ||
726 | 732 | ||
727 | xfsbdstrat(target->bt_mount, bp); | 733 | xfsbdstrat(target->bt_mount, bp); |
728 | error = xfs_buf_iowait(bp); | 734 | xfs_buf_iowait(bp); |
729 | if (error) { | ||
730 | xfs_buf_relse(bp); | ||
731 | return NULL; | ||
732 | } | ||
733 | return bp; | 735 | return bp; |
734 | } | 736 | } |
735 | 737 | ||
@@ -999,27 +1001,37 @@ STATIC void | |||
999 | xfs_buf_iodone_work( | 1001 | xfs_buf_iodone_work( |
1000 | struct work_struct *work) | 1002 | struct work_struct *work) |
1001 | { | 1003 | { |
1002 | xfs_buf_t *bp = | 1004 | struct xfs_buf *bp = |
1003 | container_of(work, xfs_buf_t, b_iodone_work); | 1005 | container_of(work, xfs_buf_t, b_iodone_work); |
1006 | bool read = !!(bp->b_flags & XBF_READ); | ||
1007 | |||
1008 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); | ||
1009 | if (read && bp->b_ops) | ||
1010 | bp->b_ops->verify_read(bp); | ||
1004 | 1011 | ||
1005 | if (bp->b_iodone) | 1012 | if (bp->b_iodone) |
1006 | (*(bp->b_iodone))(bp); | 1013 | (*(bp->b_iodone))(bp); |
1007 | else if (bp->b_flags & XBF_ASYNC) | 1014 | else if (bp->b_flags & XBF_ASYNC) |
1008 | xfs_buf_relse(bp); | 1015 | xfs_buf_relse(bp); |
1016 | else { | ||
1017 | ASSERT(read && bp->b_ops); | ||
1018 | complete(&bp->b_iowait); | ||
1019 | } | ||
1009 | } | 1020 | } |
1010 | 1021 | ||
1011 | void | 1022 | void |
1012 | xfs_buf_ioend( | 1023 | xfs_buf_ioend( |
1013 | xfs_buf_t *bp, | 1024 | struct xfs_buf *bp, |
1014 | int schedule) | 1025 | int schedule) |
1015 | { | 1026 | { |
1027 | bool read = !!(bp->b_flags & XBF_READ); | ||
1028 | |||
1016 | trace_xfs_buf_iodone(bp, _RET_IP_); | 1029 | trace_xfs_buf_iodone(bp, _RET_IP_); |
1017 | 1030 | ||
1018 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); | ||
1019 | if (bp->b_error == 0) | 1031 | if (bp->b_error == 0) |
1020 | bp->b_flags |= XBF_DONE; | 1032 | bp->b_flags |= XBF_DONE; |
1021 | 1033 | ||
1022 | if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { | 1034 | if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) { |
1023 | if (schedule) { | 1035 | if (schedule) { |
1024 | INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); | 1036 | INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); |
1025 | queue_work(xfslogd_workqueue, &bp->b_iodone_work); | 1037 | queue_work(xfslogd_workqueue, &bp->b_iodone_work); |
@@ -1027,6 +1039,7 @@ xfs_buf_ioend( | |||
1027 | xfs_buf_iodone_work(&bp->b_iodone_work); | 1039 | xfs_buf_iodone_work(&bp->b_iodone_work); |
1028 | } | 1040 | } |
1029 | } else { | 1041 | } else { |
1042 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); | ||
1030 | complete(&bp->b_iowait); | 1043 | complete(&bp->b_iowait); |
1031 | } | 1044 | } |
1032 | } | 1045 | } |
@@ -1197,9 +1210,14 @@ xfs_buf_bio_end_io( | |||
1197 | { | 1210 | { |
1198 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; | 1211 | xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; |
1199 | 1212 | ||
1200 | xfs_buf_ioerror(bp, -error); | 1213 | /* |
1214 | * don't overwrite existing errors - otherwise we can lose errors on | ||
1215 | * buffers that require multiple bios to complete. | ||
1216 | */ | ||
1217 | if (!bp->b_error) | ||
1218 | xfs_buf_ioerror(bp, -error); | ||
1201 | 1219 | ||
1202 | if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) | 1220 | if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) |
1203 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); | 1221 | invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); |
1204 | 1222 | ||
1205 | _xfs_buf_ioend(bp, 1); | 1223 | _xfs_buf_ioend(bp, 1); |
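The completion handler above only records an error when none has been seen yet: a buffer spanning several bios gets one callback per bio, and unconditionally storing each status would let a later successful bio clobber an earlier failure. A small self-contained sketch of the first-error-wins pattern (the struct and values are invented for the demo):

#include <stdio.h>

struct buf { int error; };

/* Record a completion status without clobbering an earlier failure. */
static void bio_end_io(struct buf *bp, int error)
{
	if (!bp->error)
		bp->error = error;	/* first error wins */
}

int main(void)
{
	struct buf bp = { 0 };

	bio_end_io(&bp, -5);	/* first bio fails (EIO) */
	bio_end_io(&bp, 0);	/* second bio succeeds */
	printf("error = %d\n", bp.error);	/* -5: failure preserved */
	return 0;
}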
@@ -1279,6 +1297,11 @@ next_chunk: | |||
1279 | if (size) | 1297 | if (size) |
1280 | goto next_chunk; | 1298 | goto next_chunk; |
1281 | } else { | 1299 | } else { |
1300 | /* | ||
1301 | * This is guaranteed not to be the last io reference count | ||
1302 | * because the caller (xfs_buf_iorequest) holds a count itself. | ||
1303 | */ | ||
1304 | atomic_dec(&bp->b_io_remaining); | ||
1282 | xfs_buf_ioerror(bp, EIO); | 1305 | xfs_buf_ioerror(bp, EIO); |
1283 | bio_put(bio); | 1306 | bio_put(bio); |
1284 | } | 1307 | } |
@@ -1304,6 +1327,20 @@ _xfs_buf_ioapply( | |||
1304 | rw |= REQ_FUA; | 1327 | rw |= REQ_FUA; |
1305 | if (bp->b_flags & XBF_FLUSH) | 1328 | if (bp->b_flags & XBF_FLUSH) |
1306 | rw |= REQ_FLUSH; | 1329 | rw |= REQ_FLUSH; |
1330 | |||
1331 | /* | ||
1332 | * Run the write verifier callback function if it exists. If | ||
1333 | * this function fails it will mark the buffer with an error and | ||
1334 | * the IO should not be dispatched. | ||
1335 | */ | ||
1336 | if (bp->b_ops) { | ||
1337 | bp->b_ops->verify_write(bp); | ||
1338 | if (bp->b_error) { | ||
1339 | xfs_force_shutdown(bp->b_target->bt_mount, | ||
1340 | SHUTDOWN_CORRUPT_INCORE); | ||
1341 | return; | ||
1342 | } | ||
1343 | } | ||
1307 | } else if (bp->b_flags & XBF_READ_AHEAD) { | 1344 | } else if (bp->b_flags & XBF_READ_AHEAD) { |
1308 | rw = READA; | 1345 | rw = READA; |
1309 | } else { | 1346 | } else { |
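The write side now runs the verifier before the I/O is issued, so a corrupt in-memory block triggers a shutdown instead of reaching disk. A hedged, self-contained sketch of that gate (all names here are illustrative, not the XFS functions):

#include <stdio.h>

struct buf {
	int error;
	void (*verify_write)(struct buf *bp);
};

static void bad_verify(struct buf *bp)
{
	bp->error = 117;	/* EFSCORRUPTED-style: in-core block is bad */
}

static void submit_write(struct buf *bp)
{
	/* run the write verifier first; refuse to dispatch on failure */
	if (bp->verify_write) {
		bp->verify_write(bp);
		if (bp->error) {
			printf("shutdown: corrupt in-core block, I/O not issued\n");
			return;
		}
	}
	printf("write dispatched\n");
}

int main(void)
{
	struct buf bp = { 0, bad_verify };

	submit_write(&bp);	/* verifier fails, write never hits disk */
	return 0;
}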
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7c0b6a0a1557..23f5642480bb 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -100,6 +100,7 @@ typedef struct xfs_buftarg { | |||
100 | struct xfs_buf; | 100 | struct xfs_buf; |
101 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); | 101 | typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); |
102 | 102 | ||
103 | |||
103 | #define XB_PAGES 2 | 104 | #define XB_PAGES 2 |
104 | 105 | ||
105 | struct xfs_buf_map { | 106 | struct xfs_buf_map { |
@@ -110,6 +111,11 @@ struct xfs_buf_map { | |||
110 | #define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ | 111 | #define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ |
111 | struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; | 112 | struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; |
112 | 113 | ||
114 | struct xfs_buf_ops { | ||
115 | void (*verify_read)(struct xfs_buf *); | ||
116 | void (*verify_write)(struct xfs_buf *); | ||
117 | }; | ||
118 | |||
113 | typedef struct xfs_buf { | 119 | typedef struct xfs_buf { |
114 | /* | 120 | /* |
115 | * first cacheline holds all the fields needed for an uncontended cache | 121 | * first cacheline holds all the fields needed for an uncontended cache |
@@ -153,13 +159,13 @@ typedef struct xfs_buf { | |||
153 | unsigned int b_page_count; /* size of page array */ | 159 | unsigned int b_page_count; /* size of page array */ |
154 | unsigned int b_offset; /* page offset in first page */ | 160 | unsigned int b_offset; /* page offset in first page */ |
155 | unsigned short b_error; /* error code on I/O */ | 161 | unsigned short b_error; /* error code on I/O */ |
162 | const struct xfs_buf_ops *b_ops; | ||
156 | 163 | ||
157 | #ifdef XFS_BUF_LOCK_TRACKING | 164 | #ifdef XFS_BUF_LOCK_TRACKING |
158 | int b_last_holder; | 165 | int b_last_holder; |
159 | #endif | 166 | #endif |
160 | } xfs_buf_t; | 167 | } xfs_buf_t; |
161 | 168 | ||
162 | |||
163 | /* Finding and Reading Buffers */ | 169 | /* Finding and Reading Buffers */ |
164 | struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, | 170 | struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, |
165 | struct xfs_buf_map *map, int nmaps, | 171 | struct xfs_buf_map *map, int nmaps, |
@@ -196,9 +202,11 @@ struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, | |||
196 | xfs_buf_flags_t flags); | 202 | xfs_buf_flags_t flags); |
197 | struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target, | 203 | struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target, |
198 | struct xfs_buf_map *map, int nmaps, | 204 | struct xfs_buf_map *map, int nmaps, |
199 | xfs_buf_flags_t flags); | 205 | xfs_buf_flags_t flags, |
206 | const struct xfs_buf_ops *ops); | ||
200 | void xfs_buf_readahead_map(struct xfs_buftarg *target, | 207 | void xfs_buf_readahead_map(struct xfs_buftarg *target, |
201 | struct xfs_buf_map *map, int nmaps); | 208 | struct xfs_buf_map *map, int nmaps, |
209 | const struct xfs_buf_ops *ops); | ||
202 | 210 | ||
203 | static inline struct xfs_buf * | 211 | static inline struct xfs_buf * |
204 | xfs_buf_get( | 212 | xfs_buf_get( |
@@ -216,20 +224,22 @@ xfs_buf_read( | |||
216 | struct xfs_buftarg *target, | 224 | struct xfs_buftarg *target, |
217 | xfs_daddr_t blkno, | 225 | xfs_daddr_t blkno, |
218 | size_t numblks, | 226 | size_t numblks, |
219 | xfs_buf_flags_t flags) | 227 | xfs_buf_flags_t flags, |
228 | const struct xfs_buf_ops *ops) | ||
220 | { | 229 | { |
221 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); | 230 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); |
222 | return xfs_buf_read_map(target, &map, 1, flags); | 231 | return xfs_buf_read_map(target, &map, 1, flags, ops); |
223 | } | 232 | } |
224 | 233 | ||
225 | static inline void | 234 | static inline void |
226 | xfs_buf_readahead( | 235 | xfs_buf_readahead( |
227 | struct xfs_buftarg *target, | 236 | struct xfs_buftarg *target, |
228 | xfs_daddr_t blkno, | 237 | xfs_daddr_t blkno, |
229 | size_t numblks) | 238 | size_t numblks, |
239 | const struct xfs_buf_ops *ops) | ||
230 | { | 240 | { |
231 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); | 241 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); |
232 | return xfs_buf_readahead_map(target, &map, 1); | 242 | return xfs_buf_readahead_map(target, &map, 1, ops); |
233 | } | 243 | } |
234 | 244 | ||
235 | struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks); | 245 | struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks); |
@@ -239,7 +249,8 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); | |||
239 | struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, | 249 | struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, |
240 | int flags); | 250 | int flags); |
241 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, | 251 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target, |
242 | xfs_daddr_t daddr, size_t numblks, int flags); | 252 | xfs_daddr_t daddr, size_t numblks, int flags, |
253 | const struct xfs_buf_ops *ops); | ||
243 | void xfs_buf_hold(struct xfs_buf *bp); | 254 | void xfs_buf_hold(struct xfs_buf *bp); |
244 | 255 | ||
245 | /* Releasing Buffers */ | 256 | /* Releasing Buffers */ |
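Putting the new pieces together, a subsystem defines a const struct xfs_buf_ops and hands it in at read time. The sketch below is against the interfaces added above; xfs_demo_* and DEMO_MAGIC are hypothetical, while the struct layout, xfs_buf_ioerror() and the xfs_buf_read() signature are the ones in this patch:

/* hypothetical verifier pair for a single-magic metadata block */
static void xfs_demo_read_verify(struct xfs_buf *bp)
{
	__be32 *magic = bp->b_addr;

	if (*magic != cpu_to_be32(DEMO_MAGIC))
		xfs_buf_ioerror(bp, EFSCORRUPTED);	/* fail the read */
}

static void xfs_demo_write_verify(struct xfs_buf *bp)
{
	/* same structural checks before the write is dispatched */
	xfs_demo_read_verify(bp);
}

static const struct xfs_buf_ops xfs_demo_buf_ops = {
	.verify_read	= xfs_demo_read_verify,
	.verify_write	= xfs_demo_write_verify,
};

/* A caller then attaches the ops at read time, e.g.:
 *
 *	bp = xfs_buf_read(target, blkno, numblks, 0, &xfs_demo_buf_ops);
 *
 * so verify_read runs when the physical read completes and
 * verify_write runs just before the buffer is dispatched to disk.
 */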
diff --git a/fs/xfs/xfs_cksum.h b/fs/xfs/xfs_cksum.h new file mode 100644 index 000000000000..fad1676ad8cd --- /dev/null +++ b/fs/xfs/xfs_cksum.h | |||
@@ -0,0 +1,63 @@ | |||
1 | #ifndef _XFS_CKSUM_H | ||
2 | #define _XFS_CKSUM_H 1 | ||
3 | |||
4 | #define XFS_CRC_SEED (~(__uint32_t)0) | ||
5 | |||
6 | /* | ||
7 | * Calculate the intermediate checksum for a buffer that has the CRC field | ||
8 | * inside it. The offset of the 32bit crc fields is passed as the | ||
9 | * cksum_offset parameter. | ||
10 | */ | ||
11 | static inline __uint32_t | ||
12 | xfs_start_cksum(char *buffer, size_t length, unsigned long cksum_offset) | ||
13 | { | ||
14 | __uint32_t zero = 0; | ||
15 | __uint32_t crc; | ||
16 | |||
17 | /* Calculate CRC up to the checksum. */ | ||
18 | crc = crc32c(XFS_CRC_SEED, buffer, cksum_offset); | ||
19 | |||
20 | /* Skip checksum field */ | ||
21 | crc = crc32c(crc, &zero, sizeof(__u32)); | ||
22 | |||
23 | /* Calculate the rest of the CRC. */ | ||
24 | return crc32c(crc, &buffer[cksum_offset + sizeof(__be32)], | ||
25 | length - (cksum_offset + sizeof(__be32))); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Convert the intermediate checksum to the final ondisk format. | ||
30 | * | ||
31 | * The CRC32c calculation uses LE format even on BE machines, but returns the | ||
32 | * result in host endian format. Hence we need to byte swap it back to LE format | ||
33 | * so that it is consistent on disk. | ||
34 | */ | ||
35 | static inline __le32 | ||
36 | xfs_end_cksum(__uint32_t crc) | ||
37 | { | ||
38 | return ~cpu_to_le32(crc); | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * Helper to generate the checksum for a buffer. | ||
43 | */ | ||
44 | static inline void | ||
45 | xfs_update_cksum(char *buffer, size_t length, unsigned long cksum_offset) | ||
46 | { | ||
47 | __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); | ||
48 | |||
49 | *(__le32 *)(buffer + cksum_offset) = xfs_end_cksum(crc); | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Helper to verify the checksum for a buffer. | ||
54 | */ | ||
55 | static inline int | ||
56 | xfs_verify_cksum(char *buffer, size_t length, unsigned long cksum_offset) | ||
57 | { | ||
58 | __uint32_t crc = xfs_start_cksum(buffer, length, cksum_offset); | ||
59 | |||
60 | return *(__le32 *)(buffer + cksum_offset) == xfs_end_cksum(crc); | ||
61 | } | ||
62 | |||
63 | #endif /* _XFS_CKSUM_H */ | ||
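For a concrete picture of how these helpers are meant to be called: the sketch below invents a demo_hdr layout; only the two helper calls and the zero-substitution behaviour of xfs_start_cksum() come from the header above. The stored CRC is computed as if the crc field itself were zero, which is exactly what xfs_start_cksum() arranges.

/* offsetof() comes from <linux/stddef.h>; layout is hypothetical */
struct demo_hdr {
	__be32	magic;
	__le32	crc;		/* covered as a zero word by the CRC */
	char	payload[504];
};

static void demo_hdr_write_cksum(struct demo_hdr *hdr)
{
	/* before the block is written */
	xfs_update_cksum((char *)hdr, sizeof(*hdr),
			 offsetof(struct demo_hdr, crc));
}

static int demo_hdr_check_cksum(struct demo_hdr *hdr)
{
	/* after the block is read back */
	if (!xfs_verify_cksum((char *)hdr, sizeof(*hdr),
			      offsetof(struct demo_hdr, crc)))
		return EFSCORRUPTED;	/* checksum mismatch */
	return 0;
}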
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 7bfb7dd334fc..4d7696a02418 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -91,6 +91,84 @@ STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, | |||
91 | xfs_da_state_blk_t *save_blk); | 91 | xfs_da_state_blk_t *save_blk); |
92 | STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state); | 92 | STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state); |
93 | 93 | ||
94 | static void | ||
95 | xfs_da_node_verify( | ||
96 | struct xfs_buf *bp) | ||
97 | { | ||
98 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
99 | struct xfs_da_node_hdr *hdr = bp->b_addr; | ||
100 | int block_ok = 0; | ||
101 | |||
102 | block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC); | ||
103 | block_ok = block_ok && | ||
104 | be16_to_cpu(hdr->level) > 0 && | ||
105 | be16_to_cpu(hdr->count) > 0; | ||
106 | if (!block_ok) { | ||
107 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
108 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
109 | } | ||
110 | |||
111 | } | ||
112 | |||
113 | static void | ||
114 | xfs_da_node_write_verify( | ||
115 | struct xfs_buf *bp) | ||
116 | { | ||
117 | xfs_da_node_verify(bp); | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * Leaf/node format detection on trees is sketchy, so a node read can land on | ||
122 | * a leaf-level block when detection incorrectly identifies the tree as node | ||
123 | * format. In this case, we need to swap the verifier to match the actual | ||
124 | * format of the block being read. | ||
125 | */ | ||
126 | static void | ||
127 | xfs_da_node_read_verify( | ||
128 | struct xfs_buf *bp) | ||
129 | { | ||
130 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
131 | struct xfs_da_blkinfo *info = bp->b_addr; | ||
132 | |||
133 | switch (be16_to_cpu(info->magic)) { | ||
134 | case XFS_DA_NODE_MAGIC: | ||
135 | xfs_da_node_verify(bp); | ||
136 | break; | ||
137 | case XFS_ATTR_LEAF_MAGIC: | ||
138 | bp->b_ops = &xfs_attr_leaf_buf_ops; | ||
139 | bp->b_ops->verify_read(bp); | ||
140 | return; | ||
141 | case XFS_DIR2_LEAFN_MAGIC: | ||
142 | bp->b_ops = &xfs_dir2_leafn_buf_ops; | ||
143 | bp->b_ops->verify_read(bp); | ||
144 | return; | ||
145 | default: | ||
146 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
147 | mp, info); | ||
148 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
149 | break; | ||
150 | } | ||
151 | } | ||
152 | |||
153 | const struct xfs_buf_ops xfs_da_node_buf_ops = { | ||
154 | .verify_read = xfs_da_node_read_verify, | ||
155 | .verify_write = xfs_da_node_write_verify, | ||
156 | }; | ||
157 | |||
158 | |||
159 | int | ||
160 | xfs_da_node_read( | ||
161 | struct xfs_trans *tp, | ||
162 | struct xfs_inode *dp, | ||
163 | xfs_dablk_t bno, | ||
164 | xfs_daddr_t mappedbno, | ||
165 | struct xfs_buf **bpp, | ||
166 | int which_fork) | ||
167 | { | ||
168 | return xfs_da_read_buf(tp, dp, bno, mappedbno, bpp, | ||
169 | which_fork, &xfs_da_node_buf_ops); | ||
170 | } | ||
171 | |||
94 | /*======================================================================== | 172 | /*======================================================================== |
95 | * Routines used for growing the Btree. | 173 | * Routines used for growing the Btree. |
96 | *========================================================================*/ | 174 | *========================================================================*/ |
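The read verifier above dispatches on the on-disk magic precisely because tree-format detection can guess wrong. The same idiom in a self-contained form (magics, types and return values are invented for the demo):

#include <stdio.h>

enum { NODE_MAGIC = 0xfebe, LEAF_MAGIC = 0xd2f1 };

struct block { unsigned short magic; };

static int verify_node(struct block *b) { return 0; }
static int verify_leaf(struct block *b) { return 0; }

/* dispatch to the verifier that matches the block actually found */
static int read_verify(struct block *b)
{
	switch (b->magic) {
	case NODE_MAGIC:
		return verify_node(b);
	case LEAF_MAGIC:
		/* detection guessed wrong: verify as a leaf instead */
		return verify_leaf(b);
	default:
		return -1;	/* corrupt: unknown magic */
	}
}

int main(void)
{
	struct block leaf = { LEAF_MAGIC };

	printf("%d\n", read_verify(&leaf));	/* 0: verified as a leaf */
	return 0;
}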
@@ -125,6 +203,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level, | |||
125 | xfs_trans_log_buf(tp, bp, | 203 | xfs_trans_log_buf(tp, bp, |
126 | XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); | 204 | XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); |
127 | 205 | ||
206 | bp->b_ops = &xfs_da_node_buf_ops; | ||
128 | *bpp = bp; | 207 | *bpp = bp; |
129 | return(0); | 208 | return(0); |
130 | } | 209 | } |
@@ -324,6 +403,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, | |||
324 | } | 403 | } |
325 | memcpy(node, oldroot, size); | 404 | memcpy(node, oldroot, size); |
326 | xfs_trans_log_buf(tp, bp, 0, size - 1); | 405 | xfs_trans_log_buf(tp, bp, 0, size - 1); |
406 | |||
407 | bp->b_ops = blk1->bp->b_ops; | ||
327 | blk1->bp = bp; | 408 | blk1->bp = bp; |
328 | blk1->blkno = blkno; | 409 | blk1->blkno = blkno; |
329 | 410 | ||
@@ -746,7 +827,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) | |||
746 | */ | 827 | */ |
747 | child = be32_to_cpu(oldroot->btree[0].before); | 828 | child = be32_to_cpu(oldroot->btree[0].before); |
748 | ASSERT(child != 0); | 829 | ASSERT(child != 0); |
749 | error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp, | 830 | error = xfs_da_node_read(args->trans, args->dp, child, -1, &bp, |
750 | args->whichfork); | 831 | args->whichfork); |
751 | if (error) | 832 | if (error) |
752 | return(error); | 833 | return(error); |
@@ -754,7 +835,14 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) | |||
754 | xfs_da_blkinfo_onlychild_validate(bp->b_addr, | 835 | xfs_da_blkinfo_onlychild_validate(bp->b_addr, |
755 | be16_to_cpu(oldroot->hdr.level)); | 836 | be16_to_cpu(oldroot->hdr.level)); |
756 | 837 | ||
838 | /* | ||
839 | * This could be copying a leaf back into the root block in the case of | ||
840 | * there only being a single leaf block left in the tree. Hence we have | ||
841 | * to update the b_ops pointer as well to match the buffer type change | ||
842 | * that could occur. | ||
843 | */ | ||
757 | memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); | 844 | memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); |
845 | root_blk->bp->b_ops = bp->b_ops; | ||
758 | xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); | 846 | xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); |
759 | error = xfs_da_shrink_inode(args, child, bp); | 847 | error = xfs_da_shrink_inode(args, child, bp); |
760 | return(error); | 848 | return(error); |
@@ -779,6 +867,8 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) | |||
779 | xfs_dablk_t blkno; | 867 | xfs_dablk_t blkno; |
780 | struct xfs_buf *bp; | 868 | struct xfs_buf *bp; |
781 | 869 | ||
870 | trace_xfs_da_node_toosmall(state->args); | ||
871 | |||
782 | /* | 872 | /* |
783 | * Check for the degenerate case of the block being over 50% full. | 873 | * Check for the degenerate case of the block being over 50% full. |
784 | * If so, it's not worth even looking to see if we might be able | 874 | * If so, it's not worth even looking to see if we might be able |
@@ -835,7 +925,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action) | |||
835 | blkno = be32_to_cpu(info->back); | 925 | blkno = be32_to_cpu(info->back); |
836 | if (blkno == 0) | 926 | if (blkno == 0) |
837 | continue; | 927 | continue; |
838 | error = xfs_da_read_buf(state->args->trans, state->args->dp, | 928 | error = xfs_da_node_read(state->args->trans, state->args->dp, |
839 | blkno, -1, &bp, state->args->whichfork); | 929 | blkno, -1, &bp, state->args->whichfork); |
840 | if (error) | 930 | if (error) |
841 | return(error); | 931 | return(error); |
@@ -900,6 +990,8 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path) | |||
900 | xfs_dahash_t lasthash=0; | 990 | xfs_dahash_t lasthash=0; |
901 | int level, count; | 991 | int level, count; |
902 | 992 | ||
993 | trace_xfs_da_fixhashpath(state->args); | ||
994 | |||
903 | level = path->active-1; | 995 | level = path->active-1; |
904 | blk = &path->blk[ level ]; | 996 | blk = &path->blk[ level ]; |
905 | switch (blk->magic) { | 997 | switch (blk->magic) { |
@@ -1079,7 +1171,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result) | |||
1079 | * Read the next node down in the tree. | 1171 | * Read the next node down in the tree. |
1080 | */ | 1172 | */ |
1081 | blk->blkno = blkno; | 1173 | blk->blkno = blkno; |
1082 | error = xfs_da_read_buf(args->trans, args->dp, blkno, | 1174 | error = xfs_da_node_read(args->trans, args->dp, blkno, |
1083 | -1, &blk->bp, args->whichfork); | 1175 | -1, &blk->bp, args->whichfork); |
1084 | if (error) { | 1176 | if (error) { |
1085 | blk->blkno = 0; | 1177 | blk->blkno = 0; |
@@ -1241,7 +1333,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, | |||
1241 | new_info->forw = cpu_to_be32(old_blk->blkno); | 1333 | new_info->forw = cpu_to_be32(old_blk->blkno); |
1242 | new_info->back = old_info->back; | 1334 | new_info->back = old_info->back; |
1243 | if (old_info->back) { | 1335 | if (old_info->back) { |
1244 | error = xfs_da_read_buf(args->trans, args->dp, | 1336 | error = xfs_da_node_read(args->trans, args->dp, |
1245 | be32_to_cpu(old_info->back), | 1337 | be32_to_cpu(old_info->back), |
1246 | -1, &bp, args->whichfork); | 1338 | -1, &bp, args->whichfork); |
1247 | if (error) | 1339 | if (error) |
@@ -1262,7 +1354,7 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, | |||
1262 | new_info->forw = old_info->forw; | 1354 | new_info->forw = old_info->forw; |
1263 | new_info->back = cpu_to_be32(old_blk->blkno); | 1355 | new_info->back = cpu_to_be32(old_blk->blkno); |
1264 | if (old_info->forw) { | 1356 | if (old_info->forw) { |
1265 | error = xfs_da_read_buf(args->trans, args->dp, | 1357 | error = xfs_da_node_read(args->trans, args->dp, |
1266 | be32_to_cpu(old_info->forw), | 1358 | be32_to_cpu(old_info->forw), |
1267 | -1, &bp, args->whichfork); | 1359 | -1, &bp, args->whichfork); |
1268 | if (error) | 1360 | if (error) |
@@ -1362,7 +1454,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, | |||
1362 | trace_xfs_da_unlink_back(args); | 1454 | trace_xfs_da_unlink_back(args); |
1363 | save_info->back = drop_info->back; | 1455 | save_info->back = drop_info->back; |
1364 | if (drop_info->back) { | 1456 | if (drop_info->back) { |
1365 | error = xfs_da_read_buf(args->trans, args->dp, | 1457 | error = xfs_da_node_read(args->trans, args->dp, |
1366 | be32_to_cpu(drop_info->back), | 1458 | be32_to_cpu(drop_info->back), |
1367 | -1, &bp, args->whichfork); | 1459 | -1, &bp, args->whichfork); |
1368 | if (error) | 1460 | if (error) |
@@ -1379,7 +1471,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, | |||
1379 | trace_xfs_da_unlink_forward(args); | 1471 | trace_xfs_da_unlink_forward(args); |
1380 | save_info->forw = drop_info->forw; | 1472 | save_info->forw = drop_info->forw; |
1381 | if (drop_info->forw) { | 1473 | if (drop_info->forw) { |
1382 | error = xfs_da_read_buf(args->trans, args->dp, | 1474 | error = xfs_da_node_read(args->trans, args->dp, |
1383 | be32_to_cpu(drop_info->forw), | 1475 | be32_to_cpu(drop_info->forw), |
1384 | -1, &bp, args->whichfork); | 1476 | -1, &bp, args->whichfork); |
1385 | if (error) | 1477 | if (error) |
@@ -1417,6 +1509,8 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, | |||
1417 | xfs_dablk_t blkno=0; | 1509 | xfs_dablk_t blkno=0; |
1418 | int level, error; | 1510 | int level, error; |
1419 | 1511 | ||
1512 | trace_xfs_da_path_shift(state->args); | ||
1513 | |||
1420 | /* | 1514 | /* |
1421 | * Roll up the Btree looking for the first block where our | 1515 | * Roll up the Btree looking for the first block where our |
1422 | * current index is not at the edge of the block. Note that | 1516 | * current index is not at the edge of the block. Note that |
@@ -1463,8 +1557,8 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, | |||
1463 | * Read the next child block. | 1557 | * Read the next child block. |
1464 | */ | 1558 | */ |
1465 | blk->blkno = blkno; | 1559 | blk->blkno = blkno; |
1466 | error = xfs_da_read_buf(args->trans, args->dp, blkno, -1, | 1560 | error = xfs_da_node_read(args->trans, args->dp, blkno, -1, |
1467 | &blk->bp, args->whichfork); | 1561 | &blk->bp, args->whichfork); |
1468 | if (error) | 1562 | if (error) |
1469 | return(error); | 1563 | return(error); |
1470 | ASSERT(blk->bp != NULL); | 1564 | ASSERT(blk->bp != NULL); |
@@ -1727,7 +1821,8 @@ xfs_da_swap_lastblock( | |||
1727 | * Read the last block in the btree space. | 1821 | * Read the last block in the btree space. |
1728 | */ | 1822 | */ |
1729 | last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; | 1823 | last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; |
1730 | if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w))) | 1824 | error = xfs_da_node_read(tp, ip, last_blkno, -1, &last_buf, w); |
1825 | if (error) | ||
1731 | return error; | 1826 | return error; |
1732 | /* | 1827 | /* |
1733 | * Copy the last block into the dead buffer and log it. | 1828 | * Copy the last block into the dead buffer and log it. |
@@ -1753,7 +1848,8 @@ xfs_da_swap_lastblock( | |||
1753 | * If the moved block has a left sibling, fix up the pointers. | 1848 | * If the moved block has a left sibling, fix up the pointers. |
1754 | */ | 1849 | */ |
1755 | if ((sib_blkno = be32_to_cpu(dead_info->back))) { | 1850 | if ((sib_blkno = be32_to_cpu(dead_info->back))) { |
1756 | if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) | 1851 | error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); |
1852 | if (error) | ||
1757 | goto done; | 1853 | goto done; |
1758 | sib_info = sib_buf->b_addr; | 1854 | sib_info = sib_buf->b_addr; |
1759 | if (unlikely( | 1855 | if (unlikely( |
@@ -1774,7 +1870,8 @@ xfs_da_swap_lastblock( | |||
1774 | * If the moved block has a right sibling, fix up the pointers. | 1870 | * If the moved block has a right sibling, fix up the pointers. |
1775 | */ | 1871 | */ |
1776 | if ((sib_blkno = be32_to_cpu(dead_info->forw))) { | 1872 | if ((sib_blkno = be32_to_cpu(dead_info->forw))) { |
1777 | if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w))) | 1873 | error = xfs_da_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); |
1874 | if (error) | ||
1778 | goto done; | 1875 | goto done; |
1779 | sib_info = sib_buf->b_addr; | 1876 | sib_info = sib_buf->b_addr; |
1780 | if (unlikely( | 1877 | if (unlikely( |
@@ -1797,7 +1894,8 @@ xfs_da_swap_lastblock( | |||
1797 | * Walk down the tree looking for the parent of the moved block. | 1894 | * Walk down the tree looking for the parent of the moved block. |
1798 | */ | 1895 | */ |
1799 | for (;;) { | 1896 | for (;;) { |
1800 | if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) | 1897 | error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); |
1898 | if (error) | ||
1801 | goto done; | 1899 | goto done; |
1802 | par_node = par_buf->b_addr; | 1900 | par_node = par_buf->b_addr; |
1803 | if (unlikely(par_node->hdr.info.magic != | 1901 | if (unlikely(par_node->hdr.info.magic != |
@@ -1847,7 +1945,8 @@ xfs_da_swap_lastblock( | |||
1847 | error = XFS_ERROR(EFSCORRUPTED); | 1945 | error = XFS_ERROR(EFSCORRUPTED); |
1848 | goto done; | 1946 | goto done; |
1849 | } | 1947 | } |
1850 | if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) | 1948 | error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w); |
1949 | if (error) | ||
1851 | goto done; | 1950 | goto done; |
1852 | par_node = par_buf->b_addr; | 1951 | par_node = par_buf->b_addr; |
1853 | if (unlikely( | 1952 | if (unlikely( |
@@ -2133,7 +2232,8 @@ xfs_da_read_buf( | |||
2133 | xfs_dablk_t bno, | 2232 | xfs_dablk_t bno, |
2134 | xfs_daddr_t mappedbno, | 2233 | xfs_daddr_t mappedbno, |
2135 | struct xfs_buf **bpp, | 2234 | struct xfs_buf **bpp, |
2136 | int whichfork) | 2235 | int whichfork, |
2236 | const struct xfs_buf_ops *ops) | ||
2137 | { | 2237 | { |
2138 | struct xfs_buf *bp; | 2238 | struct xfs_buf *bp; |
2139 | struct xfs_buf_map map; | 2239 | struct xfs_buf_map map; |
@@ -2155,7 +2255,7 @@ xfs_da_read_buf( | |||
2155 | 2255 | ||
2156 | error = xfs_trans_read_buf_map(dp->i_mount, trans, | 2256 | error = xfs_trans_read_buf_map(dp->i_mount, trans, |
2157 | dp->i_mount->m_ddev_targp, | 2257 | dp->i_mount->m_ddev_targp, |
2158 | mapp, nmap, 0, &bp); | 2258 | mapp, nmap, 0, &bp, ops); |
2159 | if (error) | 2259 | if (error) |
2160 | goto out_free; | 2260 | goto out_free; |
2161 | 2261 | ||
@@ -2211,9 +2311,10 @@ xfs_da_reada_buf( | |||
2211 | struct xfs_trans *trans, | 2311 | struct xfs_trans *trans, |
2212 | struct xfs_inode *dp, | 2312 | struct xfs_inode *dp, |
2213 | xfs_dablk_t bno, | 2313 | xfs_dablk_t bno, |
2214 | int whichfork) | 2314 | xfs_daddr_t mappedbno, |
2315 | int whichfork, | ||
2316 | const struct xfs_buf_ops *ops) | ||
2215 | { | 2317 | { |
2216 | xfs_daddr_t mappedbno = -1; | ||
2217 | struct xfs_buf_map map; | 2318 | struct xfs_buf_map map; |
2218 | struct xfs_buf_map *mapp; | 2319 | struct xfs_buf_map *mapp; |
2219 | int nmap; | 2320 | int nmap; |
@@ -2221,7 +2322,7 @@ xfs_da_reada_buf( | |||
2221 | 2322 | ||
2222 | mapp = &map; | 2323 | mapp = &map; |
2223 | nmap = 1; | 2324 | nmap = 1; |
2224 | error = xfs_dabuf_map(trans, dp, bno, -1, whichfork, | 2325 | error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, |
2225 | &mapp, &nmap); | 2326 | &mapp, &nmap); |
2226 | if (error) { | 2327 | if (error) { |
2227 | /* mapping a hole is not an error, but we don't continue */ | 2328 | /* mapping a hole is not an error, but we don't continue */ |
@@ -2231,7 +2332,7 @@ xfs_da_reada_buf( | |||
2231 | } | 2332 | } |
2232 | 2333 | ||
2233 | mappedbno = mapp[0].bm_bn; | 2334 | mappedbno = mapp[0].bm_bn; |
2234 | xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap); | 2335 | xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); |
2235 | 2336 | ||
2236 | out_free: | 2337 | out_free: |
2237 | if (mapp != &map) | 2338 | if (mapp != &map) |
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 132adafb041e..ee5170c46ae1 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h | |||
@@ -18,7 +18,6 @@ | |||
18 | #ifndef __XFS_DA_BTREE_H__ | 18 | #ifndef __XFS_DA_BTREE_H__ |
19 | #define __XFS_DA_BTREE_H__ | 19 | #define __XFS_DA_BTREE_H__ |
20 | 20 | ||
21 | struct xfs_buf; | ||
22 | struct xfs_bmap_free; | 21 | struct xfs_bmap_free; |
23 | struct xfs_inode; | 22 | struct xfs_inode; |
24 | struct xfs_mount; | 23 | struct xfs_mount; |
@@ -214,6 +213,9 @@ int xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, | |||
214 | */ | 213 | */ |
215 | int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, | 214 | int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk, |
216 | xfs_da_state_blk_t *new_blk); | 215 | xfs_da_state_blk_t *new_blk); |
216 | int xfs_da_node_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
217 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | ||
218 | struct xfs_buf **bpp, int which_fork); | ||
217 | 219 | ||
218 | /* | 220 | /* |
219 | * Utility routines. | 221 | * Utility routines. |
@@ -226,9 +228,11 @@ int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, | |||
226 | struct xfs_buf **bp, int whichfork); | 228 | struct xfs_buf **bp, int whichfork); |
227 | int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, | 229 | int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp, |
228 | xfs_dablk_t bno, xfs_daddr_t mappedbno, | 230 | xfs_dablk_t bno, xfs_daddr_t mappedbno, |
229 | struct xfs_buf **bpp, int whichfork); | 231 | struct xfs_buf **bpp, int whichfork, |
232 | const struct xfs_buf_ops *ops); | ||
230 | xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, | 233 | xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, |
231 | xfs_dablk_t bno, int whichfork); | 234 | xfs_dablk_t bno, xfs_daddr_t mapped_bno, |
235 | int whichfork, const struct xfs_buf_ops *ops); | ||
232 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, | 236 | int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, |
233 | struct xfs_buf *dead_buf); | 237 | struct xfs_buf *dead_buf); |
234 | 238 | ||
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index b9b8646e62db..d0e9c74d3d96 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -246,12 +246,10 @@ xfs_swap_extents( | |||
246 | goto out_unlock; | 246 | goto out_unlock; |
247 | } | 247 | } |
248 | 248 | ||
249 | if (VN_CACHED(VFS_I(tip)) != 0) { | 249 | error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); |
250 | error = xfs_flushinval_pages(tip, 0, -1, | 250 | if (error) |
251 | FI_REMAPF_LOCKED); | 251 | goto out_unlock; |
252 | if (error) | 252 | truncate_pagecache_range(VFS_I(ip), 0, -1); |
253 | goto out_unlock; | ||
254 | } | ||
255 | 253 | ||
256 | /* Verify O_DIRECT for ftmp */ | 254 | /* Verify O_DIRECT for ftmp */ |
257 | if (VN_CACHED(VFS_I(tip)) != 0) { | 255 | if (VN_CACHED(VFS_I(tip)) != 0) { |
@@ -315,8 +313,7 @@ xfs_swap_extents( | |||
315 | * are safe. We don't really care if non-io related | 313 | * are safe. We don't really care if non-io related |
316 | * fields change. | 314 | * fields change. |
317 | */ | 315 | */ |
318 | 316 | truncate_pagecache_range(VFS_I(ip), 0, -1); | |
319 | xfs_tosspages(ip, 0, -1, FI_REMAPF); | ||
320 | 317 | ||
321 | tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); | 318 | tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); |
322 | if ((error = xfs_trans_reserve(tp, 0, | 319 | if ((error = xfs_trans_reserve(tp, 0, |
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index e93ca8f054f4..7536faaa61e7 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -56,6 +56,214 @@ xfs_dir_startup(void) | |||
56 | xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); | 56 | xfs_dir_hash_dotdot = xfs_da_hashname((unsigned char *)"..", 2); |
57 | } | 57 | } |
58 | 58 | ||
59 | static void | ||
60 | xfs_dir2_block_verify( | ||
61 | struct xfs_buf *bp) | ||
62 | { | ||
63 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
64 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | ||
65 | int block_ok = 0; | ||
66 | |||
67 | block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); | ||
68 | block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; | ||
69 | |||
70 | if (!block_ok) { | ||
71 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
72 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
73 | } | ||
74 | } | ||
75 | |||
76 | static void | ||
77 | xfs_dir2_block_read_verify( | ||
78 | struct xfs_buf *bp) | ||
79 | { | ||
80 | xfs_dir2_block_verify(bp); | ||
81 | } | ||
82 | |||
83 | static void | ||
84 | xfs_dir2_block_write_verify( | ||
85 | struct xfs_buf *bp) | ||
86 | { | ||
87 | xfs_dir2_block_verify(bp); | ||
88 | } | ||
89 | |||
90 | const struct xfs_buf_ops xfs_dir2_block_buf_ops = { | ||
91 | .verify_read = xfs_dir2_block_read_verify, | ||
92 | .verify_write = xfs_dir2_block_write_verify, | ||
93 | }; | ||
94 | |||
95 | static int | ||
96 | xfs_dir2_block_read( | ||
97 | struct xfs_trans *tp, | ||
98 | struct xfs_inode *dp, | ||
99 | struct xfs_buf **bpp) | ||
100 | { | ||
101 | struct xfs_mount *mp = dp->i_mount; | ||
102 | |||
103 | return xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp, | ||
104 | XFS_DATA_FORK, &xfs_dir2_block_buf_ops); | ||
105 | } | ||
106 | |||
107 | static void | ||
108 | xfs_dir2_block_need_space( | ||
109 | struct xfs_dir2_data_hdr *hdr, | ||
110 | struct xfs_dir2_block_tail *btp, | ||
111 | struct xfs_dir2_leaf_entry *blp, | ||
112 | __be16 **tagpp, | ||
113 | struct xfs_dir2_data_unused **dupp, | ||
114 | struct xfs_dir2_data_unused **enddupp, | ||
115 | int *compact, | ||
116 | int len) | ||
117 | { | ||
118 | struct xfs_dir2_data_free *bf; | ||
119 | __be16 *tagp = NULL; | ||
120 | struct xfs_dir2_data_unused *dup = NULL; | ||
121 | struct xfs_dir2_data_unused *enddup = NULL; | ||
122 | |||
123 | *compact = 0; | ||
124 | bf = hdr->bestfree; | ||
125 | |||
126 | /* | ||
127 | * If there are stale entries we'll use one for the leaf. | ||
128 | */ | ||
129 | if (btp->stale) { | ||
130 | if (be16_to_cpu(bf[0].length) >= len) { | ||
131 | /* | ||
132 | * The biggest entry enough to avoid compaction. | ||
133 | */ | ||
134 | dup = (xfs_dir2_data_unused_t *) | ||
135 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
136 | goto out; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Will need to compact to make this work. | ||
141 | * Tag just before the first leaf entry. | ||
142 | */ | ||
143 | *compact = 1; | ||
144 | tagp = (__be16 *)blp - 1; | ||
145 | |||
146 | /* Data object just before the first leaf entry. */ | ||
147 | dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
148 | |||
149 | /* | ||
150 | * If it's not free then the data will go where the | ||
151 | * leaf data starts now, if it works at all. | ||
152 | */ | ||
153 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
154 | if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * | ||
155 | (uint)sizeof(*blp) < len) | ||
156 | dup = NULL; | ||
157 | } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) | ||
158 | dup = NULL; | ||
159 | else | ||
160 | dup = (xfs_dir2_data_unused_t *)blp; | ||
161 | goto out; | ||
162 | } | ||
163 | |||
164 | /* | ||
165 | * No stale entries, so just use free space. | ||
166 | * Tag just before the first leaf entry. | ||
167 | */ | ||
168 | tagp = (__be16 *)blp - 1; | ||
169 | |||
170 | /* Data object just before the first leaf entry. */ | ||
171 | enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
172 | |||
173 | /* | ||
174 | * If it's not free then can't do this add without cleaning up: | ||
175 | * the space before the first leaf entry needs to be free so it | ||
176 | * can be expanded to hold the pointer to the new entry. | ||
177 | */ | ||
178 | if (be16_to_cpu(enddup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
179 | /* | ||
180 | * Check out the biggest freespace and see if it's the same one. | ||
181 | */ | ||
182 | dup = (xfs_dir2_data_unused_t *) | ||
183 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
184 | if (dup != enddup) { | ||
185 | /* | ||
186 | * Not the same free entry, just check its length. | ||
187 | */ | ||
188 | if (be16_to_cpu(dup->length) < len) | ||
189 | dup = NULL; | ||
190 | goto out; | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * It is the biggest freespace; can it hold the leaf too? | ||
195 | */ | ||
196 | if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { | ||
197 | /* | ||
198 | * It can't, so use the second-largest entry instead if that works. | ||
199 | */ | ||
200 | if (be16_to_cpu(bf[1].length) >= len) | ||
201 | dup = (xfs_dir2_data_unused_t *) | ||
202 | ((char *)hdr + be16_to_cpu(bf[1].offset)); | ||
203 | else | ||
204 | dup = NULL; | ||
205 | } | ||
206 | } | ||
207 | out: | ||
208 | *tagpp = tagp; | ||
209 | *dupp = dup; | ||
210 | *enddupp = enddup; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Compact the leaf entries. | ||
215 | * Leave the highest-numbered stale entry stale. | ||
216 | * XXX should be the one closest to mid but mid is not yet computed. | ||
217 | */ | ||
218 | static void | ||
219 | xfs_dir2_block_compact( | ||
220 | struct xfs_trans *tp, | ||
221 | struct xfs_buf *bp, | ||
222 | struct xfs_dir2_data_hdr *hdr, | ||
223 | struct xfs_dir2_block_tail *btp, | ||
224 | struct xfs_dir2_leaf_entry *blp, | ||
225 | int *needlog, | ||
226 | int *lfloghigh, | ||
227 | int *lfloglow) | ||
228 | { | ||
229 | int fromidx; /* source leaf index */ | ||
230 | int toidx; /* target leaf index */ | ||
231 | int needscan = 0; | ||
232 | int highstale; /* high stale index */ | ||
233 | |||
234 | fromidx = toidx = be32_to_cpu(btp->count) - 1; | ||
235 | highstale = *lfloghigh = -1; | ||
236 | for (; fromidx >= 0; fromidx--) { | ||
237 | if (blp[fromidx].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { | ||
238 | if (highstale == -1) | ||
239 | highstale = toidx; | ||
240 | else { | ||
241 | if (*lfloghigh == -1) | ||
242 | *lfloghigh = toidx; | ||
243 | continue; | ||
244 | } | ||
245 | } | ||
246 | if (fromidx < toidx) | ||
247 | blp[toidx] = blp[fromidx]; | ||
248 | toidx--; | ||
249 | } | ||
250 | *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); | ||
251 | *lfloghigh -= be32_to_cpu(btp->stale) - 1; | ||
252 | be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); | ||
253 | xfs_dir2_data_make_free(tp, bp, | ||
254 | (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), | ||
255 | (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), | ||
256 | needlog, &needscan); | ||
257 | blp += be32_to_cpu(btp->stale) - 1; | ||
258 | btp->stale = cpu_to_be32(1); | ||
259 | /* | ||
260 | * If we now need to rebuild the bestfree map, do so. | ||
261 | * This needs to happen before the next call to use_free. | ||
262 | */ | ||
263 | if (needscan) | ||
264 | xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog); | ||
265 | } | ||
266 | |||
59 | /* | 267 | /* |
60 | * Add an entry to a block directory. | 268 | * Add an entry to a block directory. |
61 | */ | 269 | */ |
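The compaction loop in xfs_dir2_block_compact() above slides live leaf entries toward the top of the array while preserving only the highest-numbered stale entry. A runnable sketch of just that loop (entry values are made up; 0 stands in for XFS_DIR2_NULL_DATAPTR):

#include <stdio.h>

#define STALE 0	/* stands in for XFS_DIR2_NULL_DATAPTR */

int main(void)
{
	int blp[] = { 10, STALE, 20, STALE, 30, STALE };
	int count = 6;
	int fromidx, toidx, highstale = -1;

	/* walk from the top; keep the first (highest) stale entry seen */
	for (fromidx = toidx = count - 1; fromidx >= 0; fromidx--) {
		if (blp[fromidx] == STALE) {
			if (highstale == -1)
				highstale = toidx;	/* keep this one */
			else
				continue;		/* squeeze this one out */
		}
		if (fromidx < toidx)
			blp[toidx] = blp[fromidx];
		toidx--;
	}

	/* live entries now occupy blp[toidx + 1 .. count - 1] */
	printf("kept stale slot %d:", highstale);
	for (fromidx = toidx + 1; fromidx < count; fromidx++)
		printf(" %d", blp[fromidx]);	/* prints: 10 20 30 0 */
	printf("\n");
	return 0;
}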
@@ -63,7 +271,6 @@ int /* error */ | |||
63 | xfs_dir2_block_addname( | 271 | xfs_dir2_block_addname( |
64 | xfs_da_args_t *args) /* directory op arguments */ | 272 | xfs_da_args_t *args) /* directory op arguments */ |
65 | { | 273 | { |
66 | xfs_dir2_data_free_t *bf; /* bestfree table in block */ | ||
67 | xfs_dir2_data_hdr_t *hdr; /* block header */ | 274 | xfs_dir2_data_hdr_t *hdr; /* block header */ |
68 | xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ | 275 | xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ |
69 | struct xfs_buf *bp; /* buffer for block */ | 276 | struct xfs_buf *bp; /* buffer for block */ |
@@ -94,134 +301,44 @@ xfs_dir2_block_addname( | |||
94 | dp = args->dp; | 301 | dp = args->dp; |
95 | tp = args->trans; | 302 | tp = args->trans; |
96 | mp = dp->i_mount; | 303 | mp = dp->i_mount; |
97 | /* | 304 | |
98 | * Read the (one and only) directory block into dabuf bp. | 305 | /* Read the (one and only) directory block into bp. */ |
99 | */ | 306 | error = xfs_dir2_block_read(tp, dp, &bp); |
100 | if ((error = | 307 | if (error) |
101 | xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) { | ||
102 | return error; | 308 | return error; |
103 | } | 309 | |
104 | ASSERT(bp != NULL); | ||
105 | hdr = bp->b_addr; | ||
106 | /* | ||
107 | * Check the magic number, corrupted if wrong. | ||
108 | */ | ||
109 | if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) { | ||
110 | XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", | ||
111 | XFS_ERRLEVEL_LOW, mp, hdr); | ||
112 | xfs_trans_brelse(tp, bp); | ||
113 | return XFS_ERROR(EFSCORRUPTED); | ||
114 | } | ||
115 | len = xfs_dir2_data_entsize(args->namelen); | 310 | len = xfs_dir2_data_entsize(args->namelen); |
311 | |||
116 | /* | 312 | /* |
117 | * Set up pointers to parts of the block. | 313 | * Set up pointers to parts of the block. |
118 | */ | 314 | */ |
119 | bf = hdr->bestfree; | 315 | hdr = bp->b_addr; |
120 | btp = xfs_dir2_block_tail_p(mp, hdr); | 316 | btp = xfs_dir2_block_tail_p(mp, hdr); |
121 | blp = xfs_dir2_block_leaf_p(btp); | 317 | blp = xfs_dir2_block_leaf_p(btp); |
318 | |||
122 | /* | 319 | /* |
123 | * No stale entries? Need space for entry and new leaf. | 320 | * Find out if we can reuse stale entries or whether we need extra |
124 | */ | 321 | * space for entry and new leaf. |
125 | if (!btp->stale) { | ||
126 | /* | ||
127 | * Tag just before the first leaf entry. | ||
128 | */ | ||
129 | tagp = (__be16 *)blp - 1; | ||
130 | /* | ||
131 | * Data object just before the first leaf entry. | ||
132 | */ | ||
133 | enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
134 | /* | ||
135 | * If it's not free then can't do this add without cleaning up: | ||
136 | * the space before the first leaf entry needs to be free so it | ||
137 | * can be expanded to hold the pointer to the new entry. | ||
138 | */ | ||
139 | if (be16_to_cpu(enddup->freetag) != XFS_DIR2_DATA_FREE_TAG) | ||
140 | dup = enddup = NULL; | ||
141 | /* | ||
142 | * Check out the biggest freespace and see if it's the same one. | ||
143 | */ | ||
144 | else { | ||
145 | dup = (xfs_dir2_data_unused_t *) | ||
146 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
147 | if (dup == enddup) { | ||
148 | /* | ||
149 | * It is the biggest freespace, is it too small | ||
150 | * to hold the new leaf too? | ||
151 | */ | ||
152 | if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { | ||
153 | /* | ||
154 | * Yes, we use the second-largest | ||
155 | * entry instead if it works. | ||
156 | */ | ||
157 | if (be16_to_cpu(bf[1].length) >= len) | ||
158 | dup = (xfs_dir2_data_unused_t *) | ||
159 | ((char *)hdr + | ||
160 | be16_to_cpu(bf[1].offset)); | ||
161 | else | ||
162 | dup = NULL; | ||
163 | } | ||
164 | } else { | ||
165 | /* | ||
166 | * Not the same free entry, | ||
167 | * just check its length. | ||
168 | */ | ||
169 | if (be16_to_cpu(dup->length) < len) { | ||
170 | dup = NULL; | ||
171 | } | ||
172 | } | ||
173 | } | ||
174 | compact = 0; | ||
175 | } | ||
176 | /* | ||
177 | * If there are stale entries we'll use one for the leaf. | ||
178 | * Is the biggest entry enough to avoid compaction? | ||
179 | */ | ||
180 | else if (be16_to_cpu(bf[0].length) >= len) { | ||
181 | dup = (xfs_dir2_data_unused_t *) | ||
182 | ((char *)hdr + be16_to_cpu(bf[0].offset)); | ||
183 | compact = 0; | ||
184 | } | ||
185 | /* | ||
186 | * Will need to compact to make this work. | ||
187 | */ | 322 | */ |
188 | else { | 323 | xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup, |
189 | /* | 324 | &enddup, &compact, len); |
190 | * Tag just before the first leaf entry. | 325 | |
191 | */ | ||
192 | tagp = (__be16 *)blp - 1; | ||
193 | /* | ||
194 | * Data object just before the first leaf entry. | ||
195 | */ | ||
196 | dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp)); | ||
197 | /* | ||
198 | * If it's not free then the data will go where the | ||
199 | * leaf data starts now, if it works at all. | ||
200 | */ | ||
201 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | ||
202 | if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * | ||
203 | (uint)sizeof(*blp) < len) | ||
204 | dup = NULL; | ||
205 | } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) | ||
206 | dup = NULL; | ||
207 | else | ||
208 | dup = (xfs_dir2_data_unused_t *)blp; | ||
209 | compact = 1; | ||
210 | } | ||
211 | /* | 326 | /* |
212 | * If this isn't a real add, we're done with the buffer. | 327 | * We've now done everything needed for the space check. |
213 | */ | 328 | */ |
214 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) | 329 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) { |
215 | xfs_trans_brelse(tp, bp); | 330 | xfs_trans_brelse(tp, bp); |
331 | if (!dup) | ||
332 | return XFS_ERROR(ENOSPC); | ||
333 | return 0; | ||
334 | } | ||
335 | |||
216 | /* | 336 | /* |
217 | * If we don't have space for the new entry & leaf ... | 337 | * If we don't have space for the new entry & leaf ... |
218 | */ | 338 | */ |
219 | if (!dup) { | 339 | if (!dup) { |
220 | /* | 340 | /* Don't have a space reservation: return no-space. */ |
221 | * Not trying to actually do anything, or don't have | 341 | if (args->total == 0) |
222 | * a space reservation: return no-space. | ||
223 | */ | ||
224 | if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) | ||
225 | return XFS_ERROR(ENOSPC); | 342 | return XFS_ERROR(ENOSPC); |
226 | /* | 343 | /* |
227 | * Convert to the next larger format. | 344 | * Convert to the next larger format. |
@@ -232,65 +349,24 @@ xfs_dir2_block_addname( | |||
232 | return error; | 349 | return error; |
233 | return xfs_dir2_leaf_addname(args); | 350 | return xfs_dir2_leaf_addname(args); |
234 | } | 351 | } |
235 | /* | 352 | |
236 | * Just checking, and it would work, so say so. | ||
237 | */ | ||
238 | if (args->op_flags & XFS_DA_OP_JUSTCHECK) | ||
239 | return 0; | ||
240 | needlog = needscan = 0; | 353 | needlog = needscan = 0; |
354 | |||
241 | /* | 355 | /* |
242 | * If need to compact the leaf entries, do it now. | 356 | * If need to compact the leaf entries, do it now. |
243 | * Leave the highest-numbered stale entry stale. | ||
244 | * XXX should be the one closest to mid but mid is not yet computed. | ||
245 | */ | ||
246 | if (compact) { | ||
247 | int fromidx; /* source leaf index */ | ||
248 | int toidx; /* target leaf index */ | ||
249 | |||
250 | for (fromidx = toidx = be32_to_cpu(btp->count) - 1, | ||
251 | highstale = lfloghigh = -1; | ||
252 | fromidx >= 0; | ||
253 | fromidx--) { | ||
254 | if (blp[fromidx].address == | ||
255 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) { | ||
256 | if (highstale == -1) | ||
257 | highstale = toidx; | ||
258 | else { | ||
259 | if (lfloghigh == -1) | ||
260 | lfloghigh = toidx; | ||
261 | continue; | ||
262 | } | ||
263 | } | ||
264 | if (fromidx < toidx) | ||
265 | blp[toidx] = blp[fromidx]; | ||
266 | toidx--; | ||
267 | } | ||
268 | lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); | ||
269 | lfloghigh -= be32_to_cpu(btp->stale) - 1; | ||
270 | be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); | ||
271 | xfs_dir2_data_make_free(tp, bp, | ||
272 | (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), | ||
273 | (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), | ||
274 | &needlog, &needscan); | ||
275 | blp += be32_to_cpu(btp->stale) - 1; | ||
276 | btp->stale = cpu_to_be32(1); | ||
277 | /* | ||
278 | * If we now need to rebuild the bestfree map, do so. | ||
279 | * This needs to happen before the next call to use_free. | ||
280 | */ | ||
281 | if (needscan) { | ||
282 | xfs_dir2_data_freescan(mp, hdr, &needlog); | ||
283 | needscan = 0; | ||
284 | } | ||
285 | } | ||
286 | /* | ||
287 | * Set leaf logging boundaries to impossible state. | ||
288 | * For the no-stale case they're set explicitly. | ||
289 | */ | 357 | */ |
358 | if (compact) | ||
359 | xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, | ||
360 | &lfloghigh, &lfloglow); | ||
290 | else if (btp->stale) { | 361 | else if (btp->stale) { |
362 | /* | ||
363 | * Set leaf logging boundaries to impossible state. | ||
364 | * For the no-stale case they're set explicitly. | ||
365 | */ | ||
291 | lfloglow = be32_to_cpu(btp->count); | 366 | lfloglow = be32_to_cpu(btp->count); |
292 | lfloghigh = -1; | 367 | lfloghigh = -1; |
293 | } | 368 | } |
369 | |||
294 | /* | 370 | /* |
295 | * Find the slot that's first lower than our hash value, -1 if none. | 371 | * Find the slot that's first lower than our hash value, -1 if none. |
296 | */ | 372 | */ |
@@ -450,18 +526,13 @@ xfs_dir2_block_getdents( | |||
450 | /* | 526 | /* |
451 | * If the block number in the offset is out of range, we're done. | 527 | * If the block number in the offset is out of range, we're done. |
452 | */ | 528 | */ |
453 | if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) { | 529 | if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) |
454 | return 0; | 530 | return 0; |
455 | } | 531 | |
456 | /* | 532 | error = xfs_dir2_block_read(NULL, dp, &bp); |
457 | * Can't read the block, give up, else get dabuf in bp. | ||
458 | */ | ||
459 | error = xfs_da_read_buf(NULL, dp, mp->m_dirdatablk, -1, | ||
460 | &bp, XFS_DATA_FORK); | ||
461 | if (error) | 533 | if (error) |
462 | return error; | 534 | return error; |
463 | 535 | ||
464 | ASSERT(bp != NULL); | ||
465 | /* | 536 | /* |
466 | * Extract the byte offset we start at from the seek pointer. | 537 | * Extract the byte offset we start at from the seek pointer. |
467 | * We'll skip entries before this. | 538 | * We'll skip entries before this. |
@@ -637,14 +708,11 @@ xfs_dir2_block_lookup_int( | |||
637 | dp = args->dp; | 708 | dp = args->dp; |
638 | tp = args->trans; | 709 | tp = args->trans; |
639 | mp = dp->i_mount; | 710 | mp = dp->i_mount; |
640 | /* | 711 | |
641 | * Read the buffer, return error if we can't get it. | 712 | error = xfs_dir2_block_read(tp, dp, &bp); |
642 | */ | 713 | if (error) |
643 | if ((error = | ||
644 | xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) { | ||
645 | return error; | 714 | return error; |
646 | } | 715 | |
647 | ASSERT(bp != NULL); | ||
648 | hdr = bp->b_addr; | 716 | hdr = bp->b_addr; |
649 | xfs_dir2_data_check(dp, bp); | 717 | xfs_dir2_data_check(dp, bp); |
650 | btp = xfs_dir2_block_tail_p(mp, hdr); | 718 | btp = xfs_dir2_block_tail_p(mp, hdr); |
@@ -917,10 +985,10 @@ xfs_dir2_leaf_to_block( | |||
917 | /* | 985 | /* |
918 | * Read the data block if we don't already have it, give up if it fails. | 986 | * Read the data block if we don't already have it, give up if it fails. |
919 | */ | 987 | */ |
920 | if (dbp == NULL && | 988 | if (!dbp) { |
921 | (error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp, | 989 | error = xfs_dir2_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp); |
922 | XFS_DATA_FORK))) { | 990 | if (error) |
923 | return error; | 991 | return error; |
924 | } | 992 | } |
925 | hdr = dbp->b_addr; | 993 | hdr = dbp->b_addr; |
926 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); | 994 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); |
@@ -944,6 +1012,7 @@ xfs_dir2_leaf_to_block( | |||
944 | /* | 1012 | /* |
945 | * Start converting it to block form. | 1013 | * Start converting it to block form. |
946 | */ | 1014 | */ |
1015 | dbp->b_ops = &xfs_dir2_block_buf_ops; | ||
947 | hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); | 1016 | hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); |
948 | needlog = 1; | 1017 | needlog = 1; |
949 | needscan = 0; | 1018 | needscan = 0; |
@@ -1073,6 +1142,7 @@ xfs_dir2_sf_to_block( | |||
1073 | kmem_free(sfp); | 1142 | kmem_free(sfp); |
1074 | return error; | 1143 | return error; |
1075 | } | 1144 | } |
1145 | bp->b_ops = &xfs_dir2_block_buf_ops; | ||
1076 | hdr = bp->b_addr; | 1146 | hdr = bp->b_addr; |
1077 | hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); | 1147 | hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); |
1078 | /* | 1148 | /* |
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 44ffd4d6bc91..ffcf1774152e 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c | |||
@@ -34,14 +34,13 @@ | |||
34 | STATIC xfs_dir2_data_free_t * | 34 | STATIC xfs_dir2_data_free_t * |
35 | xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); | 35 | xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup); |
36 | 36 | ||
37 | #ifdef DEBUG | ||
38 | /* | 37 | /* |
39 | * Check the consistency of the data block. | 38 | * Check the consistency of the data block. |
40 | * The input can also be a block-format directory. | 39 | * The input can also be a block-format directory. |
41 | * Pop an assert if we find anything bad. | 40 | * Return 0 if the buffer is good, otherwise an error. |
42 | */ | 41 | */ |
43 | void | 42 | int |
44 | xfs_dir2_data_check( | 43 | __xfs_dir2_data_check( |
45 | struct xfs_inode *dp, /* incore inode pointer */ | 44 | struct xfs_inode *dp, /* incore inode pointer */ |
46 | struct xfs_buf *bp) /* data block's buffer */ | 45 | struct xfs_buf *bp) /* data block's buffer */ |
47 | { | 46 | { |
@@ -64,18 +63,23 @@ xfs_dir2_data_check( | |||
64 | int stale; /* count of stale leaves */ | 63 | int stale; /* count of stale leaves */ |
65 | struct xfs_name name; | 64 | struct xfs_name name; |
66 | 65 | ||
67 | mp = dp->i_mount; | 66 | mp = bp->b_target->bt_mount; |
68 | hdr = bp->b_addr; | 67 | hdr = bp->b_addr; |
69 | bf = hdr->bestfree; | 68 | bf = hdr->bestfree; |
70 | p = (char *)(hdr + 1); | 69 | p = (char *)(hdr + 1); |
71 | 70 | ||
72 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { | 71 | switch (hdr->magic) { |
72 | case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): | ||
73 | btp = xfs_dir2_block_tail_p(mp, hdr); | 73 | btp = xfs_dir2_block_tail_p(mp, hdr); |
74 | lep = xfs_dir2_block_leaf_p(btp); | 74 | lep = xfs_dir2_block_leaf_p(btp); |
75 | endp = (char *)lep; | 75 | endp = (char *)lep; |
76 | } else { | 76 | break; |
77 | ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC)); | 77 | case cpu_to_be32(XFS_DIR2_DATA_MAGIC): |
78 | endp = (char *)hdr + mp->m_dirblksize; | 78 | endp = (char *)hdr + mp->m_dirblksize; |
79 | break; | ||
80 | default: | ||
81 | XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); | ||
82 | return EFSCORRUPTED; | ||
79 | } | 83 | } |
80 | 84 | ||
81 | count = lastfree = freeseen = 0; | 85 | count = lastfree = freeseen = 0; |
@@ -83,19 +87,22 @@ xfs_dir2_data_check( | |||
83 | * Account for zero bestfree entries. | 87 | * Account for zero bestfree entries. |
84 | */ | 88 | */ |
85 | if (!bf[0].length) { | 89 | if (!bf[0].length) { |
86 | ASSERT(!bf[0].offset); | 90 | XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); |
87 | freeseen |= 1 << 0; | 91 | freeseen |= 1 << 0; |
88 | } | 92 | } |
89 | if (!bf[1].length) { | 93 | if (!bf[1].length) { |
90 | ASSERT(!bf[1].offset); | 94 | XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); |
91 | freeseen |= 1 << 1; | 95 | freeseen |= 1 << 1; |
92 | } | 96 | } |
93 | if (!bf[2].length) { | 97 | if (!bf[2].length) { |
94 | ASSERT(!bf[2].offset); | 98 | XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); |
95 | freeseen |= 1 << 2; | 99 | freeseen |= 1 << 2; |
96 | } | 100 | } |
97 | ASSERT(be16_to_cpu(bf[0].length) >= be16_to_cpu(bf[1].length)); | 101 | |
98 | ASSERT(be16_to_cpu(bf[1].length) >= be16_to_cpu(bf[2].length)); | 102 | XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= |
103 | be16_to_cpu(bf[1].length)); | ||
104 | XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= | ||
105 | be16_to_cpu(bf[2].length)); | ||
99 | /* | 106 | /* |
100 | * Loop over the data/unused entries. | 107 | * Loop over the data/unused entries. |
101 | */ | 108 | */ |
@@ -107,17 +114,20 @@ xfs_dir2_data_check( | |||
107 | * doesn't need to be there. | 114 | * doesn't need to be there. |
108 | */ | 115 | */ |
109 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { | 116 | if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { |
110 | ASSERT(lastfree == 0); | 117 | XFS_WANT_CORRUPTED_RETURN(lastfree == 0); |
111 | ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == | 118 | XFS_WANT_CORRUPTED_RETURN( |
112 | (char *)dup - (char *)hdr); | 119 | be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == |
120 | (char *)dup - (char *)hdr); | ||
113 | dfp = xfs_dir2_data_freefind(hdr, dup); | 121 | dfp = xfs_dir2_data_freefind(hdr, dup); |
114 | if (dfp) { | 122 | if (dfp) { |
115 | i = (int)(dfp - bf); | 123 | i = (int)(dfp - bf); |
116 | ASSERT((freeseen & (1 << i)) == 0); | 124 | XFS_WANT_CORRUPTED_RETURN( |
125 | (freeseen & (1 << i)) == 0); | ||
117 | freeseen |= 1 << i; | 126 | freeseen |= 1 << i; |
118 | } else { | 127 | } else { |
119 | ASSERT(be16_to_cpu(dup->length) <= | 128 | XFS_WANT_CORRUPTED_RETURN( |
120 | be16_to_cpu(bf[2].length)); | 129 | be16_to_cpu(dup->length) <= |
130 | be16_to_cpu(bf[2].length)); | ||
121 | } | 131 | } |
122 | p += be16_to_cpu(dup->length); | 132 | p += be16_to_cpu(dup->length); |
123 | lastfree = 1; | 133 | lastfree = 1; |
@@ -130,10 +140,12 @@ xfs_dir2_data_check( | |||
130 | * The linear search is crude but this is DEBUG code. | 140 | * The linear search is crude but this is DEBUG code. |
131 | */ | 141 | */ |
132 | dep = (xfs_dir2_data_entry_t *)p; | 142 | dep = (xfs_dir2_data_entry_t *)p; |
133 | ASSERT(dep->namelen != 0); | 143 | XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); |
134 | ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); | 144 | XFS_WANT_CORRUPTED_RETURN( |
135 | ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == | 145 | !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); |
136 | (char *)dep - (char *)hdr); | 146 | XFS_WANT_CORRUPTED_RETURN( |
147 | be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == | ||
148 | (char *)dep - (char *)hdr); | ||
137 | count++; | 149 | count++; |
138 | lastfree = 0; | 150 | lastfree = 0; |
139 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { | 151 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { |
@@ -148,27 +160,122 @@ xfs_dir2_data_check( | |||
148 | be32_to_cpu(lep[i].hashval) == hash) | 160 | be32_to_cpu(lep[i].hashval) == hash) |
149 | break; | 161 | break; |
150 | } | 162 | } |
151 | ASSERT(i < be32_to_cpu(btp->count)); | 163 | XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); |
152 | } | 164 | } |
153 | p += xfs_dir2_data_entsize(dep->namelen); | 165 | p += xfs_dir2_data_entsize(dep->namelen); |
154 | } | 166 | } |
155 | /* | 167 | /* |
156 | * Need to have seen all the entries and all the bestfree slots. | 168 | * Need to have seen all the entries and all the bestfree slots. |
157 | */ | 169 | */ |
158 | ASSERT(freeseen == 7); | 170 | XFS_WANT_CORRUPTED_RETURN(freeseen == 7); |
159 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { | 171 | if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) { |
160 | for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { | 172 | for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { |
161 | if (lep[i].address == | 173 | if (lep[i].address == |
162 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) | 174 | cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) |
163 | stale++; | 175 | stale++; |
164 | if (i > 0) | 176 | if (i > 0) |
165 | ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); | 177 | XFS_WANT_CORRUPTED_RETURN( |
178 | be32_to_cpu(lep[i].hashval) >= | ||
179 | be32_to_cpu(lep[i - 1].hashval)); | ||
166 | } | 180 | } |
167 | ASSERT(count == be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); | 181 | XFS_WANT_CORRUPTED_RETURN(count == |
168 | ASSERT(stale == be32_to_cpu(btp->stale)); | 182 | be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); |
183 | XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); | ||
169 | } | 184 | } |
185 | return 0; | ||
186 | } | ||
187 | |||
188 | static void | ||
189 | xfs_dir2_data_verify( | ||
190 | struct xfs_buf *bp) | ||
191 | { | ||
192 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
193 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | ||
194 | int block_ok = 0; | ||
195 | |||
196 | block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC); | ||
197 | block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; | ||
198 | |||
199 | if (!block_ok) { | ||
200 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
201 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
202 | } | ||
203 | } | ||
204 | |||
205 | /* | ||
206 | * Readahead of the first block of the directory when it is opened is completely | ||
207 | * oblivious to the format of the directory. Hence we can either get a block | ||
208 | * format buffer or a data format buffer on readahead. | ||
209 | */ | ||
210 | static void | ||
211 | xfs_dir2_data_reada_verify( | ||
212 | struct xfs_buf *bp) | ||
213 | { | ||
214 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
215 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | ||
216 | |||
217 | switch (hdr->magic) { | ||
218 | case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): | ||
219 | bp->b_ops = &xfs_dir2_block_buf_ops; | ||
220 | bp->b_ops->verify_read(bp); | ||
221 | return; | ||
222 | case cpu_to_be32(XFS_DIR2_DATA_MAGIC): | ||
223 | xfs_dir2_data_verify(bp); | ||
224 | return; | ||
225 | default: | ||
226 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
227 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
228 | break; | ||
229 | } | ||
230 | } | ||
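A minimal caller sketch of the situation the comment above describes: directory open issues readahead before the directory's format is known, so it has to go through the format-agnostic readahead path rather than attach xfs_dir2_data_buf_ops directly. The helper name below is hypothetical; xfs_dir2_data_readahead() is the wrapper added further down in this file.

	static void
	example_dir_open_readahead(
		struct xfs_inode	*dp)
	{
		/*
		 * Block 0 of the directory data may turn out to be
		 * block-format or data-format; the reada ops dispatch
		 * on the magic number when the I/O completes.
		 */
		xfs_dir2_data_readahead(NULL, dp, 0, -1);
	}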
231 | |||
232 | static void | ||
233 | xfs_dir2_data_read_verify( | ||
234 | struct xfs_buf *bp) | ||
235 | { | ||
236 | xfs_dir2_data_verify(bp); | ||
237 | } | ||
238 | |||
239 | static void | ||
240 | xfs_dir2_data_write_verify( | ||
241 | struct xfs_buf *bp) | ||
242 | { | ||
243 | xfs_dir2_data_verify(bp); | ||
244 | } | ||
245 | |||
246 | const struct xfs_buf_ops xfs_dir2_data_buf_ops = { | ||
247 | .verify_read = xfs_dir2_data_read_verify, | ||
248 | .verify_write = xfs_dir2_data_write_verify, | ||
249 | }; | ||
250 | |||
251 | static const struct xfs_buf_ops xfs_dir2_data_reada_buf_ops = { | ||
252 | .verify_read = xfs_dir2_data_reada_verify, | ||
253 | .verify_write = xfs_dir2_data_write_verify, | ||
254 | }; | ||
255 | |||
256 | |||
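The ops tables above assume the callback structure introduced earlier in this series; a sketch of its shape follows (it is not part of this hunk). xfs_da_read_buf() stores the ops on the buffer and verify_read runs when the read I/O completes, so a corrupt block is marked EFSCORRUPTED before any caller dereferences its contents.

	struct xfs_buf_ops {
		void	(*verify_read)(struct xfs_buf *bp);
		void	(*verify_write)(struct xfs_buf *bp);
	};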
257 | int | ||
258 | xfs_dir2_data_read( | ||
259 | struct xfs_trans *tp, | ||
260 | struct xfs_inode *dp, | ||
261 | xfs_dablk_t bno, | ||
262 | xfs_daddr_t mapped_bno, | ||
263 | struct xfs_buf **bpp) | ||
264 | { | ||
265 | return xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, | ||
266 | XFS_DATA_FORK, &xfs_dir2_data_buf_ops); | ||
267 | } | ||
268 | |||
269 | int | ||
270 | xfs_dir2_data_readahead( | ||
271 | struct xfs_trans *tp, | ||
272 | struct xfs_inode *dp, | ||
273 | xfs_dablk_t bno, | ||
274 | xfs_daddr_t mapped_bno) | ||
275 | { | ||
276 | return xfs_da_reada_buf(tp, dp, bno, mapped_bno, | ||
277 | XFS_DATA_FORK, &xfs_dir2_data_reada_buf_ops); | ||
170 | } | 278 | } |
171 | #endif | ||
172 | 279 | ||
173 | /* | 280 | /* |
174 | * Given a data block and an unused entry from that block, | 281 | * Given a data block and an unused entry from that block, |
@@ -409,10 +516,9 @@ xfs_dir2_data_init( | |||
409 | */ | 516 | */ |
410 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp, | 517 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp, |
411 | XFS_DATA_FORK); | 518 | XFS_DATA_FORK); |
412 | if (error) { | 519 | if (error) |
413 | return error; | 520 | return error; |
414 | } | 521 | bp->b_ops = &xfs_dir2_data_buf_ops; |
415 | ASSERT(bp != NULL); | ||
416 | 522 | ||
417 | /* | 523 | /* |
418 | * Initialize the header. | 524 | * Initialize the header. |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 0b296253bd01..60cd2fa4e047 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -48,6 +48,83 @@ static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, | |||
48 | int first, int last); | 48 | int first, int last); |
49 | static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); | 49 | static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); |
50 | 50 | ||
51 | static void | ||
52 | xfs_dir2_leaf_verify( | ||
53 | struct xfs_buf *bp, | ||
54 | __be16 magic) | ||
55 | { | ||
56 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
57 | struct xfs_dir2_leaf_hdr *hdr = bp->b_addr; | ||
58 | int block_ok = 0; | ||
59 | |||
60 | block_ok = hdr->info.magic == magic; | ||
61 | if (!block_ok) { | ||
62 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
63 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
64 | } | ||
65 | } | ||
66 | |||
67 | static void | ||
68 | xfs_dir2_leaf1_read_verify( | ||
69 | struct xfs_buf *bp) | ||
70 | { | ||
71 | xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); | ||
72 | } | ||
73 | |||
74 | static void | ||
75 | xfs_dir2_leaf1_write_verify( | ||
76 | struct xfs_buf *bp) | ||
77 | { | ||
78 | xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAF1_MAGIC)); | ||
79 | } | ||
80 | |||
81 | void | ||
82 | xfs_dir2_leafn_read_verify( | ||
83 | struct xfs_buf *bp) | ||
84 | { | ||
85 | xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); | ||
86 | } | ||
87 | |||
88 | void | ||
89 | xfs_dir2_leafn_write_verify( | ||
90 | struct xfs_buf *bp) | ||
91 | { | ||
92 | xfs_dir2_leaf_verify(bp, cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)); | ||
93 | } | ||
94 | |||
95 | static const struct xfs_buf_ops xfs_dir2_leaf1_buf_ops = { | ||
96 | .verify_read = xfs_dir2_leaf1_read_verify, | ||
97 | .verify_write = xfs_dir2_leaf1_write_verify, | ||
98 | }; | ||
99 | |||
100 | const struct xfs_buf_ops xfs_dir2_leafn_buf_ops = { | ||
101 | .verify_read = xfs_dir2_leafn_read_verify, | ||
102 | .verify_write = xfs_dir2_leafn_write_verify, | ||
103 | }; | ||
104 | |||
105 | static int | ||
106 | xfs_dir2_leaf_read( | ||
107 | struct xfs_trans *tp, | ||
108 | struct xfs_inode *dp, | ||
109 | xfs_dablk_t fbno, | ||
110 | xfs_daddr_t mappedbno, | ||
111 | struct xfs_buf **bpp) | ||
112 | { | ||
113 | return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | ||
114 | XFS_DATA_FORK, &xfs_dir2_leaf1_buf_ops); | ||
115 | } | ||
116 | |||
117 | int | ||
118 | xfs_dir2_leafn_read( | ||
119 | struct xfs_trans *tp, | ||
120 | struct xfs_inode *dp, | ||
121 | xfs_dablk_t fbno, | ||
122 | xfs_daddr_t mappedbno, | ||
123 | struct xfs_buf **bpp) | ||
124 | { | ||
125 | return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | ||
126 | XFS_DATA_FORK, &xfs_dir2_leafn_buf_ops); | ||
127 | } | ||
51 | 128 | ||
52 | /* | 129 | /* |
53 | * Convert a block form directory to a leaf form directory. | 130 | * Convert a block form directory to a leaf form directory. |
@@ -125,6 +202,7 @@ xfs_dir2_block_to_leaf( | |||
125 | /* | 202 | /* |
126 | * Fix up the block header, make it a data block. | 203 | * Fix up the block header, make it a data block. |
127 | */ | 204 | */ |
205 | dbp->b_ops = &xfs_dir2_data_buf_ops; | ||
128 | hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); | 206 | hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); |
129 | if (needscan) | 207 | if (needscan) |
130 | xfs_dir2_data_freescan(mp, hdr, &needlog); | 208 | xfs_dir2_data_freescan(mp, hdr, &needlog); |
@@ -311,15 +389,11 @@ xfs_dir2_leaf_addname( | |||
311 | dp = args->dp; | 389 | dp = args->dp; |
312 | tp = args->trans; | 390 | tp = args->trans; |
313 | mp = dp->i_mount; | 391 | mp = dp->i_mount; |
314 | /* | 392 | |
315 | * Read the leaf block. | 393 | error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); |
316 | */ | 394 | if (error) |
317 | error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, | ||
318 | XFS_DATA_FORK); | ||
319 | if (error) { | ||
320 | return error; | 395 | return error; |
321 | } | 396 | |
322 | ASSERT(lbp != NULL); | ||
323 | /* | 397 | /* |
324 | * Look up the entry by hash value and name. | 398 | * Look up the entry by hash value and name. |
325 | * We know it's not there, our caller has already done a lookup. | 399 | * We know it's not there, our caller has already done a lookup. |
@@ -494,22 +568,21 @@ xfs_dir2_leaf_addname( | |||
494 | hdr = dbp->b_addr; | 568 | hdr = dbp->b_addr; |
495 | bestsp[use_block] = hdr->bestfree[0].length; | 569 | bestsp[use_block] = hdr->bestfree[0].length; |
496 | grown = 1; | 570 | grown = 1; |
497 | } | 571 | } else { |
498 | /* | 572 | /* |
499 | * Already had space in some data block. | 573 | * Already had space in some data block. |
500 | * Just read that one in. | 574 | * Just read that one in. |
501 | */ | 575 | */ |
502 | else { | 576 | error = xfs_dir2_data_read(tp, dp, |
503 | if ((error = | 577 | xfs_dir2_db_to_da(mp, use_block), |
504 | xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block), | 578 | -1, &dbp); |
505 | -1, &dbp, XFS_DATA_FORK))) { | 579 | if (error) { |
506 | xfs_trans_brelse(tp, lbp); | 580 | xfs_trans_brelse(tp, lbp); |
507 | return error; | 581 | return error; |
508 | } | 582 | } |
509 | hdr = dbp->b_addr; | 583 | hdr = dbp->b_addr; |
510 | grown = 0; | 584 | grown = 0; |
511 | } | 585 | } |
512 | xfs_dir2_data_check(dp, dbp); | ||
513 | /* | 586 | /* |
514 | * Point to the biggest freespace in our data block. | 587 | * Point to the biggest freespace in our data block. |
515 | */ | 588 | */ |
@@ -892,10 +965,9 @@ xfs_dir2_leaf_readbuf( | |||
892 | * Read the directory block starting at the first mapping. | 965 | * Read the directory block starting at the first mapping. |
893 | */ | 966 | */ |
894 | mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); | 967 | mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); |
895 | error = xfs_da_read_buf(NULL, dp, map->br_startoff, | 968 | error = xfs_dir2_data_read(NULL, dp, map->br_startoff, |
896 | map->br_blockcount >= mp->m_dirblkfsbs ? | 969 | map->br_blockcount >= mp->m_dirblkfsbs ? |
897 | XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, | 970 | XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp); |
898 | &bp, XFS_DATA_FORK); | ||
899 | 971 | ||
900 | /* | 972 | /* |
901 | * Should just skip over the data block instead of giving up. | 973 | * Should just skip over the data block instead of giving up. |
@@ -922,11 +994,11 @@ xfs_dir2_leaf_readbuf( | |||
922 | */ | 994 | */ |
923 | if (i > mip->ra_current && | 995 | if (i > mip->ra_current && |
924 | map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { | 996 | map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { |
925 | xfs_buf_readahead(mp->m_ddev_targp, | 997 | xfs_dir2_data_readahead(NULL, dp, |
998 | map[mip->ra_index].br_startoff + mip->ra_offset, | ||
926 | XFS_FSB_TO_DADDR(mp, | 999 | XFS_FSB_TO_DADDR(mp, |
927 | map[mip->ra_index].br_startblock + | 1000 | map[mip->ra_index].br_startblock + |
928 | mip->ra_offset), | 1001 | mip->ra_offset)); |
929 | (int)BTOBB(mp->m_dirblksize)); | ||
930 | mip->ra_current = i; | 1002 | mip->ra_current = i; |
931 | } | 1003 | } |
932 | 1004 | ||
@@ -935,10 +1007,9 @@ xfs_dir2_leaf_readbuf( | |||
935 | * use our mapping, but this is a very rare case. | 1007 | * use our mapping, but this is a very rare case. |
936 | */ | 1008 | */ |
937 | else if (i > mip->ra_current) { | 1009 | else if (i > mip->ra_current) { |
938 | xfs_da_reada_buf(NULL, dp, | 1010 | xfs_dir2_data_readahead(NULL, dp, |
939 | map[mip->ra_index].br_startoff + | 1011 | map[mip->ra_index].br_startoff + |
940 | mip->ra_offset, | 1012 | mip->ra_offset, -1); |
941 | XFS_DATA_FORK); | ||
942 | mip->ra_current = i; | 1013 | mip->ra_current = i; |
943 | } | 1014 | } |
944 | 1015 | ||
@@ -1177,15 +1248,14 @@ xfs_dir2_leaf_init( | |||
1177 | * Get the buffer for the block. | 1248 | * Get the buffer for the block. |
1178 | */ | 1249 | */ |
1179 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp, | 1250 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp, |
1180 | XFS_DATA_FORK); | 1251 | XFS_DATA_FORK); |
1181 | if (error) { | 1252 | if (error) |
1182 | return error; | 1253 | return error; |
1183 | } | 1254 | |
1184 | ASSERT(bp != NULL); | ||
1185 | leaf = bp->b_addr; | ||
1186 | /* | 1255 | /* |
1187 | * Initialize the header. | 1256 | * Initialize the header. |
1188 | */ | 1257 | */ |
1258 | leaf = bp->b_addr; | ||
1189 | leaf->hdr.info.magic = cpu_to_be16(magic); | 1259 | leaf->hdr.info.magic = cpu_to_be16(magic); |
1190 | leaf->hdr.info.forw = 0; | 1260 | leaf->hdr.info.forw = 0; |
1191 | leaf->hdr.info.back = 0; | 1261 | leaf->hdr.info.back = 0; |
@@ -1198,10 +1268,12 @@ xfs_dir2_leaf_init( | |||
1198 | * the block. | 1268 | * the block. |
1199 | */ | 1269 | */ |
1200 | if (magic == XFS_DIR2_LEAF1_MAGIC) { | 1270 | if (magic == XFS_DIR2_LEAF1_MAGIC) { |
1271 | bp->b_ops = &xfs_dir2_leaf1_buf_ops; | ||
1201 | ltp = xfs_dir2_leaf_tail_p(mp, leaf); | 1272 | ltp = xfs_dir2_leaf_tail_p(mp, leaf); |
1202 | ltp->bestcount = 0; | 1273 | ltp->bestcount = 0; |
1203 | xfs_dir2_leaf_log_tail(tp, bp); | 1274 | xfs_dir2_leaf_log_tail(tp, bp); |
1204 | } | 1275 | } else |
1276 | bp->b_ops = &xfs_dir2_leafn_buf_ops; | ||
1205 | *bpp = bp; | 1277 | *bpp = bp; |
1206 | return 0; | 1278 | return 0; |
1207 | } | 1279 | } |
@@ -1372,13 +1444,11 @@ xfs_dir2_leaf_lookup_int( | |||
1372 | dp = args->dp; | 1444 | dp = args->dp; |
1373 | tp = args->trans; | 1445 | tp = args->trans; |
1374 | mp = dp->i_mount; | 1446 | mp = dp->i_mount; |
1375 | /* | 1447 | |
1376 | * Read the leaf block into the buffer. | 1448 | error = xfs_dir2_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); |
1377 | */ | ||
1378 | error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, | ||
1379 | XFS_DATA_FORK); | ||
1380 | if (error) | 1449 | if (error) |
1381 | return error; | 1450 | return error; |
1451 | |||
1382 | *lbpp = lbp; | 1452 | *lbpp = lbp; |
1383 | leaf = lbp->b_addr; | 1453 | leaf = lbp->b_addr; |
1384 | xfs_dir2_leaf_check(dp, lbp); | 1454 | xfs_dir2_leaf_check(dp, lbp); |
@@ -1409,14 +1479,13 @@ xfs_dir2_leaf_lookup_int( | |||
1409 | if (newdb != curdb) { | 1479 | if (newdb != curdb) { |
1410 | if (dbp) | 1480 | if (dbp) |
1411 | xfs_trans_brelse(tp, dbp); | 1481 | xfs_trans_brelse(tp, dbp); |
1412 | error = xfs_da_read_buf(tp, dp, | 1482 | error = xfs_dir2_data_read(tp, dp, |
1413 | xfs_dir2_db_to_da(mp, newdb), | 1483 | xfs_dir2_db_to_da(mp, newdb), |
1414 | -1, &dbp, XFS_DATA_FORK); | 1484 | -1, &dbp); |
1415 | if (error) { | 1485 | if (error) { |
1416 | xfs_trans_brelse(tp, lbp); | 1486 | xfs_trans_brelse(tp, lbp); |
1417 | return error; | 1487 | return error; |
1418 | } | 1488 | } |
1419 | xfs_dir2_data_check(dp, dbp); | ||
1420 | curdb = newdb; | 1489 | curdb = newdb; |
1421 | } | 1490 | } |
1422 | /* | 1491 | /* |
@@ -1451,9 +1520,9 @@ xfs_dir2_leaf_lookup_int( | |||
1451 | ASSERT(cidb != -1); | 1520 | ASSERT(cidb != -1); |
1452 | if (cidb != curdb) { | 1521 | if (cidb != curdb) { |
1453 | xfs_trans_brelse(tp, dbp); | 1522 | xfs_trans_brelse(tp, dbp); |
1454 | error = xfs_da_read_buf(tp, dp, | 1523 | error = xfs_dir2_data_read(tp, dp, |
1455 | xfs_dir2_db_to_da(mp, cidb), | 1524 | xfs_dir2_db_to_da(mp, cidb), |
1456 | -1, &dbp, XFS_DATA_FORK); | 1525 | -1, &dbp); |
1457 | if (error) { | 1526 | if (error) { |
1458 | xfs_trans_brelse(tp, lbp); | 1527 | xfs_trans_brelse(tp, lbp); |
1459 | return error; | 1528 | return error; |
@@ -1738,10 +1807,9 @@ xfs_dir2_leaf_trim_data( | |||
1738 | /* | 1807 | /* |
1739 | * Read the offending data block. We need its buffer. | 1808 | * Read the offending data block. We need its buffer. |
1740 | */ | 1809 | */ |
1741 | if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp, | 1810 | error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp); |
1742 | XFS_DATA_FORK))) { | 1811 | if (error) |
1743 | return error; | 1812 | return error; |
1744 | } | ||
1745 | 1813 | ||
1746 | leaf = lbp->b_addr; | 1814 | leaf = lbp->b_addr; |
1747 | ltp = xfs_dir2_leaf_tail_p(mp, leaf); | 1815 | ltp = xfs_dir2_leaf_tail_p(mp, leaf); |
@@ -1864,10 +1932,9 @@ xfs_dir2_node_to_leaf( | |||
1864 | /* | 1932 | /* |
1865 | * Read the freespace block. | 1933 | * Read the freespace block. |
1866 | */ | 1934 | */ |
1867 | if ((error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp, | 1935 | error = xfs_dir2_free_read(tp, dp, mp->m_dirfreeblk, &fbp); |
1868 | XFS_DATA_FORK))) { | 1936 | if (error) |
1869 | return error; | 1937 | return error; |
1870 | } | ||
1871 | free = fbp->b_addr; | 1938 | free = fbp->b_addr; |
1872 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); | 1939 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); |
1873 | ASSERT(!free->hdr.firstdb); | 1940 | ASSERT(!free->hdr.firstdb); |
@@ -1890,7 +1957,10 @@ xfs_dir2_node_to_leaf( | |||
1890 | xfs_dir2_leaf_compact(args, lbp); | 1957 | xfs_dir2_leaf_compact(args, lbp); |
1891 | else | 1958 | else |
1892 | xfs_dir2_leaf_log_header(tp, lbp); | 1959 | xfs_dir2_leaf_log_header(tp, lbp); |
1960 | |||
1961 | lbp->b_ops = &xfs_dir2_leaf1_buf_ops; | ||
1893 | leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC); | 1962 | leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC); |
1963 | |||
1894 | /* | 1964 | /* |
1895 | * Set up the leaf tail from the freespace block. | 1965 | * Set up the leaf tail from the freespace block. |
1896 | */ | 1966 | */ |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 6c7052406605..5980f9b7fa9b 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -55,6 +55,74 @@ static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp, | |||
55 | static int xfs_dir2_node_addname_int(xfs_da_args_t *args, | 55 | static int xfs_dir2_node_addname_int(xfs_da_args_t *args, |
56 | xfs_da_state_blk_t *fblk); | 56 | xfs_da_state_blk_t *fblk); |
57 | 57 | ||
58 | static void | ||
59 | xfs_dir2_free_verify( | ||
60 | struct xfs_buf *bp) | ||
61 | { | ||
62 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
63 | struct xfs_dir2_free_hdr *hdr = bp->b_addr; | ||
64 | int block_ok = 0; | ||
65 | |||
66 | block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC); | ||
67 | if (!block_ok) { | ||
68 | XFS_CORRUPTION_ERROR("xfs_dir2_free_verify magic", | ||
69 | XFS_ERRLEVEL_LOW, mp, hdr); | ||
70 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | static void | ||
75 | xfs_dir2_free_read_verify( | ||
76 | struct xfs_buf *bp) | ||
77 | { | ||
78 | xfs_dir2_free_verify(bp); | ||
79 | } | ||
80 | |||
81 | static void | ||
82 | xfs_dir2_free_write_verify( | ||
83 | struct xfs_buf *bp) | ||
84 | { | ||
85 | xfs_dir2_free_verify(bp); | ||
86 | } | ||
87 | |||
88 | static const struct xfs_buf_ops xfs_dir2_free_buf_ops = { | ||
89 | .verify_read = xfs_dir2_free_read_verify, | ||
90 | .verify_write = xfs_dir2_free_write_verify, | ||
91 | }; | ||
92 | |||
93 | |||
94 | static int | ||
95 | __xfs_dir2_free_read( | ||
96 | struct xfs_trans *tp, | ||
97 | struct xfs_inode *dp, | ||
98 | xfs_dablk_t fbno, | ||
99 | xfs_daddr_t mappedbno, | ||
100 | struct xfs_buf **bpp) | ||
101 | { | ||
102 | return xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp, | ||
103 | XFS_DATA_FORK, &xfs_dir2_free_buf_ops); | ||
104 | } | ||
105 | |||
106 | int | ||
107 | xfs_dir2_free_read( | ||
108 | struct xfs_trans *tp, | ||
109 | struct xfs_inode *dp, | ||
110 | xfs_dablk_t fbno, | ||
111 | struct xfs_buf **bpp) | ||
112 | { | ||
113 | return __xfs_dir2_free_read(tp, dp, fbno, -1, bpp); | ||
114 | } | ||
115 | |||
116 | static int | ||
117 | xfs_dir2_free_try_read( | ||
118 | struct xfs_trans *tp, | ||
119 | struct xfs_inode *dp, | ||
120 | xfs_dablk_t fbno, | ||
121 | struct xfs_buf **bpp) | ||
122 | { | ||
123 | return __xfs_dir2_free_read(tp, dp, fbno, -2, bpp); | ||
124 | } | ||
125 | |||
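A sketch of the mappedbno convention the two wrappers above rely on, inferred from the callers in this patch: -1 asks xfs_da_read_buf() to map the block and treat a missing mapping as an error, while -2 tolerates a hole in the freespace file and returns success with a NULL buffer. The function name below is hypothetical.

	STATIC int
	example_scan_free_block(
		struct xfs_trans	*tp,
		struct xfs_inode	*dp,
		xfs_dablk_t		fbno)
	{
		struct xfs_buf		*bp;
		int			error;

		error = xfs_dir2_free_try_read(tp, dp, fbno, &bp);
		if (error)
			return error;	/* I/O or verifier failure */
		if (!bp)
			return 0;	/* hole: nothing to look at */
		/* inspect bp->b_addr here, then release the buffer */
		xfs_trans_brelse(tp, bp);
		return 0;
	}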
58 | /* | 126 | /* |
59 | * Log entries from a freespace block. | 127 | * Log entries from a freespace block. |
60 | */ | 128 | */ |
@@ -131,11 +199,12 @@ xfs_dir2_leaf_to_node( | |||
131 | /* | 199 | /* |
132 | * Get the buffer for the new freespace block. | 200 | * Get the buffer for the new freespace block. |
133 | */ | 201 | */ |
134 | if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, | 202 | error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp, |
135 | XFS_DATA_FORK))) { | 203 | XFS_DATA_FORK); |
204 | if (error) | ||
136 | return error; | 205 | return error; |
137 | } | 206 | fbp->b_ops = &xfs_dir2_free_buf_ops; |
138 | ASSERT(fbp != NULL); | 207 | |
139 | free = fbp->b_addr; | 208 | free = fbp->b_addr; |
140 | leaf = lbp->b_addr; | 209 | leaf = lbp->b_addr; |
141 | ltp = xfs_dir2_leaf_tail_p(mp, leaf); | 210 | ltp = xfs_dir2_leaf_tail_p(mp, leaf); |
@@ -157,7 +226,10 @@ xfs_dir2_leaf_to_node( | |||
157 | *to = cpu_to_be16(off); | 226 | *to = cpu_to_be16(off); |
158 | } | 227 | } |
159 | free->hdr.nused = cpu_to_be32(n); | 228 | free->hdr.nused = cpu_to_be32(n); |
229 | |||
230 | lbp->b_ops = &xfs_dir2_leafn_buf_ops; | ||
160 | leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC); | 231 | leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC); |
232 | |||
161 | /* | 233 | /* |
162 | * Log everything. | 234 | * Log everything. |
163 | */ | 235 | */ |
@@ -394,12 +466,10 @@ xfs_dir2_leafn_lookup_for_addname( | |||
394 | */ | 466 | */ |
395 | if (curbp) | 467 | if (curbp) |
396 | xfs_trans_brelse(tp, curbp); | 468 | xfs_trans_brelse(tp, curbp); |
397 | /* | 469 | |
398 | * Read the free block. | 470 | error = xfs_dir2_free_read(tp, dp, |
399 | */ | ||
400 | error = xfs_da_read_buf(tp, dp, | ||
401 | xfs_dir2_db_to_da(mp, newfdb), | 471 | xfs_dir2_db_to_da(mp, newfdb), |
402 | -1, &curbp, XFS_DATA_FORK); | 472 | &curbp); |
403 | if (error) | 473 | if (error) |
404 | return error; | 474 | return error; |
405 | free = curbp->b_addr; | 475 | free = curbp->b_addr; |
@@ -534,9 +604,9 @@ xfs_dir2_leafn_lookup_for_entry( | |||
534 | ASSERT(state->extravalid); | 604 | ASSERT(state->extravalid); |
535 | curbp = state->extrablk.bp; | 605 | curbp = state->extrablk.bp; |
536 | } else { | 606 | } else { |
537 | error = xfs_da_read_buf(tp, dp, | 607 | error = xfs_dir2_data_read(tp, dp, |
538 | xfs_dir2_db_to_da(mp, newdb), | 608 | xfs_dir2_db_to_da(mp, newdb), |
539 | -1, &curbp, XFS_DATA_FORK); | 609 | -1, &curbp); |
540 | if (error) | 610 | if (error) |
541 | return error; | 611 | return error; |
542 | } | 612 | } |
@@ -568,6 +638,7 @@ xfs_dir2_leafn_lookup_for_entry( | |||
568 | state->extrablk.index = (int)((char *)dep - | 638 | state->extrablk.index = (int)((char *)dep - |
569 | (char *)curbp->b_addr); | 639 | (char *)curbp->b_addr); |
570 | state->extrablk.magic = XFS_DIR2_DATA_MAGIC; | 640 | state->extrablk.magic = XFS_DIR2_DATA_MAGIC; |
641 | curbp->b_ops = &xfs_dir2_data_buf_ops; | ||
571 | if (cmp == XFS_CMP_EXACT) | 642 | if (cmp == XFS_CMP_EXACT) |
572 | return XFS_ERROR(EEXIST); | 643 | return XFS_ERROR(EEXIST); |
573 | } | 644 | } |
@@ -582,6 +653,7 @@ xfs_dir2_leafn_lookup_for_entry( | |||
582 | state->extrablk.index = -1; | 653 | state->extrablk.index = -1; |
583 | state->extrablk.blkno = curdb; | 654 | state->extrablk.blkno = curdb; |
584 | state->extrablk.magic = XFS_DIR2_DATA_MAGIC; | 655 | state->extrablk.magic = XFS_DIR2_DATA_MAGIC; |
656 | curbp->b_ops = &xfs_dir2_data_buf_ops; | ||
585 | } else { | 657 | } else { |
586 | /* If the curbp is not the CI match block, drop it */ | 658 | /* If the curbp is not the CI match block, drop it */ |
587 | if (state->extrablk.bp != curbp) | 659 | if (state->extrablk.bp != curbp) |
@@ -825,6 +897,77 @@ xfs_dir2_leafn_rebalance( | |||
825 | } | 897 | } |
826 | } | 898 | } |
827 | 899 | ||
900 | static int | ||
901 | xfs_dir2_data_block_free( | ||
902 | xfs_da_args_t *args, | ||
903 | struct xfs_dir2_data_hdr *hdr, | ||
904 | struct xfs_dir2_free *free, | ||
905 | xfs_dir2_db_t fdb, | ||
906 | int findex, | ||
907 | struct xfs_buf *fbp, | ||
908 | int longest) | ||
909 | { | ||
910 | struct xfs_trans *tp = args->trans; | ||
911 | int logfree = 0; | ||
912 | |||
913 | if (!hdr) { | ||
914 | /* One less used entry in the free table. */ | ||
915 | be32_add_cpu(&free->hdr.nused, -1); | ||
916 | xfs_dir2_free_log_header(tp, fbp); | ||
917 | |||
918 | /* | ||
919 | * If this was the last entry in the table, we can trim the | ||
920 | * table size back. There might be other entries at the end | ||
921 | * referring to non-existent data blocks, get those too. | ||
922 | */ | ||
923 | if (findex == be32_to_cpu(free->hdr.nvalid) - 1) { | ||
924 | int i; /* free entry index */ | ||
925 | |||
926 | for (i = findex - 1; i >= 0; i--) { | ||
927 | if (free->bests[i] != cpu_to_be16(NULLDATAOFF)) | ||
928 | break; | ||
929 | } | ||
930 | free->hdr.nvalid = cpu_to_be32(i + 1); | ||
931 | logfree = 0; | ||
932 | } else { | ||
933 | /* Not the last entry, just punch it out. */ | ||
934 | free->bests[findex] = cpu_to_be16(NULLDATAOFF); | ||
935 | logfree = 1; | ||
936 | } | ||
937 | /* | ||
938 | * If there are no useful entries left in the block, | ||
939 | * get rid of the block if we can. | ||
940 | */ | ||
941 | if (!free->hdr.nused) { | ||
942 | int error; | ||
943 | |||
944 | error = xfs_dir2_shrink_inode(args, fdb, fbp); | ||
945 | if (error == 0) { | ||
946 | fbp = NULL; | ||
947 | logfree = 0; | ||
948 | } else if (error != ENOSPC || args->total != 0) | ||
949 | return error; | ||
950 | /* | ||
951 | * It's possible to get ENOSPC if there is no | ||
952 | * space reservation. In this case someone | ||
953 | * else will eventually get rid of this block. | ||
954 | */ | ||
955 | } | ||
956 | } else { | ||
957 | /* | ||
958 | * Data block is not empty, just set the free entry to the new | ||
959 | * value. | ||
960 | */ | ||
961 | free->bests[findex] = cpu_to_be16(longest); | ||
962 | logfree = 1; | ||
963 | } | ||
964 | |||
965 | /* Log the free entry that changed, unless we got rid of it. */ | ||
966 | if (logfree) | ||
967 | xfs_dir2_free_log_bests(tp, fbp, findex, findex); | ||
968 | return 0; | ||
969 | } | ||
970 | |||
828 | /* | 971 | /* |
829 | * Remove an entry from a node directory. | 972 | * Remove an entry from a node directory. |
830 | * This removes the leaf entry and the data entry, | 973 | * This removes the leaf entry and the data entry, |
@@ -908,17 +1051,16 @@ xfs_dir2_leafn_remove( | |||
908 | xfs_dir2_db_t fdb; /* freeblock block number */ | 1051 | xfs_dir2_db_t fdb; /* freeblock block number */ |
909 | int findex; /* index in freeblock entries */ | 1052 | int findex; /* index in freeblock entries */ |
910 | xfs_dir2_free_t *free; /* freeblock structure */ | 1053 | xfs_dir2_free_t *free; /* freeblock structure */ |
911 | int logfree; /* need to log free entry */ | ||
912 | 1054 | ||
913 | /* | 1055 | /* |
914 | * Convert the data block number to a free block, | 1056 | * Convert the data block number to a free block, |
915 | * read in the free block. | 1057 | * read in the free block. |
916 | */ | 1058 | */ |
917 | fdb = xfs_dir2_db_to_fdb(mp, db); | 1059 | fdb = xfs_dir2_db_to_fdb(mp, db); |
918 | if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), | 1060 | error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb), |
919 | -1, &fbp, XFS_DATA_FORK))) { | 1061 | &fbp); |
1062 | if (error) | ||
920 | return error; | 1063 | return error; |
921 | } | ||
922 | free = fbp->b_addr; | 1064 | free = fbp->b_addr; |
923 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); | 1065 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); |
924 | ASSERT(be32_to_cpu(free->hdr.firstdb) == | 1066 | ASSERT(be32_to_cpu(free->hdr.firstdb) == |
@@ -954,68 +1096,12 @@ xfs_dir2_leafn_remove( | |||
954 | * If we got rid of the data block, we can eliminate that entry | 1096 | * If we got rid of the data block, we can eliminate that entry |
955 | * in the free block. | 1097 | * in the free block. |
956 | */ | 1098 | */ |
957 | if (hdr == NULL) { | 1099 | error = xfs_dir2_data_block_free(args, hdr, free, |
958 | /* | 1100 | fdb, findex, fbp, longest); |
959 | * One less used entry in the free table. | 1101 | if (error) |
960 | */ | 1102 | return error; |
961 | be32_add_cpu(&free->hdr.nused, -1); | ||
962 | xfs_dir2_free_log_header(tp, fbp); | ||
963 | /* | ||
964 | * If this was the last entry in the table, we can | ||
965 | * trim the table size back. There might be other | ||
966 | * entries at the end referring to non-existent | ||
967 | * data blocks, get those too. | ||
968 | */ | ||
969 | if (findex == be32_to_cpu(free->hdr.nvalid) - 1) { | ||
970 | int i; /* free entry index */ | ||
971 | |||
972 | for (i = findex - 1; | ||
973 | i >= 0 && | ||
974 | free->bests[i] == cpu_to_be16(NULLDATAOFF); | ||
975 | i--) | ||
976 | continue; | ||
977 | free->hdr.nvalid = cpu_to_be32(i + 1); | ||
978 | logfree = 0; | ||
979 | } | ||
980 | /* | ||
981 | * Not the last entry, just punch it out. | ||
982 | */ | ||
983 | else { | ||
984 | free->bests[findex] = cpu_to_be16(NULLDATAOFF); | ||
985 | logfree = 1; | ||
986 | } | ||
987 | /* | ||
988 | * If there are no useful entries left in the block, | ||
989 | * get rid of the block if we can. | ||
990 | */ | ||
991 | if (!free->hdr.nused) { | ||
992 | error = xfs_dir2_shrink_inode(args, fdb, fbp); | ||
993 | if (error == 0) { | ||
994 | fbp = NULL; | ||
995 | logfree = 0; | ||
996 | } else if (error != ENOSPC || args->total != 0) | ||
997 | return error; | ||
998 | /* | ||
999 | * It's possible to get ENOSPC if there is no | ||
1000 | * space reservation. In this case some one | ||
1001 | * else will eventually get rid of this block. | ||
1002 | */ | ||
1003 | } | ||
1004 | } | ||
1005 | /* | ||
1006 | * Data block is not empty, just set the free entry to | ||
1007 | * the new value. | ||
1008 | */ | ||
1009 | else { | ||
1010 | free->bests[findex] = cpu_to_be16(longest); | ||
1011 | logfree = 1; | ||
1012 | } | ||
1013 | /* | ||
1014 | * Log the free entry that changed, unless we got rid of it. | ||
1015 | */ | ||
1016 | if (logfree) | ||
1017 | xfs_dir2_free_log_bests(tp, fbp, findex, findex); | ||
1018 | } | 1103 | } |
1104 | |||
1019 | xfs_dir2_leafn_check(dp, bp); | 1105 | xfs_dir2_leafn_check(dp, bp); |
1020 | /* | 1106 | /* |
1021 | * Return indication of whether this leaf block is empty enough | 1107 | * Return indication of whether this leaf block is empty enough |
@@ -1169,12 +1255,11 @@ xfs_dir2_leafn_toosmall( | |||
1169 | /* | 1255 | /* |
1170 | * Read the sibling leaf block. | 1256 | * Read the sibling leaf block. |
1171 | */ | 1257 | */ |
1172 | if ((error = | 1258 | error = xfs_dir2_leafn_read(state->args->trans, state->args->dp, |
1173 | xfs_da_read_buf(state->args->trans, state->args->dp, blkno, | 1259 | blkno, -1, &bp); |
1174 | -1, &bp, XFS_DATA_FORK))) { | 1260 | if (error) |
1175 | return error; | 1261 | return error; |
1176 | } | 1262 | |
1177 | ASSERT(bp != NULL); | ||
1178 | /* | 1263 | /* |
1179 | * Count bytes in the two blocks combined. | 1264 | * Count bytes in the two blocks combined. |
1180 | */ | 1265 | */ |
@@ -1454,14 +1539,13 @@ xfs_dir2_node_addname_int( | |||
1454 | * This should be really rare, so there's no reason | 1539 | * This should be really rare, so there's no reason |
1455 | * to avoid it. | 1540 | * to avoid it. |
1456 | */ | 1541 | */ |
1457 | if ((error = xfs_da_read_buf(tp, dp, | 1542 | error = xfs_dir2_free_try_read(tp, dp, |
1458 | xfs_dir2_db_to_da(mp, fbno), -2, &fbp, | 1543 | xfs_dir2_db_to_da(mp, fbno), |
1459 | XFS_DATA_FORK))) { | 1544 | &fbp); |
1545 | if (error) | ||
1460 | return error; | 1546 | return error; |
1461 | } | 1547 | if (!fbp) |
1462 | if (unlikely(fbp == NULL)) { | ||
1463 | continue; | 1548 | continue; |
1464 | } | ||
1465 | free = fbp->b_addr; | 1549 | free = fbp->b_addr; |
1466 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); | 1550 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); |
1467 | findex = 0; | 1551 | findex = 0; |
@@ -1520,9 +1604,10 @@ xfs_dir2_node_addname_int( | |||
1520 | * that was just allocated. | 1604 | * that was just allocated. |
1521 | */ | 1605 | */ |
1522 | fbno = xfs_dir2_db_to_fdb(mp, dbno); | 1606 | fbno = xfs_dir2_db_to_fdb(mp, dbno); |
1523 | if (unlikely(error = xfs_da_read_buf(tp, dp, | 1607 | error = xfs_dir2_free_try_read(tp, dp, |
1524 | xfs_dir2_db_to_da(mp, fbno), -2, &fbp, | 1608 | xfs_dir2_db_to_da(mp, fbno), |
1525 | XFS_DATA_FORK))) | 1609 | &fbp); |
1610 | if (error) | ||
1526 | return error; | 1611 | return error; |
1527 | 1612 | ||
1528 | /* | 1613 | /* |
@@ -1561,12 +1646,12 @@ xfs_dir2_node_addname_int( | |||
1561 | /* | 1646 | /* |
1562 | * Get a buffer for the new block. | 1647 | * Get a buffer for the new block. |
1563 | */ | 1648 | */ |
1564 | if ((error = xfs_da_get_buf(tp, dp, | 1649 | error = xfs_da_get_buf(tp, dp, |
1565 | xfs_dir2_db_to_da(mp, fbno), | 1650 | xfs_dir2_db_to_da(mp, fbno), |
1566 | -1, &fbp, XFS_DATA_FORK))) { | 1651 | -1, &fbp, XFS_DATA_FORK); |
1652 | if (error) | ||
1567 | return error; | 1653 | return error; |
1568 | } | 1654 | fbp->b_ops = &xfs_dir2_free_buf_ops; |
1569 | ASSERT(fbp != NULL); | ||
1570 | 1655 | ||
1571 | /* | 1656 | /* |
1572 | * Initialize the new block to be empty, and remember | 1657 | * Initialize the new block to be empty, and remember |
@@ -1630,8 +1715,8 @@ xfs_dir2_node_addname_int( | |||
1630 | /* | 1715 | /* |
1631 | * Read the data block in. | 1716 | * Read the data block in. |
1632 | */ | 1717 | */ |
1633 | error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno), | 1718 | error = xfs_dir2_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno), |
1634 | -1, &dbp, XFS_DATA_FORK); | 1719 | -1, &dbp); |
1635 | if (error) | 1720 | if (error) |
1636 | return error; | 1721 | return error; |
1637 | hdr = dbp->b_addr; | 1722 | hdr = dbp->b_addr; |
@@ -1917,18 +2002,15 @@ xfs_dir2_node_trim_free( | |||
1917 | /* | 2002 | /* |
1918 | * Read the freespace block. | 2003 | * Read the freespace block. |
1919 | */ | 2004 | */ |
1920 | if (unlikely(error = xfs_da_read_buf(tp, dp, (xfs_dablk_t)fo, -2, &bp, | 2005 | error = xfs_dir2_free_try_read(tp, dp, fo, &bp); |
1921 | XFS_DATA_FORK))) { | 2006 | if (error) |
1922 | return error; | 2007 | return error; |
1923 | } | ||
1924 | |||
1925 | /* | 2008 | /* |
1926 | * There can be holes in freespace. If fo is a hole, there's | 2009 | * There can be holes in freespace. If fo is a hole, there's |
1927 | * nothing to do. | 2010 | * nothing to do. |
1928 | */ | 2011 | */ |
1929 | if (bp == NULL) { | 2012 | if (!bp) |
1930 | return 0; | 2013 | return 0; |
1931 | } | ||
1932 | free = bp->b_addr; | 2014 | free = bp->b_addr; |
1933 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); | 2015 | ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)); |
1934 | /* | 2016 | /* |
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 3523d3e15aa8..7da79f6515fd 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h | |||
@@ -30,6 +30,8 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args, | |||
30 | const unsigned char *name, int len); | 30 | const unsigned char *name, int len); |
31 | 31 | ||
32 | /* xfs_dir2_block.c */ | 32 | /* xfs_dir2_block.c */ |
33 | extern const struct xfs_buf_ops xfs_dir2_block_buf_ops; | ||
34 | |||
33 | extern int xfs_dir2_block_addname(struct xfs_da_args *args); | 35 | extern int xfs_dir2_block_addname(struct xfs_da_args *args); |
34 | extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, | 36 | extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, |
35 | xfs_off_t *offset, filldir_t filldir); | 37 | xfs_off_t *offset, filldir_t filldir); |
@@ -41,10 +43,19 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args, | |||
41 | 43 | ||
42 | /* xfs_dir2_data.c */ | 44 | /* xfs_dir2_data.c */ |
43 | #ifdef DEBUG | 45 | #ifdef DEBUG |
44 | extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp); | 46 | #define xfs_dir2_data_check(dp,bp) __xfs_dir2_data_check(dp, bp) |
45 | #else | 47 | #else |
46 | #define xfs_dir2_data_check(dp,bp) | 48 | #define xfs_dir2_data_check(dp,bp) |
47 | #endif | 49 | #endif |
50 | |||
51 | extern const struct xfs_buf_ops xfs_dir2_data_buf_ops; | ||
52 | |||
53 | extern int __xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp); | ||
54 | extern int xfs_dir2_data_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
55 | xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); | ||
56 | extern int xfs_dir2_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, | ||
57 | xfs_dablk_t bno, xfs_daddr_t mapped_bno); | ||
58 | |||
48 | extern struct xfs_dir2_data_free * | 59 | extern struct xfs_dir2_data_free * |
49 | xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, | 60 | xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, |
50 | struct xfs_dir2_data_unused *dup, int *loghead); | 61 | struct xfs_dir2_data_unused *dup, int *loghead); |
@@ -66,6 +77,10 @@ extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp, | |||
66 | xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); | 77 | xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); |
67 | 78 | ||
68 | /* xfs_dir2_leaf.c */ | 79 | /* xfs_dir2_leaf.c */ |
80 | extern const struct xfs_buf_ops xfs_dir2_leafn_buf_ops; | ||
81 | |||
82 | extern int xfs_dir2_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
83 | xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp); | ||
69 | extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, | 84 | extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args, |
70 | struct xfs_buf *dbp); | 85 | struct xfs_buf *dbp); |
71 | extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); | 86 | extern int xfs_dir2_leaf_addname(struct xfs_da_args *args); |
@@ -115,6 +130,8 @@ extern int xfs_dir2_node_removename(struct xfs_da_args *args); | |||
115 | extern int xfs_dir2_node_replace(struct xfs_da_args *args); | 130 | extern int xfs_dir2_node_replace(struct xfs_da_args *args); |
116 | extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, | 131 | extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo, |
117 | int *rvalp); | 132 | int *rvalp); |
133 | extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp, | ||
134 | xfs_dablk_t fbno, struct xfs_buf **bpp); | ||
118 | 135 | ||
119 | /* xfs_dir2_sf.c */ | 136 | /* xfs_dir2_sf.c */ |
120 | extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); | 137 | extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index bf27fcca4843..9e1bf5294c91 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -248,7 +248,59 @@ xfs_qm_init_dquot_blk( | |||
248 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); | 248 | xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); |
249 | } | 249 | } |
250 | 250 | ||
251 | static void | ||
252 | xfs_dquot_buf_verify( | ||
253 | struct xfs_buf *bp) | ||
254 | { | ||
255 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
256 | struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; | ||
257 | struct xfs_disk_dquot *ddq; | ||
258 | xfs_dqid_t id = 0; | ||
259 | int i; | ||
260 | |||
261 | /* | ||
262 | * On the first read of the buffer, verify that each dquot is valid. | ||
263 | * We don't know what the id of the dquot is supposed to be, just that | ||
264 | * they should be increasing monotonically within the buffer. If the | ||
265 | * first id is corrupt, then it will fail on the second dquot in the | ||
266 | * buffer so corruptions could point to the wrong dquot in this case. | ||
267 | */ | ||
268 | for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { | ||
269 | int error; | ||
270 | |||
271 | ddq = &d[i].dd_diskdq; | ||
272 | |||
273 | if (i == 0) | ||
274 | id = be32_to_cpu(ddq->d_id); | ||
275 | |||
276 | error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, | ||
277 | "xfs_dquot_read_verify"); | ||
278 | if (error) { | ||
279 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d); | ||
280 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
281 | break; | ||
282 | } | ||
283 | } | ||
284 | } | ||
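A worked illustration of the id check above, with assumed numbers: if slot 0 of a chunk carries d_id 100 and qi_dqperchunk is 30, the loop expects ids 100 through 129 in order. A buffer holding ids 100, 101, 103 and so on fails xfs_qm_dqcheck() at slot 2 (expected 102), which is why the comment warns that the reported dquot may not be the one that is actually corrupt.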
285 | |||
286 | static void | ||
287 | xfs_dquot_buf_read_verify( | ||
288 | struct xfs_buf *bp) | ||
289 | { | ||
290 | xfs_dquot_buf_verify(bp); | ||
291 | } | ||
292 | |||
293 | void | ||
294 | xfs_dquot_buf_write_verify( | ||
295 | struct xfs_buf *bp) | ||
296 | { | ||
297 | xfs_dquot_buf_verify(bp); | ||
298 | } | ||
251 | 299 | ||
300 | const struct xfs_buf_ops xfs_dquot_buf_ops = { | ||
301 | .verify_read = xfs_dquot_buf_read_verify, | ||
302 | .verify_write = xfs_dquot_buf_write_verify, | ||
303 | }; | ||
252 | 304 | ||
253 | /* | 305 | /* |
254 | * Allocate a block and fill it with dquots. | 306 | * Allocate a block and fill it with dquots. |
@@ -315,6 +367,7 @@ xfs_qm_dqalloc( | |||
315 | error = xfs_buf_geterror(bp); | 367 | error = xfs_buf_geterror(bp); |
316 | if (error) | 368 | if (error) |
317 | goto error1; | 369 | goto error1; |
370 | bp->b_ops = &xfs_dquot_buf_ops; | ||
318 | 371 | ||
319 | /* | 372 | /* |
320 | * Make a chunk of dquots out of this buffer and log | 373 | * Make a chunk of dquots out of this buffer and log |
@@ -359,6 +412,51 @@ xfs_qm_dqalloc( | |||
359 | 412 | ||
360 | return (error); | 413 | return (error); |
361 | } | 414 | } |
415 | STATIC int | ||
416 | xfs_qm_dqrepair( | ||
417 | struct xfs_mount *mp, | ||
418 | struct xfs_trans *tp, | ||
419 | struct xfs_dquot *dqp, | ||
420 | xfs_dqid_t firstid, | ||
421 | struct xfs_buf **bpp) | ||
422 | { | ||
423 | int error; | ||
424 | struct xfs_disk_dquot *ddq; | ||
425 | struct xfs_dqblk *d; | ||
426 | int i; | ||
427 | |||
428 | /* | ||
429 | * Read the buffer without verification so we get the corrupted | ||
430 | * buffer returned to us. Make sure we verify it on write, though. | ||
431 | */ | ||
432 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, | ||
433 | mp->m_quotainfo->qi_dqchunklen, | ||
434 | 0, bpp, NULL); | ||
435 | |||
436 | if (error) { | ||
437 | ASSERT(*bpp == NULL); | ||
438 | return XFS_ERROR(error); | ||
439 | } | ||
440 | (*bpp)->b_ops = &xfs_dquot_buf_ops; | ||
441 | |||
442 | ASSERT(xfs_buf_islocked(*bpp)); | ||
443 | d = (struct xfs_dqblk *)(*bpp)->b_addr; | ||
444 | |||
445 | /* Do the actual repair of dquots in this buffer */ | ||
446 | for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { | ||
447 | ddq = &d[i].dd_diskdq; | ||
448 | error = xfs_qm_dqcheck(mp, ddq, firstid + i, | ||
449 | dqp->dq_flags & XFS_DQ_ALLTYPES, | ||
450 | XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); | ||
451 | if (error) { | ||
452 | /* repair failed, we're screwed */ | ||
453 | xfs_trans_brelse(tp, *bpp); | ||
454 | return XFS_ERROR(EIO); | ||
455 | } | ||
456 | } | ||
457 | |||
458 | return 0; | ||
459 | } | ||
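The repair path above inverts the usual arrangement; a compact sketch of the pattern, assuming (as the comment in the function states) that a NULL ops pointer suppresses read-time verification. blkno and len stand in for dqp->q_blkno and mp->m_quotainfo->qi_dqchunklen.

	/* read unverified so the corrupt chunk is actually returned */
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno,
				   len, 0, &bp, NULL);
	if (error)
		return XFS_ERROR(error);
	/* attach the ops afterwards so verify_write still runs */
	bp->b_ops = &xfs_dquot_buf_ops;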
362 | 460 | ||
363 | /* | 461 | /* |
364 | * Maps a dquot to the buffer containing its on-disk version. | 462 | * Maps a dquot to the buffer containing its on-disk version. |
@@ -378,7 +476,6 @@ xfs_qm_dqtobp( | |||
378 | xfs_buf_t *bp; | 476 | xfs_buf_t *bp; |
379 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); | 477 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); |
380 | xfs_mount_t *mp = dqp->q_mount; | 478 | xfs_mount_t *mp = dqp->q_mount; |
381 | xfs_disk_dquot_t *ddq; | ||
382 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); | 479 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); |
383 | xfs_trans_t *tp = (tpp ? *tpp : NULL); | 480 | xfs_trans_t *tp = (tpp ? *tpp : NULL); |
384 | 481 | ||
@@ -439,33 +536,24 @@ xfs_qm_dqtobp( | |||
439 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 536 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
440 | dqp->q_blkno, | 537 | dqp->q_blkno, |
441 | mp->m_quotainfo->qi_dqchunklen, | 538 | mp->m_quotainfo->qi_dqchunklen, |
442 | 0, &bp); | 539 | 0, &bp, &xfs_dquot_buf_ops); |
443 | if (error || !bp) | ||
444 | return XFS_ERROR(error); | ||
445 | } | ||
446 | |||
447 | ASSERT(xfs_buf_islocked(bp)); | ||
448 | 540 | ||
449 | /* | 541 | if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { |
450 | * calculate the location of the dquot inside the buffer. | 542 | xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * |
451 | */ | 543 | mp->m_quotainfo->qi_dqperchunk; |
452 | ddq = bp->b_addr + dqp->q_bufoffset; | 544 | ASSERT(bp == NULL); |
545 | error = xfs_qm_dqrepair(mp, tp, dqp, firstid, &bp); | ||
546 | } | ||
453 | 547 | ||
454 | /* | 548 | if (error) { |
455 | * A simple sanity check in case we got a corrupted dquot... | 549 | ASSERT(bp == NULL); |
456 | */ | 550 | return XFS_ERROR(error); |
457 | error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, | ||
458 | flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), | ||
459 | "dqtobp"); | ||
460 | if (error) { | ||
461 | if (!(flags & XFS_QMOPT_DQREPAIR)) { | ||
462 | xfs_trans_brelse(tp, bp); | ||
463 | return XFS_ERROR(EIO); | ||
464 | } | 551 | } |
465 | } | 552 | } |
466 | 553 | ||
554 | ASSERT(xfs_buf_islocked(bp)); | ||
467 | *O_bpp = bp; | 555 | *O_bpp = bp; |
468 | *O_ddpp = ddq; | 556 | *O_ddpp = bp->b_addr + dqp->q_bufoffset; |
469 | 557 | ||
470 | return (0); | 558 | return (0); |
471 | } | 559 | } |
@@ -920,7 +1008,7 @@ xfs_qm_dqflush( | |||
920 | * Get the buffer containing the on-disk dquot | 1008 | * Get the buffer containing the on-disk dquot |
921 | */ | 1009 | */ |
922 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, | 1010 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, |
923 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); | 1011 | mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); |
924 | if (error) | 1012 | if (error) |
925 | goto out_unlock; | 1013 | goto out_unlock; |
926 | 1014 | ||
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 7d20af27346d..c694a8469c4a 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
@@ -161,4 +161,6 @@ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) | |||
161 | return dqp; | 161 | return dqp; |
162 | } | 162 | } |
163 | 163 | ||
164 | extern const struct xfs_buf_ops xfs_dquot_buf_ops; | ||
165 | |||
164 | #endif /* __XFS_DQUOT_H__ */ | 166 | #endif /* __XFS_DQUOT_H__ */ |
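The ops vector declared here is the pattern this series repeats for every metadata type (xfs_agi_buf_ops and xfs_inobt_buf_ops appear further down): one structural check shared by a read hook and a write hook, reporting failure by marking the buffer with xfs_buf_ioerror() rather than returning an error. A condensed sketch, assuming only the verify_read/verify_write signatures visible later in this diff; the foo names are placeholders:

static void xfs_foo_verify(struct xfs_buf *bp)
{
	/* structural checks only: magic, version, bounds... */
	if (!foo_structure_ok(bp->b_addr))		/* hypothetical check */
		xfs_buf_ioerror(bp, EFSCORRUPTED);	/* flag, don't return */
}

static void xfs_foo_read_verify(struct xfs_buf *bp)  { xfs_foo_verify(bp); }
static void xfs_foo_write_verify(struct xfs_buf *bp) { xfs_foo_verify(bp); }

const struct xfs_buf_ops xfs_foo_buf_ops = {
	.verify_read	= xfs_foo_read_verify,
	.verify_write	= xfs_foo_write_verify,
};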
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 8c6d1d70278c..a83611849cee 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "xfs_inode.h" | 29 | #include "xfs_inode.h" |
30 | #include "xfs_inode_item.h" | 30 | #include "xfs_inode_item.h" |
31 | #include "xfs_trace.h" | 31 | #include "xfs_trace.h" |
32 | #include "xfs_icache.h" | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * Note that we only accept fileids which are long enough rather than allow | 35 | * Note that we only accept fileids which are long enough rather than allow |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index aa473fa640a2..67284edb84d7 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include "xfs_error.h" | 31 | #include "xfs_error.h" |
32 | #include "xfs_vnodeops.h" | 32 | #include "xfs_vnodeops.h" |
33 | #include "xfs_da_btree.h" | 33 | #include "xfs_da_btree.h" |
34 | #include "xfs_dir2_format.h" | ||
35 | #include "xfs_dir2_priv.h" | ||
34 | #include "xfs_ioctl.h" | 36 | #include "xfs_ioctl.h" |
35 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
36 | 38 | ||
@@ -84,7 +86,7 @@ xfs_rw_ilock_demote( | |||
84 | * valid before the operation, it will be read from disk before | 86 | * valid before the operation, it will be read from disk before |
85 | * being partially zeroed. | 87 | * being partially zeroed. |
86 | */ | 88 | */ |
87 | STATIC int | 89 | int |
88 | xfs_iozero( | 90 | xfs_iozero( |
89 | struct xfs_inode *ip, /* inode */ | 91 | struct xfs_inode *ip, /* inode */ |
90 | loff_t pos, /* offset in file */ | 92 | loff_t pos, /* offset in file */ |
@@ -255,15 +257,14 @@ xfs_file_aio_read( | |||
255 | xfs_buftarg_t *target = | 257 | xfs_buftarg_t *target = |
256 | XFS_IS_REALTIME_INODE(ip) ? | 258 | XFS_IS_REALTIME_INODE(ip) ? |
257 | mp->m_rtdev_targp : mp->m_ddev_targp; | 259 | mp->m_rtdev_targp : mp->m_ddev_targp; |
258 | if ((iocb->ki_pos & target->bt_smask) || | 260 | if ((pos & target->bt_smask) || (size & target->bt_smask)) { |
259 | (size & target->bt_smask)) { | 261 | if (pos == i_size_read(inode)) |
260 | if (iocb->ki_pos == i_size_read(inode)) | ||
261 | return 0; | 262 | return 0; |
262 | return -XFS_ERROR(EINVAL); | 263 | return -XFS_ERROR(EINVAL); |
263 | } | 264 | } |
264 | } | 265 | } |
265 | 266 | ||
266 | n = mp->m_super->s_maxbytes - iocb->ki_pos; | 267 | n = mp->m_super->s_maxbytes - pos; |
267 | if (n <= 0 || size == 0) | 268 | if (n <= 0 || size == 0) |
268 | return 0; | 269 | return 0; |
269 | 270 | ||
@@ -289,20 +290,21 @@ xfs_file_aio_read( | |||
289 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | 290 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); |
290 | 291 | ||
291 | if (inode->i_mapping->nrpages) { | 292 | if (inode->i_mapping->nrpages) { |
292 | ret = -xfs_flushinval_pages(ip, | 293 | ret = -filemap_write_and_wait_range( |
293 | (iocb->ki_pos & PAGE_CACHE_MASK), | 294 | VFS_I(ip)->i_mapping, |
294 | -1, FI_REMAPF_LOCKED); | 295 | pos, -1); |
295 | if (ret) { | 296 | if (ret) { |
296 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | 297 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); |
297 | return ret; | 298 | return ret; |
298 | } | 299 | } |
300 | truncate_pagecache_range(VFS_I(ip), pos, -1); | ||
299 | } | 301 | } |
300 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); | 302 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
301 | } | 303 | } |
302 | 304 | ||
303 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 305 | trace_xfs_file_read(ip, size, pos, ioflags); |
304 | 306 | ||
305 | ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); | 307 | ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); |
306 | if (ret > 0) | 308 | if (ret > 0) |
307 | XFS_STATS_ADD(xs_read_bytes, ret); | 309 | XFS_STATS_ADD(xs_read_bytes, ret); |
308 | 310 | ||
@@ -670,10 +672,11 @@ xfs_file_dio_aio_write( | |||
670 | goto out; | 672 | goto out; |
671 | 673 | ||
672 | if (mapping->nrpages) { | 674 | if (mapping->nrpages) { |
673 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | 675 | ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
674 | FI_REMAPF_LOCKED); | 676 | pos, -1); |
675 | if (ret) | 677 | if (ret) |
676 | goto out; | 678 | goto out; |
679 | truncate_pagecache_range(VFS_I(ip), pos, -1); | ||
677 | } | 680 | } |
678 | 681 | ||
679 | /* | 682 | /* |
@@ -728,16 +731,17 @@ xfs_file_buffered_aio_write( | |||
728 | write_retry: | 731 | write_retry: |
729 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | 732 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); |
730 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, | 733 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, |
731 | pos, &iocb->ki_pos, count, ret); | 734 | pos, &iocb->ki_pos, count, 0); |
735 | |||
732 | /* | 736 | /* |
733 | * if we just got an ENOSPC, flush the inode now we aren't holding any | 737 | * If we just got an ENOSPC, try to write back all dirty inodes to |
734 | * page locks and retry *once* | 738 | * convert delalloc space to free up some of the excess reserved |
739 | * metadata space. | ||
735 | */ | 740 | */ |
736 | if (ret == -ENOSPC && !enospc) { | 741 | if (ret == -ENOSPC && !enospc) { |
737 | enospc = 1; | 742 | enospc = 1; |
738 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | 743 | xfs_flush_inodes(ip->i_mount); |
739 | if (!ret) | 744 | goto write_retry; |
740 | goto write_retry; | ||
741 | } | 745 | } |
742 | 746 | ||
743 | current->backing_dev_info = NULL; | 747 | current->backing_dev_info = NULL; |
@@ -889,7 +893,7 @@ xfs_dir_open( | |||
889 | */ | 893 | */ |
890 | mode = xfs_ilock_map_shared(ip); | 894 | mode = xfs_ilock_map_shared(ip); |
891 | if (ip->i_d.di_nextents > 0) | 895 | if (ip->i_d.di_nextents > 0) |
892 | xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); | 896 | xfs_dir2_data_readahead(NULL, ip, 0, -1); |
893 | xfs_iunlock(ip, mode); | 897 | xfs_iunlock(ip, mode); |
894 | return 0; | 898 | return 0; |
895 | } | 899 | } |
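Both I/O paths above replace the removed xfs_flushinval_pages() with the same two generic calls: write back any dirty pagecache over the range, then drop the now-clean pages so a direct transfer cannot race against stale cached data. The sequence in isolation (mapping, inode, and pos are assumed to be in scope as in the functions above):

	/* flush dirty pages, then invalidate, before direct I/O at pos */
	if (mapping->nrpages) {
		ret = filemap_write_and_wait_range(mapping, pos, -1);
		if (ret)
			return ret;	/* writeback failed; don't proceed */
		/* pages are clean now and can simply be dropped */
		truncate_pagecache_range(inode, pos, -1);
	}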
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c13fed8c394a..6dda3f949b04 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -233,7 +233,8 @@ typedef struct xfs_fsop_resblks { | |||
233 | #define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ | 233 | #define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ |
234 | #define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ | 234 | #define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ |
235 | #define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ | 235 | #define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ |
236 | #define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ | 236 | #define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ |
237 | #define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ | ||
237 | #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ | 238 | #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ |
238 | 239 | ||
239 | 240 | ||
@@ -339,6 +340,35 @@ typedef struct xfs_error_injection { | |||
339 | 340 | ||
340 | 341 | ||
341 | /* | 342 | /* |
343 | * Speculative preallocation trimming. | ||
344 | */ | ||
345 | #define XFS_EOFBLOCKS_VERSION 1 | ||
346 | struct xfs_eofblocks { | ||
347 | __u32 eof_version; | ||
348 | __u32 eof_flags; | ||
349 | uid_t eof_uid; | ||
350 | gid_t eof_gid; | ||
351 | prid_t eof_prid; | ||
352 | __u32 pad32; | ||
353 | __u64 eof_min_file_size; | ||
354 | __u64 pad64[12]; | ||
355 | }; | ||
356 | |||
357 | /* eof_flags values */ | ||
358 | #define XFS_EOF_FLAGS_SYNC (1 << 0) /* sync/wait mode scan */ | ||
359 | #define XFS_EOF_FLAGS_UID (1 << 1) /* filter by uid */ | ||
360 | #define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ | ||
361 | #define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ | ||
362 | #define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ | ||
363 | #define XFS_EOF_FLAGS_VALID \ | ||
364 | (XFS_EOF_FLAGS_SYNC | \ | ||
365 | XFS_EOF_FLAGS_UID | \ | ||
366 | XFS_EOF_FLAGS_GID | \ | ||
367 | XFS_EOF_FLAGS_PRID | \ | ||
368 | XFS_EOF_FLAGS_MINFILESIZE) | ||
369 | |||
370 | |||
371 | /* | ||
342 | * The user-level Handle Request interface structure. | 372 | * The user-level Handle Request interface structure. |
343 | */ | 373 | */ |
344 | typedef struct xfs_fsop_handlereq { | 374 | typedef struct xfs_fsop_handlereq { |
@@ -456,6 +486,7 @@ typedef struct xfs_handle { | |||
456 | /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ | 486 | /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ |
457 | #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) | 487 | #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) |
458 | #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) | 488 | #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) |
489 | #define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks) | ||
459 | 490 | ||
460 | /* | 491 | /* |
461 | * ioctl commands that replace IRIX syssgi()'s | 492 | * ioctl commands that replace IRIX syssgi()'s |
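XFS_IOC_FREE_EOFBLOCKS is userspace-visible, so the trim can be driven directly from a program. A hedged example: it re-declares the structure and command exactly as defined above rather than relying on a particular header install, and restricts the scan to the calling user's files; whether a given kernel accepts the call depends on it carrying this patch set.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>

typedef uint32_t prid_t;	/* project ID, as in the XFS headers */

#define XFS_EOFBLOCKS_VERSION	1
struct xfs_eofblocks {		/* mirrors the definition above */
	uint32_t	eof_version;
	uint32_t	eof_flags;
	uid_t		eof_uid;
	gid_t		eof_gid;
	prid_t		eof_prid;
	uint32_t	pad32;
	uint64_t	eof_min_file_size;
	uint64_t	pad64[12];
};

#define XFS_EOF_FLAGS_SYNC	(1 << 0)
#define XFS_EOF_FLAGS_UID	(1 << 1)
#define XFS_IOC_FREE_EOFBLOCKS	_IOR('X', 58, struct xfs_eofblocks)

int main(int argc, char **argv)
{
	struct xfs_eofblocks eofb;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <xfs-mountpoint>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&eofb, 0, sizeof(eofb));		/* pad fields must be zero */
	eofb.eof_version = XFS_EOFBLOCKS_VERSION;
	eofb.eof_flags = XFS_EOF_FLAGS_SYNC | XFS_EOF_FLAGS_UID;
	eofb.eof_uid = getuid();		/* only this user's inodes */

	if (ioctl(fd, XFS_IOC_FREE_EOFBLOCKS, &eofb) < 0)
		perror("XFS_IOC_FREE_EOFBLOCKS");
	close(fd);
	return 0;
}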
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c deleted file mode 100644 index 652b875a9d4c..000000000000 --- a/fs/xfs/xfs_fs_subr.c +++ /dev/null | |||
@@ -1,96 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_vnodeops.h" | ||
20 | #include "xfs_bmap_btree.h" | ||
21 | #include "xfs_inode.h" | ||
22 | #include "xfs_trace.h" | ||
23 | |||
24 | /* | ||
25 | * note: all filemap functions return negative error codes. These | ||
26 | * need to be inverted before returning to the xfs core functions. | ||
27 | */ | ||
28 | void | ||
29 | xfs_tosspages( | ||
30 | xfs_inode_t *ip, | ||
31 | xfs_off_t first, | ||
32 | xfs_off_t last, | ||
33 | int fiopt) | ||
34 | { | ||
35 | /* can't toss partial tail pages, so mask them out */ | ||
36 | last &= ~(PAGE_SIZE - 1); | ||
37 | truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); | ||
38 | } | ||
39 | |||
40 | int | ||
41 | xfs_flushinval_pages( | ||
42 | xfs_inode_t *ip, | ||
43 | xfs_off_t first, | ||
44 | xfs_off_t last, | ||
45 | int fiopt) | ||
46 | { | ||
47 | struct address_space *mapping = VFS_I(ip)->i_mapping; | ||
48 | int ret = 0; | ||
49 | |||
50 | trace_xfs_pagecache_inval(ip, first, last); | ||
51 | |||
52 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | ||
53 | ret = filemap_write_and_wait_range(mapping, first, | ||
54 | last == -1 ? LLONG_MAX : last); | ||
55 | if (!ret) | ||
56 | truncate_inode_pages_range(mapping, first, last); | ||
57 | return -ret; | ||
58 | } | ||
59 | |||
60 | int | ||
61 | xfs_flush_pages( | ||
62 | xfs_inode_t *ip, | ||
63 | xfs_off_t first, | ||
64 | xfs_off_t last, | ||
65 | uint64_t flags, | ||
66 | int fiopt) | ||
67 | { | ||
68 | struct address_space *mapping = VFS_I(ip)->i_mapping; | ||
69 | int ret = 0; | ||
70 | int ret2; | ||
71 | |||
72 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | ||
73 | ret = -filemap_fdatawrite_range(mapping, first, | ||
74 | last == -1 ? LLONG_MAX : last); | ||
75 | if (flags & XBF_ASYNC) | ||
76 | return ret; | ||
77 | ret2 = xfs_wait_on_pages(ip, first, last); | ||
78 | if (!ret) | ||
79 | ret = ret2; | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | int | ||
84 | xfs_wait_on_pages( | ||
85 | xfs_inode_t *ip, | ||
86 | xfs_off_t first, | ||
87 | xfs_off_t last) | ||
88 | { | ||
89 | struct address_space *mapping = VFS_I(ip)->i_mapping; | ||
90 | |||
91 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { | ||
92 | return -filemap_fdatawait_range(mapping, first, | ||
93 | last == -1 ? XFS_ISIZE(ip) - 1 : last); | ||
94 | } | ||
95 | return 0; | ||
96 | } | ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 4beaede43277..94eaeedc5498 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -97,7 +97,9 @@ xfs_fs_geometry( | |||
97 | (xfs_sb_version_haslazysbcount(&mp->m_sb) ? | 97 | (xfs_sb_version_haslazysbcount(&mp->m_sb) ? |
98 | XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | | 98 | XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | |
99 | (xfs_sb_version_hasattr2(&mp->m_sb) ? | 99 | (xfs_sb_version_hasattr2(&mp->m_sb) ? |
100 | XFS_FSOP_GEOM_FLAGS_ATTR2 : 0); | 100 | XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | |
101 | (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? | ||
102 | XFS_FSOP_GEOM_FLAGS_PROJID32 : 0); | ||
101 | geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? | 103 | geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? |
102 | mp->m_sb.sb_logsectsize : BBSIZE; | 104 | mp->m_sb.sb_logsectsize : BBSIZE; |
103 | geo->rtsectsize = mp->m_sb.sb_blocksize; | 105 | geo->rtsectsize = mp->m_sb.sb_blocksize; |
@@ -112,18 +114,40 @@ xfs_fs_geometry( | |||
112 | return 0; | 114 | return 0; |
113 | } | 115 | } |
114 | 116 | ||
117 | static struct xfs_buf * | ||
118 | xfs_growfs_get_hdr_buf( | ||
119 | struct xfs_mount *mp, | ||
120 | xfs_daddr_t blkno, | ||
121 | size_t numblks, | ||
122 | int flags, | ||
123 | const struct xfs_buf_ops *ops) | ||
124 | { | ||
125 | struct xfs_buf *bp; | ||
126 | |||
127 | bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags); | ||
128 | if (!bp) | ||
129 | return NULL; | ||
130 | |||
131 | xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); | ||
132 | bp->b_bn = blkno; | ||
133 | bp->b_maps[0].bm_bn = blkno; | ||
134 | bp->b_ops = ops; | ||
135 | |||
136 | return bp; | ||
137 | } | ||
138 | |||
115 | static int | 139 | static int |
116 | xfs_growfs_data_private( | 140 | xfs_growfs_data_private( |
117 | xfs_mount_t *mp, /* mount point for filesystem */ | 141 | xfs_mount_t *mp, /* mount point for filesystem */ |
118 | xfs_growfs_data_t *in) /* growfs data input struct */ | 142 | xfs_growfs_data_t *in) /* growfs data input struct */ |
119 | { | 143 | { |
120 | xfs_agf_t *agf; | 144 | xfs_agf_t *agf; |
145 | struct xfs_agfl *agfl; | ||
121 | xfs_agi_t *agi; | 146 | xfs_agi_t *agi; |
122 | xfs_agnumber_t agno; | 147 | xfs_agnumber_t agno; |
123 | xfs_extlen_t agsize; | 148 | xfs_extlen_t agsize; |
124 | xfs_extlen_t tmpsize; | 149 | xfs_extlen_t tmpsize; |
125 | xfs_alloc_rec_t *arec; | 150 | xfs_alloc_rec_t *arec; |
126 | struct xfs_btree_block *block; | ||
127 | xfs_buf_t *bp; | 151 | xfs_buf_t *bp; |
128 | int bucket; | 152 | int bucket; |
129 | int dpct; | 153 | int dpct; |
@@ -146,9 +170,14 @@ xfs_growfs_data_private( | |||
146 | dpct = pct - mp->m_sb.sb_imax_pct; | 170 | dpct = pct - mp->m_sb.sb_imax_pct; |
147 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, | 171 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, |
148 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), | 172 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), |
149 | XFS_FSS_TO_BB(mp, 1), 0); | 173 | XFS_FSS_TO_BB(mp, 1), 0, NULL); |
150 | if (!bp) | 174 | if (!bp) |
151 | return EIO; | 175 | return EIO; |
176 | if (bp->b_error) { | ||
177 | int error = bp->b_error; | ||
178 | xfs_buf_relse(bp); | ||
179 | return error; | ||
180 | } | ||
152 | xfs_buf_relse(bp); | 181 | xfs_buf_relse(bp); |
153 | 182 | ||
154 | new = nb; /* use new as a temporary here */ | 183 | new = nb; /* use new as a temporary here */ |
@@ -186,17 +215,18 @@ xfs_growfs_data_private( | |||
186 | nfree = 0; | 215 | nfree = 0; |
187 | for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { | 216 | for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { |
188 | /* | 217 | /* |
189 | * AG freelist header block | 218 | * AG freespace header block |
190 | */ | 219 | */ |
191 | bp = xfs_buf_get(mp->m_ddev_targp, | 220 | bp = xfs_growfs_get_hdr_buf(mp, |
192 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), | 221 | XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), |
193 | XFS_FSS_TO_BB(mp, 1), 0); | 222 | XFS_FSS_TO_BB(mp, 1), 0, |
223 | &xfs_agf_buf_ops); | ||
194 | if (!bp) { | 224 | if (!bp) { |
195 | error = ENOMEM; | 225 | error = ENOMEM; |
196 | goto error0; | 226 | goto error0; |
197 | } | 227 | } |
228 | |||
198 | agf = XFS_BUF_TO_AGF(bp); | 229 | agf = XFS_BUF_TO_AGF(bp); |
199 | memset(agf, 0, mp->m_sb.sb_sectsize); | ||
200 | agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); | 230 | agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); |
201 | agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); | 231 | agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); |
202 | agf->agf_seqno = cpu_to_be32(agno); | 232 | agf->agf_seqno = cpu_to_be32(agno); |
@@ -223,17 +253,39 @@ xfs_growfs_data_private( | |||
223 | goto error0; | 253 | goto error0; |
224 | 254 | ||
225 | /* | 255 | /* |
256 | * AG freelist header block | ||
257 | */ | ||
258 | bp = xfs_growfs_get_hdr_buf(mp, | ||
259 | XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), | ||
260 | XFS_FSS_TO_BB(mp, 1), 0, | ||
261 | &xfs_agfl_buf_ops); | ||
262 | if (!bp) { | ||
263 | error = ENOMEM; | ||
264 | goto error0; | ||
265 | } | ||
266 | |||
267 | agfl = XFS_BUF_TO_AGFL(bp); | ||
268 | for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) | ||
269 | agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); | ||
270 | |||
271 | error = xfs_bwrite(bp); | ||
272 | xfs_buf_relse(bp); | ||
273 | if (error) | ||
274 | goto error0; | ||
275 | |||
276 | /* | ||
226 | * AG inode header block | 277 | * AG inode header block |
227 | */ | 278 | */ |
228 | bp = xfs_buf_get(mp->m_ddev_targp, | 279 | bp = xfs_growfs_get_hdr_buf(mp, |
229 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 280 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
230 | XFS_FSS_TO_BB(mp, 1), 0); | 281 | XFS_FSS_TO_BB(mp, 1), 0, |
282 | &xfs_agi_buf_ops); | ||
231 | if (!bp) { | 283 | if (!bp) { |
232 | error = ENOMEM; | 284 | error = ENOMEM; |
233 | goto error0; | 285 | goto error0; |
234 | } | 286 | } |
287 | |||
235 | agi = XFS_BUF_TO_AGI(bp); | 288 | agi = XFS_BUF_TO_AGI(bp); |
236 | memset(agi, 0, mp->m_sb.sb_sectsize); | ||
237 | agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); | 289 | agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); |
238 | agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); | 290 | agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); |
239 | agi->agi_seqno = cpu_to_be32(agno); | 291 | agi->agi_seqno = cpu_to_be32(agno); |
@@ -254,24 +306,22 @@ xfs_growfs_data_private( | |||
254 | /* | 306 | /* |
255 | * BNO btree root block | 307 | * BNO btree root block |
256 | */ | 308 | */ |
257 | bp = xfs_buf_get(mp->m_ddev_targp, | 309 | bp = xfs_growfs_get_hdr_buf(mp, |
258 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), | 310 | XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), |
259 | BTOBB(mp->m_sb.sb_blocksize), 0); | 311 | BTOBB(mp->m_sb.sb_blocksize), 0, |
312 | &xfs_allocbt_buf_ops); | ||
313 | |||
260 | if (!bp) { | 314 | if (!bp) { |
261 | error = ENOMEM; | 315 | error = ENOMEM; |
262 | goto error0; | 316 | goto error0; |
263 | } | 317 | } |
264 | block = XFS_BUF_TO_BLOCK(bp); | 318 | |
265 | memset(block, 0, mp->m_sb.sb_blocksize); | 319 | xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0); |
266 | block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); | 320 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); |
267 | block->bb_level = 0; | ||
268 | block->bb_numrecs = cpu_to_be16(1); | ||
269 | block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); | ||
270 | block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); | ||
271 | arec = XFS_ALLOC_REC_ADDR(mp, block, 1); | ||
272 | arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); | 321 | arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); |
273 | arec->ar_blockcount = cpu_to_be32( | 322 | arec->ar_blockcount = cpu_to_be32( |
274 | agsize - be32_to_cpu(arec->ar_startblock)); | 323 | agsize - be32_to_cpu(arec->ar_startblock)); |
324 | |||
275 | error = xfs_bwrite(bp); | 325 | error = xfs_bwrite(bp); |
276 | xfs_buf_relse(bp); | 326 | xfs_buf_relse(bp); |
277 | if (error) | 327 | if (error) |
@@ -280,25 +330,22 @@ xfs_growfs_data_private( | |||
280 | /* | 330 | /* |
281 | * CNT btree root block | 331 | * CNT btree root block |
282 | */ | 332 | */ |
283 | bp = xfs_buf_get(mp->m_ddev_targp, | 333 | bp = xfs_growfs_get_hdr_buf(mp, |
284 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), | 334 | XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), |
285 | BTOBB(mp->m_sb.sb_blocksize), 0); | 335 | BTOBB(mp->m_sb.sb_blocksize), 0, |
336 | &xfs_allocbt_buf_ops); | ||
286 | if (!bp) { | 337 | if (!bp) { |
287 | error = ENOMEM; | 338 | error = ENOMEM; |
288 | goto error0; | 339 | goto error0; |
289 | } | 340 | } |
290 | block = XFS_BUF_TO_BLOCK(bp); | 341 | |
291 | memset(block, 0, mp->m_sb.sb_blocksize); | 342 | xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0); |
292 | block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); | 343 | arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); |
293 | block->bb_level = 0; | ||
294 | block->bb_numrecs = cpu_to_be16(1); | ||
295 | block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); | ||
296 | block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); | ||
297 | arec = XFS_ALLOC_REC_ADDR(mp, block, 1); | ||
298 | arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); | 344 | arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); |
299 | arec->ar_blockcount = cpu_to_be32( | 345 | arec->ar_blockcount = cpu_to_be32( |
300 | agsize - be32_to_cpu(arec->ar_startblock)); | 346 | agsize - be32_to_cpu(arec->ar_startblock)); |
301 | nfree += be32_to_cpu(arec->ar_blockcount); | 347 | nfree += be32_to_cpu(arec->ar_blockcount); |
348 | |||
302 | error = xfs_bwrite(bp); | 349 | error = xfs_bwrite(bp); |
303 | xfs_buf_relse(bp); | 350 | xfs_buf_relse(bp); |
304 | if (error) | 351 | if (error) |
@@ -307,20 +354,17 @@ xfs_growfs_data_private( | |||
307 | /* | 354 | /* |
308 | * INO btree root block | 355 | * INO btree root block |
309 | */ | 356 | */ |
310 | bp = xfs_buf_get(mp->m_ddev_targp, | 357 | bp = xfs_growfs_get_hdr_buf(mp, |
311 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), | 358 | XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), |
312 | BTOBB(mp->m_sb.sb_blocksize), 0); | 359 | BTOBB(mp->m_sb.sb_blocksize), 0, |
360 | &xfs_inobt_buf_ops); | ||
313 | if (!bp) { | 361 | if (!bp) { |
314 | error = ENOMEM; | 362 | error = ENOMEM; |
315 | goto error0; | 363 | goto error0; |
316 | } | 364 | } |
317 | block = XFS_BUF_TO_BLOCK(bp); | 365 | |
318 | memset(block, 0, mp->m_sb.sb_blocksize); | 366 | xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0); |
319 | block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); | 367 | |
320 | block->bb_level = 0; | ||
321 | block->bb_numrecs = 0; | ||
322 | block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); | ||
323 | block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); | ||
324 | error = xfs_bwrite(bp); | 368 | error = xfs_bwrite(bp); |
325 | xfs_buf_relse(bp); | 369 | xfs_buf_relse(bp); |
326 | if (error) | 370 | if (error) |
@@ -408,14 +452,16 @@ xfs_growfs_data_private( | |||
408 | if (agno < oagcount) { | 452 | if (agno < oagcount) { |
409 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, | 453 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, |
410 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), | 454 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), |
411 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 455 | XFS_FSS_TO_BB(mp, 1), 0, &bp, |
456 | &xfs_sb_buf_ops); | ||
412 | } else { | 457 | } else { |
413 | bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp, | 458 | bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp, |
414 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), | 459 | XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), |
415 | XFS_FSS_TO_BB(mp, 1), 0); | 460 | XFS_FSS_TO_BB(mp, 1), 0); |
416 | if (bp) | 461 | if (bp) { |
462 | bp->b_ops = &xfs_sb_buf_ops; | ||
417 | xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); | 463 | xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); |
418 | else | 464 | } else |
419 | error = ENOMEM; | 465 | error = ENOMEM; |
420 | } | 466 | } |
421 | 467 | ||
@@ -426,6 +472,7 @@ xfs_growfs_data_private( | |||
426 | break; | 472 | break; |
427 | } | 473 | } |
428 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); | 474 | xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); |
475 | |||
429 | /* | 476 | /* |
430 | * If we get an error writing out the alternate superblocks, | 477 | * If we get an error writing out the alternate superblocks, |
431 | * just issue a warning and continue. The real work is | 478 | * just issue a warning and continue. The real work is |
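With xfs_growfs_get_hdr_buf() in place, every header write in the growfs loop above reduces to the same shape; only the disk address, the verifier, and the field initialisation differ per header. The shape in isolation (daddr and the foo names stand in for the per-header specifics):

	bp = xfs_growfs_get_hdr_buf(mp, daddr, XFS_FSS_TO_BB(mp, 1), 0,
				    &xfs_foo_buf_ops);	/* zeroed, ops set */
	if (!bp) {
		error = ENOMEM;
		goto error0;
	}
	/* ... fill in the header fields ... */
	error = xfs_bwrite(bp);
	xfs_buf_relse(bp);
	if (error)
		goto error0;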
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 76e81cff70b9..5399ef222dd7 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c | |||
@@ -21,7 +21,8 @@ | |||
21 | /* | 21 | /* |
22 | * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, | 22 | * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, |
23 | * other XFS code uses these values. Times are measured in centisecs (i.e. | 23 | * other XFS code uses these values. Times are measured in centisecs (i.e. |
24 | * 100ths of a second). | 24 | * 100ths of a second) with the exception of eofb_timer, which is measured in |
25 | * seconds. | ||
25 | */ | 26 | */ |
26 | xfs_param_t xfs_params = { | 27 | xfs_param_t xfs_params = { |
27 | /* MIN DFLT MAX */ | 28 | /* MIN DFLT MAX */ |
@@ -40,4 +41,5 @@ xfs_param_t xfs_params = { | |||
40 | .rotorstep = { 1, 1, 255 }, | 41 | .rotorstep = { 1, 1, 255 }, |
41 | .inherit_nodfrg = { 0, 1, 1 }, | 42 | .inherit_nodfrg = { 0, 1, 1 }, |
42 | .fstrm_timer = { 1, 30*100, 3600*100}, | 43 | .fstrm_timer = { 1, 30*100, 3600*100}, |
44 | .eofb_timer = { 1, 300, 3600*24}, | ||
43 | }; | 45 | }; |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c5c4ef4f2bdb..a815412eab80 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -200,7 +200,8 @@ xfs_ialloc_inode_init( | |||
200 | */ | 200 | */ |
201 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); | 201 | d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); |
202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, | 202 | fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, |
203 | mp->m_bsize * blks_per_cluster, 0); | 203 | mp->m_bsize * blks_per_cluster, |
204 | XBF_UNMAPPED); | ||
204 | if (!fbuf) | 205 | if (!fbuf) |
205 | return ENOMEM; | 206 | return ENOMEM; |
206 | /* | 207 | /* |
@@ -210,6 +211,7 @@ xfs_ialloc_inode_init( | |||
210 | * to log a whole cluster of inodes instead of all the | 211 | * to log a whole cluster of inodes instead of all the |
211 | * individual transactions causing a lot of log traffic. | 212 | * individual transactions causing a lot of log traffic. |
212 | */ | 213 | */ |
214 | fbuf->b_ops = &xfs_inode_buf_ops; | ||
213 | xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); | 215 | xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); |
214 | for (i = 0; i < ninodes; i++) { | 216 | for (i = 0; i < ninodes; i++) { |
215 | int ioffset = i << mp->m_sb.sb_inodelog; | 217 | int ioffset = i << mp->m_sb.sb_inodelog; |
@@ -877,9 +879,9 @@ error0: | |||
877 | * This function is designed to be called twice if it has to do an allocation | 879 | * This function is designed to be called twice if it has to do an allocation |
878 | * to make more free inodes. On the first call, *IO_agbp should be set to NULL. | 880 | * to make more free inodes. On the first call, *IO_agbp should be set to NULL. |
879 | * If an inode is available without having to perform an allocation, an inode | 881 | * If an inode is available without having to perform an allocation, an inode |
880 | * number is returned. In this case, *IO_agbp would be NULL. If an allocation | 882 | * number is returned. In this case, *IO_agbp is set to NULL. If an allocation |
881 | * needes to be done, xfs_dialloc would return the current AGI buffer in | 883 | * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. |
882 | * *IO_agbp. The caller should then commit the current transaction, allocate a | 884 | * The caller should then commit the current transaction, allocate a |
883 | * new transaction, and call xfs_dialloc() again, passing in the previous value | 885 | * new transaction, and call xfs_dialloc() again, passing in the previous value |
884 | * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI | 886 | * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI |
885 | * buffer is locked across the two calls, the second call is guaranteed to have | 887 | * buffer is locked across the two calls, the second call is guaranteed to have |
@@ -1472,6 +1474,57 @@ xfs_check_agi_unlinked( | |||
1472 | #define xfs_check_agi_unlinked(agi) | 1474 | #define xfs_check_agi_unlinked(agi) |
1473 | #endif | 1475 | #endif |
1474 | 1476 | ||
1477 | static void | ||
1478 | xfs_agi_verify( | ||
1479 | struct xfs_buf *bp) | ||
1480 | { | ||
1481 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
1482 | struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); | ||
1483 | int agi_ok; | ||
1484 | |||
1485 | /* | ||
1486 | * Validate the magic number of the agi block. | ||
1487 | */ | ||
1488 | agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && | ||
1489 | XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); | ||
1490 | |||
1491 | /* | ||
1492 | * during growfs operations, the perag is not fully initialised, | ||
1493 | * so we can't use it for any useful checking. growfs ensures we can't | ||
1494 | * use it by using uncached buffers that don't have the perag attached | ||
1495 | * so we can detect and avoid this problem. | ||
1496 | */ | ||
1497 | if (bp->b_pag) | ||
1498 | agi_ok = agi_ok && be32_to_cpu(agi->agi_seqno) == | ||
1499 | bp->b_pag->pag_agno; | ||
1500 | |||
1501 | if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, | ||
1502 | XFS_RANDOM_IALLOC_READ_AGI))) { | ||
1503 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi); | ||
1504 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
1505 | } | ||
1506 | xfs_check_agi_unlinked(agi); | ||
1507 | } | ||
1508 | |||
1509 | static void | ||
1510 | xfs_agi_read_verify( | ||
1511 | struct xfs_buf *bp) | ||
1512 | { | ||
1513 | xfs_agi_verify(bp); | ||
1514 | } | ||
1515 | |||
1516 | static void | ||
1517 | xfs_agi_write_verify( | ||
1518 | struct xfs_buf *bp) | ||
1519 | { | ||
1520 | xfs_agi_verify(bp); | ||
1521 | } | ||
1522 | |||
1523 | const struct xfs_buf_ops xfs_agi_buf_ops = { | ||
1524 | .verify_read = xfs_agi_read_verify, | ||
1525 | .verify_write = xfs_agi_write_verify, | ||
1526 | }; | ||
1527 | |||
1475 | /* | 1528 | /* |
1476 | * Read in the allocation group header (inode allocation section) | 1529 | * Read in the allocation group header (inode allocation section) |
1477 | */ | 1530 | */ |
@@ -1482,38 +1535,18 @@ xfs_read_agi( | |||
1482 | xfs_agnumber_t agno, /* allocation group number */ | 1535 | xfs_agnumber_t agno, /* allocation group number */ |
1483 | struct xfs_buf **bpp) /* allocation group hdr buf */ | 1536 | struct xfs_buf **bpp) /* allocation group hdr buf */ |
1484 | { | 1537 | { |
1485 | struct xfs_agi *agi; /* allocation group header */ | ||
1486 | int agi_ok; /* agi is consistent */ | ||
1487 | int error; | 1538 | int error; |
1488 | 1539 | ||
1489 | ASSERT(agno != NULLAGNUMBER); | 1540 | ASSERT(agno != NULLAGNUMBER); |
1490 | 1541 | ||
1491 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 1542 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
1492 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 1543 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
1493 | XFS_FSS_TO_BB(mp, 1), 0, bpp); | 1544 | XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); |
1494 | if (error) | 1545 | if (error) |
1495 | return error; | 1546 | return error; |
1496 | 1547 | ||
1497 | ASSERT(!xfs_buf_geterror(*bpp)); | 1548 | ASSERT(!xfs_buf_geterror(*bpp)); |
1498 | agi = XFS_BUF_TO_AGI(*bpp); | ||
1499 | |||
1500 | /* | ||
1501 | * Validate the magic number of the agi block. | ||
1502 | */ | ||
1503 | agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) && | ||
1504 | XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && | ||
1505 | be32_to_cpu(agi->agi_seqno) == agno; | ||
1506 | if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, | ||
1507 | XFS_RANDOM_IALLOC_READ_AGI))) { | ||
1508 | XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, | ||
1509 | mp, agi); | ||
1510 | xfs_trans_brelse(tp, *bpp); | ||
1511 | return XFS_ERROR(EFSCORRUPTED); | ||
1512 | } | ||
1513 | |||
1514 | xfs_buf_set_ref(*bpp, XFS_AGI_REF); | 1549 | xfs_buf_set_ref(*bpp, XFS_AGI_REF); |
1515 | |||
1516 | xfs_check_agi_unlinked(agi); | ||
1517 | return 0; | 1550 | return 0; |
1518 | } | 1551 | } |
1519 | 1552 | ||
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 1fd6ea4e9c91..c8da3df271e6 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h | |||
@@ -147,7 +147,9 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, | |||
147 | /* | 147 | /* |
148 | * Get the data from the pointed-to record. | 148 | * Get the data from the pointed-to record. |
149 | */ | 149 | */ |
150 | extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, | 150 | int xfs_inobt_get_rec(struct xfs_btree_cur *cur, |
151 | xfs_inobt_rec_incore_t *rec, int *stat); | 151 | xfs_inobt_rec_incore_t *rec, int *stat); |
152 | 152 | ||
153 | extern const struct xfs_buf_ops xfs_agi_buf_ops; | ||
154 | |||
153 | #endif /* __XFS_IALLOC_H__ */ | 155 | #endif /* __XFS_IALLOC_H__ */ |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 2b8b7a37aa18..bec344b36507 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include "xfs_ialloc.h" | 33 | #include "xfs_ialloc.h" |
34 | #include "xfs_alloc.h" | 34 | #include "xfs_alloc.h" |
35 | #include "xfs_error.h" | 35 | #include "xfs_error.h" |
36 | #include "xfs_trace.h" | ||
36 | 37 | ||
37 | 38 | ||
38 | STATIC int | 39 | STATIC int |
@@ -181,6 +182,59 @@ xfs_inobt_key_diff( | |||
181 | cur->bc_rec.i.ir_startino; | 182 | cur->bc_rec.i.ir_startino; |
182 | } | 183 | } |
183 | 184 | ||
185 | void | ||
186 | xfs_inobt_verify( | ||
187 | struct xfs_buf *bp) | ||
188 | { | ||
189 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
190 | struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); | ||
191 | unsigned int level; | ||
192 | int sblock_ok; /* block passes checks */ | ||
193 | |||
194 | /* magic number and level verification */ | ||
195 | level = be16_to_cpu(block->bb_level); | ||
196 | sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) && | ||
197 | level < mp->m_in_maxlevels; | ||
198 | |||
199 | /* numrecs verification */ | ||
200 | sblock_ok = sblock_ok && | ||
201 | be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0]; | ||
202 | |||
203 | /* sibling pointer verification */ | ||
204 | sblock_ok = sblock_ok && | ||
205 | (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || | ||
206 | be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && | ||
207 | block->bb_u.s.bb_leftsib && | ||
208 | (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || | ||
209 | be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && | ||
210 | block->bb_u.s.bb_rightsib; | ||
211 | |||
212 | if (!sblock_ok) { | ||
213 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
214 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); | ||
215 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
216 | } | ||
217 | } | ||
218 | |||
219 | static void | ||
220 | xfs_inobt_read_verify( | ||
221 | struct xfs_buf *bp) | ||
222 | { | ||
223 | xfs_inobt_verify(bp); | ||
224 | } | ||
225 | |||
226 | static void | ||
227 | xfs_inobt_write_verify( | ||
228 | struct xfs_buf *bp) | ||
229 | { | ||
230 | xfs_inobt_verify(bp); | ||
231 | } | ||
232 | |||
233 | const struct xfs_buf_ops xfs_inobt_buf_ops = { | ||
234 | .verify_read = xfs_inobt_read_verify, | ||
235 | .verify_write = xfs_inobt_write_verify, | ||
236 | }; | ||
237 | |||
184 | #ifdef DEBUG | 238 | #ifdef DEBUG |
185 | STATIC int | 239 | STATIC int |
186 | xfs_inobt_keys_inorder( | 240 | xfs_inobt_keys_inorder( |
@@ -218,6 +272,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { | |||
218 | .init_rec_from_cur = xfs_inobt_init_rec_from_cur, | 272 | .init_rec_from_cur = xfs_inobt_init_rec_from_cur, |
219 | .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, | 273 | .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, |
220 | .key_diff = xfs_inobt_key_diff, | 274 | .key_diff = xfs_inobt_key_diff, |
275 | .buf_ops = &xfs_inobt_buf_ops, | ||
221 | #ifdef DEBUG | 276 | #ifdef DEBUG |
222 | .keys_inorder = xfs_inobt_keys_inorder, | 277 | .keys_inorder = xfs_inobt_keys_inorder, |
223 | .recs_inorder = xfs_inobt_recs_inorder, | 278 | .recs_inorder = xfs_inobt_recs_inorder, |
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index f782ad0c4769..25c0239a8eab 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h | |||
@@ -109,4 +109,6 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, | |||
109 | struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); | 109 | struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); |
110 | extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); | 110 | extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); |
111 | 111 | ||
112 | extern const struct xfs_buf_ops xfs_inobt_buf_ops; | ||
113 | |||
112 | #endif /* __XFS_IALLOC_BTREE_H__ */ | 114 | #endif /* __XFS_IALLOC_BTREE_H__ */ |
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_icache.c index 9500caf15acf..96e344e3e927 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_types.h" |
21 | #include "xfs_log.h" | 21 | #include "xfs_log.h" |
22 | #include "xfs_log_priv.h" | ||
22 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
23 | #include "xfs_trans.h" | 24 | #include "xfs_trans.h" |
24 | #include "xfs_trans_priv.h" | 25 | #include "xfs_trans_priv.h" |
@@ -35,11 +36,425 @@ | |||
35 | #include "xfs_quota.h" | 36 | #include "xfs_quota.h" |
36 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
37 | #include "xfs_fsops.h" | 38 | #include "xfs_fsops.h" |
39 | #include "xfs_icache.h" | ||
38 | 40 | ||
39 | #include <linux/kthread.h> | 41 | #include <linux/kthread.h> |
40 | #include <linux/freezer.h> | 42 | #include <linux/freezer.h> |
41 | 43 | ||
42 | struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | 44 | STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, |
45 | struct xfs_perag *pag, struct xfs_inode *ip); | ||
46 | |||
47 | /* | ||
48 | * Allocate and initialise an xfs_inode. | ||
49 | */ | ||
50 | STATIC struct xfs_inode * | ||
51 | xfs_inode_alloc( | ||
52 | struct xfs_mount *mp, | ||
53 | xfs_ino_t ino) | ||
54 | { | ||
55 | struct xfs_inode *ip; | ||
56 | |||
57 | /* | ||
58 | * if this didn't occur in transactions, we could use | ||
59 | * KM_MAYFAIL and return NULL here on ENOMEM. Set the | ||
60 | * code up to do this anyway. | ||
61 | */ | ||
62 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | ||
63 | if (!ip) | ||
64 | return NULL; | ||
65 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | ||
66 | kmem_zone_free(xfs_inode_zone, ip); | ||
67 | return NULL; | ||
68 | } | ||
69 | |||
70 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
71 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
72 | ASSERT(!xfs_isiflocked(ip)); | ||
73 | ASSERT(ip->i_ino == 0); | ||
74 | |||
75 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
76 | |||
77 | /* initialise the xfs inode */ | ||
78 | ip->i_ino = ino; | ||
79 | ip->i_mount = mp; | ||
80 | memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); | ||
81 | ip->i_afp = NULL; | ||
82 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); | ||
83 | ip->i_flags = 0; | ||
84 | ip->i_delayed_blks = 0; | ||
85 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | ||
86 | |||
87 | return ip; | ||
88 | } | ||
89 | |||
90 | STATIC void | ||
91 | xfs_inode_free_callback( | ||
92 | struct rcu_head *head) | ||
93 | { | ||
94 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
95 | struct xfs_inode *ip = XFS_I(inode); | ||
96 | |||
97 | kmem_zone_free(xfs_inode_zone, ip); | ||
98 | } | ||
99 | |||
100 | STATIC void | ||
101 | xfs_inode_free( | ||
102 | struct xfs_inode *ip) | ||
103 | { | ||
104 | switch (ip->i_d.di_mode & S_IFMT) { | ||
105 | case S_IFREG: | ||
106 | case S_IFDIR: | ||
107 | case S_IFLNK: | ||
108 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
109 | break; | ||
110 | } | ||
111 | |||
112 | if (ip->i_afp) | ||
113 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
114 | |||
115 | if (ip->i_itemp) { | ||
116 | ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL)); | ||
117 | xfs_inode_item_destroy(ip); | ||
118 | ip->i_itemp = NULL; | ||
119 | } | ||
120 | |||
121 | /* asserts to verify all state is correct here */ | ||
122 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
123 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
124 | ASSERT(!xfs_isiflocked(ip)); | ||
125 | |||
126 | /* | ||
127 | * Because we use RCU freeing we need to ensure the inode always | ||
128 | * appears to be reclaimed with an invalid inode number when in the | ||
129 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
130 | * races. | ||
131 | */ | ||
132 | spin_lock(&ip->i_flags_lock); | ||
133 | ip->i_flags = XFS_IRECLAIM; | ||
134 | ip->i_ino = 0; | ||
135 | spin_unlock(&ip->i_flags_lock); | ||
136 | |||
137 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Check the validity of the inode we just found in the cache | ||
142 | */ | ||
143 | static int | ||
144 | xfs_iget_cache_hit( | ||
145 | struct xfs_perag *pag, | ||
146 | struct xfs_inode *ip, | ||
147 | xfs_ino_t ino, | ||
148 | int flags, | ||
149 | int lock_flags) __releases(RCU) | ||
150 | { | ||
151 | struct inode *inode = VFS_I(ip); | ||
152 | struct xfs_mount *mp = ip->i_mount; | ||
153 | int error; | ||
154 | |||
155 | /* | ||
156 | * check for re-use of an inode within an RCU grace period due to the | ||
157 | * radix tree nodes not being updated yet. We monitor for this by | ||
158 | * setting the inode number to zero before freeing the inode structure. | ||
159 | * If the inode has been reallocated and set up, then the inode number | ||
160 | * will not match, so check for that, too. | ||
161 | */ | ||
162 | spin_lock(&ip->i_flags_lock); | ||
163 | if (ip->i_ino != ino) { | ||
164 | trace_xfs_iget_skip(ip); | ||
165 | XFS_STATS_INC(xs_ig_frecycle); | ||
166 | error = EAGAIN; | ||
167 | goto out_error; | ||
168 | } | ||
169 | |||
170 | |||
171 | /* | ||
172 | * If we are racing with another cache hit that is currently | ||
173 | * instantiating this inode or currently recycling it out of | ||
174 | * reclaimable state, wait for the initialisation to complete | ||
175 | * before continuing. | ||
176 | * | ||
177 | * XXX(hch): eventually we should do something equivalent to | ||
178 | * wait_on_inode to wait for these flags to be cleared | ||
179 | * instead of polling for it. | ||
180 | */ | ||
181 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { | ||
182 | trace_xfs_iget_skip(ip); | ||
183 | XFS_STATS_INC(xs_ig_frecycle); | ||
184 | error = EAGAIN; | ||
185 | goto out_error; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * If lookup is racing with unlink return an error immediately. | ||
190 | */ | ||
191 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | ||
192 | error = ENOENT; | ||
193 | goto out_error; | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
198 | * Need to carefully get it back into usable state. | ||
199 | */ | ||
200 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
201 | trace_xfs_iget_reclaim(ip); | ||
202 | |||
203 | /* | ||
204 | * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode | ||
205 | * from stomping over us while we recycle the inode. We can't | ||
206 | * clear the radix tree reclaimable tag yet as it requires | ||
207 | * pag_ici_lock to be held exclusive. | ||
208 | */ | ||
209 | ip->i_flags |= XFS_IRECLAIM; | ||
210 | |||
211 | spin_unlock(&ip->i_flags_lock); | ||
212 | rcu_read_unlock(); | ||
213 | |||
214 | error = -inode_init_always(mp->m_super, inode); | ||
215 | if (error) { | ||
216 | /* | ||
217 | * Re-initializing the inode failed, and we are in deep | ||
218 | * trouble. Try to re-add it to the reclaim list. | ||
219 | */ | ||
220 | rcu_read_lock(); | ||
221 | spin_lock(&ip->i_flags_lock); | ||
222 | |||
223 | ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); | ||
224 | ASSERT(ip->i_flags & XFS_IRECLAIMABLE); | ||
225 | trace_xfs_iget_reclaim_fail(ip); | ||
226 | goto out_error; | ||
227 | } | ||
228 | |||
229 | spin_lock(&pag->pag_ici_lock); | ||
230 | spin_lock(&ip->i_flags_lock); | ||
231 | |||
232 | /* | ||
233 | * Clear the per-lifetime state in the inode as we are now | ||
234 | * effectively a new inode and need to return to the initial | ||
235 | * state before reuse occurs. | ||
236 | */ | ||
237 | ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; | ||
238 | ip->i_flags |= XFS_INEW; | ||
239 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | ||
240 | inode->i_state = I_NEW; | ||
241 | |||
242 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
243 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
244 | |||
245 | spin_unlock(&ip->i_flags_lock); | ||
246 | spin_unlock(&pag->pag_ici_lock); | ||
247 | } else { | ||
248 | /* If the VFS inode is being torn down, pause and try again. */ | ||
249 | if (!igrab(inode)) { | ||
250 | trace_xfs_iget_skip(ip); | ||
251 | error = EAGAIN; | ||
252 | goto out_error; | ||
253 | } | ||
254 | |||
255 | /* We've got a live one. */ | ||
256 | spin_unlock(&ip->i_flags_lock); | ||
257 | rcu_read_unlock(); | ||
258 | trace_xfs_iget_hit(ip); | ||
259 | } | ||
260 | |||
261 | if (lock_flags != 0) | ||
262 | xfs_ilock(ip, lock_flags); | ||
263 | |||
264 | xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); | ||
265 | XFS_STATS_INC(xs_ig_found); | ||
266 | |||
267 | return 0; | ||
268 | |||
269 | out_error: | ||
270 | spin_unlock(&ip->i_flags_lock); | ||
271 | rcu_read_unlock(); | ||
272 | return error; | ||
273 | } | ||
274 | |||
275 | |||
276 | static int | ||
277 | xfs_iget_cache_miss( | ||
278 | struct xfs_mount *mp, | ||
279 | struct xfs_perag *pag, | ||
280 | xfs_trans_t *tp, | ||
281 | xfs_ino_t ino, | ||
282 | struct xfs_inode **ipp, | ||
283 | int flags, | ||
284 | int lock_flags) | ||
285 | { | ||
286 | struct xfs_inode *ip; | ||
287 | int error; | ||
288 | xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); | ||
289 | int iflags; | ||
290 | |||
291 | ip = xfs_inode_alloc(mp, ino); | ||
292 | if (!ip) | ||
293 | return ENOMEM; | ||
294 | |||
295 | error = xfs_iread(mp, tp, ip, flags); | ||
296 | if (error) | ||
297 | goto out_destroy; | ||
298 | |||
299 | trace_xfs_iget_miss(ip); | ||
300 | |||
301 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
302 | error = ENOENT; | ||
303 | goto out_destroy; | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Preload the radix tree so we can insert safely under the | ||
308 | * write spinlock. Note that we cannot sleep inside the preload | ||
309 | * region. Since we can be called from transaction context, don't | ||
310 | * recurse into the file system. | ||
311 | */ | ||
312 | if (radix_tree_preload(GFP_NOFS)) { | ||
313 | error = EAGAIN; | ||
314 | goto out_destroy; | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * Because the inode hasn't been added to the radix-tree yet it can't | ||
319 | * be found by another thread, so we can do the non-sleeping lock here. | ||
320 | */ | ||
321 | if (lock_flags) { | ||
322 | if (!xfs_ilock_nowait(ip, lock_flags)) | ||
323 | BUG(); | ||
324 | } | ||
325 | |||
326 | /* | ||
327 | * These values must be set before inserting the inode into the radix | ||
328 | * tree as the moment it is inserted a concurrent lookup (allowed by the | ||
329 | * RCU locking mechanism) can find it and that lookup must see that this | ||
330 | * is an inode currently under construction (i.e. that XFS_INEW is set). | ||
331 | * The ip->i_flags_lock that protects the XFS_INEW flag forms the | ||
332 | * memory barrier that ensures this detection works correctly at lookup | ||
333 | * time. | ||
334 | */ | ||
335 | iflags = XFS_INEW; | ||
336 | if (flags & XFS_IGET_DONTCACHE) | ||
337 | iflags |= XFS_IDONTCACHE; | ||
338 | ip->i_udquot = ip->i_gdquot = NULL; | ||
339 | xfs_iflags_set(ip, iflags); | ||
340 | |||
341 | /* insert the new inode */ | ||
342 | spin_lock(&pag->pag_ici_lock); | ||
343 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | ||
344 | if (unlikely(error)) { | ||
345 | WARN_ON(error != -EEXIST); | ||
346 | XFS_STATS_INC(xs_ig_dup); | ||
347 | error = EAGAIN; | ||
348 | goto out_preload_end; | ||
349 | } | ||
350 | spin_unlock(&pag->pag_ici_lock); | ||
351 | radix_tree_preload_end(); | ||
352 | |||
353 | *ipp = ip; | ||
354 | return 0; | ||
355 | |||
356 | out_preload_end: | ||
357 | spin_unlock(&pag->pag_ici_lock); | ||
358 | radix_tree_preload_end(); | ||
359 | if (lock_flags) | ||
360 | xfs_iunlock(ip, lock_flags); | ||
361 | out_destroy: | ||
362 | __destroy_inode(VFS_I(ip)); | ||
363 | xfs_inode_free(ip); | ||
364 | return error; | ||
365 | } | ||
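xfs_iget_cache_miss() above is a textbook instance of the radix-tree preload protocol: node memory is reserved with radix_tree_preload() while sleeping is still allowed, the insert itself runs under the spinlock, and radix_tree_preload_end() closes the (preemption-disabled) preload section on every exit path. The skeleton, using the same pag/agino/ip names as above:

static int insert_inode(struct xfs_perag *pag, xfs_agino_t agino,
			struct xfs_inode *ip)
{
	int error;

	/* may sleep, so it must run before the spinlock is taken */
	if (radix_tree_preload(GFP_NOFS))
		return EAGAIN;

	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();	/* ends the atomic preload section */

	/* -EEXIST here means a racing lookup inserted the inode first */
	return error ? EAGAIN : 0;
}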
366 | |||
367 | /* | ||
368 | * Look up an inode by number in the given file system. | ||
369 | * The inode is looked up in the cache held in each AG. | ||
370 | * If the inode is found in the cache, initialise the vfs inode | ||
371 | * if necessary. | ||
372 | * | ||
373 | * If it is not in core, read it in from the file system's device, | ||
374 | * add it to the cache and initialise the vfs inode. | ||
375 | * | ||
376 | * The inode is locked according to the value of the lock_flags parameter. | ||
377 | * This flag parameter indicates how and if the inode's IO lock and inode lock | ||
378 | * should be taken. | ||
379 | * | ||
380 | * mp -- the mount point structure for the current file system. It points | ||
381 | * to the inode hash table. | ||
382 | * tp -- a pointer to the current transaction if there is one. This is | ||
383 | * simply passed through to the xfs_iread() call. | ||
384 | * ino -- the number of the inode desired. This is the unique identifier | ||
385 | * within the file system for the inode being requested. | ||
386 | * lock_flags -- flags indicating how to lock the inode. See the comment | ||
387 | * for xfs_ilock() for a list of valid values. | ||
388 | */ | ||
389 | int | ||
390 | xfs_iget( | ||
391 | xfs_mount_t *mp, | ||
392 | xfs_trans_t *tp, | ||
393 | xfs_ino_t ino, | ||
394 | uint flags, | ||
395 | uint lock_flags, | ||
396 | xfs_inode_t **ipp) | ||
397 | { | ||
398 | xfs_inode_t *ip; | ||
399 | int error; | ||
400 | xfs_perag_t *pag; | ||
401 | xfs_agino_t agino; | ||
402 | |||
403 | /* | ||
404 | * xfs_reclaim_inode() uses the ILOCK to ensure an inode | ||
405 | * doesn't get freed while it's being referenced during a | ||
406 | * radix tree traversal here. It assumes this function | ||
407 | * acquires only the ILOCK (and therefore it has no need to | ||
408 | * involve the IOLOCK in this synchronization). | ||
409 | */ | ||
410 | ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); | ||
411 | |||
412 | /* reject inode numbers outside existing AGs */ | ||
413 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | ||
414 | return EINVAL; | ||
415 | |||
416 | /* get the perag structure and ensure that it's inode capable */ | ||
417 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); | ||
418 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
419 | |||
420 | again: | ||
421 | error = 0; | ||
422 | rcu_read_lock(); | ||
423 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | ||
424 | |||
425 | if (ip) { | ||
426 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); | ||
427 | if (error) | ||
428 | goto out_error_or_again; | ||
429 | } else { | ||
430 | rcu_read_unlock(); | ||
431 | XFS_STATS_INC(xs_ig_missed); | ||
432 | |||
433 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | ||
434 | flags, lock_flags); | ||
435 | if (error) | ||
436 | goto out_error_or_again; | ||
437 | } | ||
438 | xfs_perag_put(pag); | ||
439 | |||
440 | *ipp = ip; | ||
441 | |||
442 | /* | ||
443 | * If we have a real type for an on-disk inode, we can set ops(&unlock) | ||
444 | * now. If it's a new inode being created, xfs_ialloc will handle it. | ||
445 | */ | ||
446 | if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) | ||
447 | xfs_setup_inode(ip); | ||
448 | return 0; | ||
449 | |||
450 | out_error_or_again: | ||
451 | if (error == EAGAIN) { | ||
452 | delay(1); | ||
453 | goto again; | ||
454 | } | ||
455 | xfs_perag_put(pag); | ||
456 | return error; | ||
457 | } | ||
43 | 458 | ||
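The cache-hit path above and xfs_inode_free() earlier are the two halves of one RCU protocol: the free side zeroes i_ino under i_flags_lock before handing the structure to call_rcu(), and the lookup side re-checks the inode number under the same lock, so an inode recycled within a grace period is retried rather than used. The pairing, stripped to its essentials (all names are from the code above):

	/* free side: mark the inode dead, then defer the actual free */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;			/* lookup's "this is dead" marker */
	spin_unlock(&ip->i_flags_lock);
	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);

	/* lookup side: validate the hit under the same lock */
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
	if (ip) {
		spin_lock(&ip->i_flags_lock);
		if (ip->i_ino != ino)	/* freed or recycled: try again */
			error = EAGAIN;
		spin_unlock(&ip->i_flags_lock);
	}
	rcu_read_unlock();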
44 | /* | 459 | /* |
45 | * The inode lookup is done in batches to keep the amount of lock traffic and | 460 | * The inode lookup is done in batches to keep the amount of lock traffic and |
@@ -101,8 +516,11 @@ xfs_inode_ag_walk( | |||
101 | struct xfs_mount *mp, | 516 | struct xfs_mount *mp, |
102 | struct xfs_perag *pag, | 517 | struct xfs_perag *pag, |
103 | int (*execute)(struct xfs_inode *ip, | 518 | int (*execute)(struct xfs_inode *ip, |
104 | struct xfs_perag *pag, int flags), | 519 | struct xfs_perag *pag, int flags, |
105 | int flags) | 520 | void *args), |
521 | int flags, | ||
522 | void *args, | ||
523 | int tag) | ||
106 | { | 524 | { |
107 | uint32_t first_index; | 525 | uint32_t first_index; |
108 | int last_error = 0; | 526 | int last_error = 0; |
@@ -121,9 +539,17 @@ restart: | |||
121 | int i; | 539 | int i; |
122 | 540 | ||
123 | rcu_read_lock(); | 541 | rcu_read_lock(); |
124 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 542 | |
543 | if (tag == -1) | ||
544 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | ||
125 | (void **)batch, first_index, | 545 | (void **)batch, first_index, |
126 | XFS_LOOKUP_BATCH); | 546 | XFS_LOOKUP_BATCH); |
547 | else | ||
548 | nr_found = radix_tree_gang_lookup_tag( | ||
549 | &pag->pag_ici_root, | ||
550 | (void **) batch, first_index, | ||
551 | XFS_LOOKUP_BATCH, tag); | ||
552 | |||
127 | if (!nr_found) { | 553 | if (!nr_found) { |
128 | rcu_read_unlock(); | 554 | rcu_read_unlock(); |
129 | break; | 555 | break; |
@@ -164,7 +590,7 @@ restart: | |||
164 | for (i = 0; i < nr_found; i++) { | 590 | for (i = 0; i < nr_found; i++) { |
165 | if (!batch[i]) | 591 | if (!batch[i]) |
166 | continue; | 592 | continue; |
167 | error = execute(batch[i], pag, flags); | 593 | error = execute(batch[i], pag, flags, args); |
168 | IRELE(batch[i]); | 594 | IRELE(batch[i]); |
169 | if (error == EAGAIN) { | 595 | if (error == EAGAIN) { |
170 | skipped++; | 596 | skipped++; |
@@ -189,12 +615,40 @@ restart: | |||
189 | return last_error; | 615 | return last_error; |
190 | } | 616 | } |
191 | 617 | ||
618 | /* | ||
619 | * Background scanning to trim post-EOF preallocated space. This is queued | ||
620 | * based on the 'background_prealloc_discard_period' tunable (5m by default). | ||
621 | */ | ||
622 | STATIC void | ||
623 | xfs_queue_eofblocks( | ||
624 | struct xfs_mount *mp) | ||
625 | { | ||
626 | rcu_read_lock(); | ||
627 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG)) | ||
628 | queue_delayed_work(mp->m_eofblocks_workqueue, | ||
629 | &mp->m_eofblocks_work, | ||
630 | msecs_to_jiffies(xfs_eofb_secs * 1000)); | ||
631 | rcu_read_unlock(); | ||
632 | } | ||
633 | |||
634 | void | ||
635 | xfs_eofblocks_worker( | ||
636 | struct work_struct *work) | ||
637 | { | ||
638 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
639 | struct xfs_mount, m_eofblocks_work); | ||
640 | xfs_icache_free_eofblocks(mp, NULL); | ||
641 | xfs_queue_eofblocks(mp); | ||
642 | } | ||
643 | |||
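xfs_queue_eofblocks() plus xfs_eofblocks_worker() form the standard self-rearming delayed-work loop: each pass re-queues the next one. Below is a standalone sketch of that pattern; the demo_* names and the fixed 5-minute period (standing in for the tunable) are hypothetical.

#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static struct workqueue_struct *demo_wq;
static struct delayed_work demo_work;
static unsigned int demo_period_ms = 5 * 60 * 1000;	/* 5m default */

static void demo_worker(struct work_struct *work)
{
	/* ... one background scan pass ... */

	/* re-arm: the worker schedules its own next run */
	queue_delayed_work(demo_wq, &demo_work,
			   msecs_to_jiffies(demo_period_ms));
}

static int demo_start(void)
{
	demo_wq = alloc_workqueue("demo", WQ_MEM_RECLAIM, 0);
	if (!demo_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&demo_work, demo_worker);
	queue_delayed_work(demo_wq, &demo_work,
			   msecs_to_jiffies(demo_period_ms));
	return 0;
}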
192 | int | 644 | int |
193 | xfs_inode_ag_iterator( | 645 | xfs_inode_ag_iterator( |
194 | struct xfs_mount *mp, | 646 | struct xfs_mount *mp, |
195 | int (*execute)(struct xfs_inode *ip, | 647 | int (*execute)(struct xfs_inode *ip, |
196 | struct xfs_perag *pag, int flags), | 648 | struct xfs_perag *pag, int flags, |
197 | int flags) | 649 | void *args), |
650 | int flags, | ||
651 | void *args) | ||
198 | { | 652 | { |
199 | struct xfs_perag *pag; | 653 | struct xfs_perag *pag; |
200 | int error = 0; | 654 | int error = 0; |
@@ -204,7 +658,7 @@ xfs_inode_ag_iterator( | |||
204 | ag = 0; | 658 | ag = 0; |
205 | while ((pag = xfs_perag_get(mp, ag))) { | 659 | while ((pag = xfs_perag_get(mp, ag))) { |
206 | ag = pag->pag_agno + 1; | 660 | ag = pag->pag_agno + 1; |
207 | error = xfs_inode_ag_walk(mp, pag, execute, flags); | 661 | error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); |
208 | xfs_perag_put(pag); | 662 | xfs_perag_put(pag); |
209 | if (error) { | 663 | if (error) { |
210 | last_error = error; | 664 | last_error = error; |
@@ -215,224 +669,50 @@ xfs_inode_ag_iterator( | |||
215 | return XFS_ERROR(last_error); | 669 | return XFS_ERROR(last_error); |
216 | } | 670 | } |
217 | 671 | ||
218 | STATIC int | ||
219 | xfs_sync_inode_data( | ||
220 | struct xfs_inode *ip, | ||
221 | struct xfs_perag *pag, | ||
222 | int flags) | ||
223 | { | ||
224 | struct inode *inode = VFS_I(ip); | ||
225 | struct address_space *mapping = inode->i_mapping; | ||
226 | int error = 0; | ||
227 | |||
228 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | ||
229 | return 0; | ||
230 | |||
231 | if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { | ||
232 | if (flags & SYNC_TRYLOCK) | ||
233 | return 0; | ||
234 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
235 | } | ||
236 | |||
237 | error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? | ||
238 | 0 : XBF_ASYNC, FI_NONE); | ||
239 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
240 | return error; | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * Write out pagecache data for the whole filesystem. | ||
245 | */ | ||
246 | STATIC int | ||
247 | xfs_sync_data( | ||
248 | struct xfs_mount *mp, | ||
249 | int flags) | ||
250 | { | ||
251 | int error; | ||
252 | |||
253 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); | ||
254 | |||
255 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); | ||
256 | if (error) | ||
257 | return XFS_ERROR(error); | ||
258 | |||
259 | xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | STATIC int | ||
264 | xfs_sync_fsdata( | ||
265 | struct xfs_mount *mp) | ||
266 | { | ||
267 | struct xfs_buf *bp; | ||
268 | int error; | ||
269 | |||
270 | /* | ||
271 | * If the buffer is pinned then push on the log so we won't get stuck | ||
272 | * waiting in the write for someone, maybe ourselves, to flush the log. | ||
273 | * | ||
274 | * Even though we just pushed the log above, we did not have the | ||
275 | * superblock buffer locked at that point so it can become pinned in | ||
276 | * between there and here. | ||
277 | */ | ||
278 | bp = xfs_getsb(mp, 0); | ||
279 | if (xfs_buf_ispinned(bp)) | ||
280 | xfs_log_force(mp, 0); | ||
281 | error = xfs_bwrite(bp); | ||
282 | xfs_buf_relse(bp); | ||
283 | return error; | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * When remounting a filesystem read-only or freezing the filesystem, we have | ||
288 | * two phases to execute. This first phase is syncing the data before we | ||
289 | * quiesce the filesystem, and the second is flushing all the inodes out after | ||
290 | * we've waited for all the transactions created by the first phase to | ||
291 | * complete. The second phase ensures that the inodes are written to their | ||
292 | * location on disk rather than just existing in transactions in the log. This | ||
293 | * means after a quiesce there is no log replay required to write the inodes to | ||
294 | * disk (this is the main difference between a sync and a quiesce). | ||
295 | */ | ||
296 | /* | ||
297 | * First stage of freeze - no writers will make progress now we are here, | ||
298 | * so we flush delwri and delalloc buffers here, then wait for all I/O to | ||
299 | * complete. Data is frozen at that point. Metadata is not frozen, | ||
300 | * transactions can still occur here so don't bother emptying the AIL | ||
301 | * because it'll just get dirty again. | ||
302 | */ | ||
303 | int | 672 | int |
304 | xfs_quiesce_data( | 673 | xfs_inode_ag_iterator_tag( |
305 | struct xfs_mount *mp) | 674 | struct xfs_mount *mp, |
306 | { | 675 | int (*execute)(struct xfs_inode *ip, |
307 | int error, error2 = 0; | 676 | struct xfs_perag *pag, int flags, |
308 | 677 | void *args), | |
309 | /* force out the log */ | 678 | int flags, |
310 | xfs_log_force(mp, XFS_LOG_SYNC); | 679 | void *args, |
311 | 680 | int tag) | |
312 | /* write superblock and hoover up shutdown errors */ | ||
313 | error = xfs_sync_fsdata(mp); | ||
314 | |||
315 | /* mark the log as covered if needed */ | ||
316 | if (xfs_log_need_covered(mp)) | ||
317 | error2 = xfs_fs_log_dummy(mp); | ||
318 | |||
319 | return error ? error : error2; | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * Second stage of a quiesce. The data is already synced, now we have to take | ||
324 | * care of the metadata. New transactions are already blocked, so we need to | ||
325 | * wait for any remaining transactions to drain out before proceeding. | ||
326 | */ | ||
327 | void | ||
328 | xfs_quiesce_attr( | ||
329 | struct xfs_mount *mp) | ||
330 | { | ||
331 | int error = 0; | ||
332 | |||
333 | /* wait for all modifications to complete */ | ||
334 | while (atomic_read(&mp->m_active_trans) > 0) | ||
335 | delay(100); | ||
336 | |||
337 | /* reclaim inodes to do any IO before the freeze completes */ | ||
338 | xfs_reclaim_inodes(mp, 0); | ||
339 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
340 | |||
341 | /* flush all pending changes from the AIL */ | ||
342 | xfs_ail_push_all_sync(mp->m_ail); | ||
343 | |||
344 | /* | ||
345 | * Just warn here till VFS can correctly support | ||
346 | * read-only remount without racing. | ||
347 | */ | ||
348 | WARN_ON(atomic_read(&mp->m_active_trans) != 0); | ||
349 | |||
350 | /* Push the superblock and write an unmount record */ | ||
351 | error = xfs_log_sbcount(mp); | ||
352 | if (error) | ||
353 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " | ||
354 | "Frozen image may not be consistent."); | ||
355 | xfs_log_unmount_write(mp); | ||
356 | |||
357 | /* | ||
358 | * At this point we might have modified the superblock again and thus | ||
359 | * added an item to the AIL, thus flush it again. | ||
360 | */ | ||
361 | xfs_ail_push_all_sync(mp->m_ail); | ||
362 | |||
363 | /* | ||
364 | * The superblock buffer is uncached and xfsaild_push() will lock and | ||
365 | * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait() | ||
366 | * here but a lock on the superblock buffer will block until iodone() | ||
367 | * has completed. | ||
368 | */ | ||
369 | xfs_buf_lock(mp->m_sb_bp); | ||
370 | xfs_buf_unlock(mp->m_sb_bp); | ||
371 | } | ||
372 | |||
373 | static void | ||
374 | xfs_syncd_queue_sync( | ||
375 | struct xfs_mount *mp) | ||
376 | { | ||
377 | queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, | ||
378 | msecs_to_jiffies(xfs_syncd_centisecs * 10)); | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Every sync period we need to unpin all items, reclaim inodes and sync | ||
383 | * disk quotas. We might need to cover the log to indicate that the | ||
384 | * filesystem is idle and not frozen. | ||
385 | */ | ||
386 | STATIC void | ||
387 | xfs_sync_worker( | ||
388 | struct work_struct *work) | ||
389 | { | 681 | { |
390 | struct xfs_mount *mp = container_of(to_delayed_work(work), | 682 | struct xfs_perag *pag; |
391 | struct xfs_mount, m_sync_work); | 683 | int error = 0; |
392 | int error; | 684 | int last_error = 0; |
393 | 685 | xfs_agnumber_t ag; | |
394 | /* | ||
395 | * We shouldn't write/force the log if we are in the mount/unmount | ||
396 | * process or on a read only filesystem. The workqueue still needs to be | ||
397 | * active in both cases, however, because it is used for inode reclaim | ||
398 | * during these times. Use the MS_ACTIVE flag to avoid doing anything | ||
399 | * during mount. Doing work during unmount is avoided by calling | ||
400 | * cancel_delayed_work_sync on this work queue before tearing down | ||
401 | * the ail and the log in xfs_log_unmount. | ||
402 | */ | ||
403 | if (!(mp->m_super->s_flags & MS_ACTIVE) && | ||
404 | !(mp->m_flags & XFS_MOUNT_RDONLY)) { | ||
405 | /* dgc: errors ignored here */ | ||
406 | if (mp->m_super->s_writers.frozen == SB_UNFROZEN && | ||
407 | xfs_log_need_covered(mp)) | ||
408 | error = xfs_fs_log_dummy(mp); | ||
409 | else | ||
410 | xfs_log_force(mp, 0); | ||
411 | 686 | ||
412 | /* start pushing all the metadata that is currently | 687 | ag = 0; |
413 | * dirty */ | 688 | while ((pag = xfs_perag_get_tag(mp, ag, tag))) { |
414 | xfs_ail_push_all(mp->m_ail); | 689 | ag = pag->pag_agno + 1; |
690 | error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); | ||
691 | xfs_perag_put(pag); | ||
692 | if (error) { | ||
693 | last_error = error; | ||
694 | if (error == EFSCORRUPTED) | ||
695 | break; | ||
696 | } | ||
415 | } | 697 | } |
416 | 698 | return XFS_ERROR(last_error); | |
417 | /* queue us up again */ | ||
418 | xfs_syncd_queue_sync(mp); | ||
419 | } | 699 | } |
420 | 700 | ||
421 | /* | 701 | /* |
422 | * Queue a new inode reclaim pass if there are reclaimable inodes and there | 702 | * Queue a new inode reclaim pass if there are reclaimable inodes and there |
423 | * isn't a reclaim pass already in progress. By default it runs every 5s based | 703 | * isn't a reclaim pass already in progress. By default it runs every 5s based |
424 | * on the xfs syncd work default of 30s. Perhaps this should have its own | 704 | * on the xfs periodic sync default of 30s. Perhaps this should have its own |
425 | * tunable, but that can be done if this method proves to be ineffective or too | 705 | * tunable, but that can be done if this method proves to be ineffective or too |
426 | * aggressive. | 706 | * aggressive. |
427 | */ | 707 | */ |
428 | static void | 708 | static void |
429 | xfs_syncd_queue_reclaim( | 709 | xfs_reclaim_work_queue( |
430 | struct xfs_mount *mp) | 710 | struct xfs_mount *mp) |
431 | { | 711 | { |
432 | 712 | ||
433 | rcu_read_lock(); | 713 | rcu_read_lock(); |
434 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { | 714 | if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { |
435 | queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, | 715 | queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work, |
436 | msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); | 716 | msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); |
437 | } | 717 | } |
438 | rcu_read_unlock(); | 718 | rcu_read_unlock(); |
@@ -445,7 +725,7 @@ xfs_syncd_queue_reclaim( | |||
445 | * goes low. It scans as quickly as possible avoiding locked inodes or those | 725 | * goes low. It scans as quickly as possible avoiding locked inodes or those |
446 | * already being flushed, and once done schedules a future pass. | 726 | * already being flushed, and once done schedules a future pass. |
447 | */ | 727 | */ |
448 | STATIC void | 728 | void |
449 | xfs_reclaim_worker( | 729 | xfs_reclaim_worker( |
450 | struct work_struct *work) | 730 | struct work_struct *work) |
451 | { | 731 | { |
@@ -453,65 +733,10 @@ xfs_reclaim_worker( | |||
453 | struct xfs_mount, m_reclaim_work); | 733 | struct xfs_mount, m_reclaim_work); |
454 | 734 | ||
455 | xfs_reclaim_inodes(mp, SYNC_TRYLOCK); | 735 | xfs_reclaim_inodes(mp, SYNC_TRYLOCK); |
456 | xfs_syncd_queue_reclaim(mp); | 736 | xfs_reclaim_work_queue(mp); |
457 | } | 737 | } |
458 | 738 | ||
459 | /* | 739 | static void |
460 | * Flush delayed allocate data, attempting to free up reserved space | ||
461 | * from existing allocations. At this point a new allocation attempt | ||
462 | * has failed with ENOSPC and we are in the process of scratching our | ||
463 | * heads, looking about for more room. | ||
464 | * | ||
465 | * Queue a new data flush if there isn't one already in progress and | ||
466 | * wait for completion of the flush. This means that we only ever have one | ||
467 | * inode flush in progress no matter how many ENOSPC events are occurring and | ||
468 | * so will prevent the system from bogging down due to every concurrent | ||
469 | * ENOSPC event scanning all the active inodes in the system for writeback. | ||
470 | */ | ||
471 | void | ||
472 | xfs_flush_inodes( | ||
473 | struct xfs_inode *ip) | ||
474 | { | ||
475 | struct xfs_mount *mp = ip->i_mount; | ||
476 | |||
477 | queue_work(xfs_syncd_wq, &mp->m_flush_work); | ||
478 | flush_work(&mp->m_flush_work); | ||
479 | } | ||
480 | |||
481 | STATIC void | ||
482 | xfs_flush_worker( | ||
483 | struct work_struct *work) | ||
484 | { | ||
485 | struct xfs_mount *mp = container_of(work, | ||
486 | struct xfs_mount, m_flush_work); | ||
487 | |||
488 | xfs_sync_data(mp, SYNC_TRYLOCK); | ||
489 | xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); | ||
490 | } | ||
491 | |||
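The removed xfs_flush_inodes()/xfs_flush_worker() pair relies on two workqueue properties: queue_work() is a no-op while the work is already pending, so concurrent ENOSPC hitters coalesce onto a single pass, and flush_work() blocks each caller until that pass completes. A sketch of the idiom follows; the demo_* names are hypothetical and INIT_WORK() is assumed to have run during setup.

#include <linux/workqueue.h>

static struct work_struct demo_flush_work;

static void demo_do_flush(struct work_struct *work)
{
	/* ... write back dirty data once, on behalf of all waiters ... */
}

static void demo_flush_and_wait(void)
{
	queue_work(system_wq, &demo_flush_work);	/* coalesces if pending */
	flush_work(&demo_flush_work);			/* wait for the pass */
}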
492 | int | ||
493 | xfs_syncd_init( | ||
494 | struct xfs_mount *mp) | ||
495 | { | ||
496 | INIT_WORK(&mp->m_flush_work, xfs_flush_worker); | ||
497 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); | ||
498 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); | ||
499 | |||
500 | xfs_syncd_queue_sync(mp); | ||
501 | |||
502 | return 0; | ||
503 | } | ||
504 | |||
505 | void | ||
506 | xfs_syncd_stop( | ||
507 | struct xfs_mount *mp) | ||
508 | { | ||
509 | cancel_delayed_work_sync(&mp->m_sync_work); | ||
510 | cancel_delayed_work_sync(&mp->m_reclaim_work); | ||
511 | cancel_work_sync(&mp->m_flush_work); | ||
512 | } | ||
513 | |||
514 | void | ||
515 | __xfs_inode_set_reclaim_tag( | 740 | __xfs_inode_set_reclaim_tag( |
516 | struct xfs_perag *pag, | 741 | struct xfs_perag *pag, |
517 | struct xfs_inode *ip) | 742 | struct xfs_inode *ip) |
@@ -529,7 +754,7 @@ __xfs_inode_set_reclaim_tag( | |||
529 | spin_unlock(&ip->i_mount->m_perag_lock); | 754 | spin_unlock(&ip->i_mount->m_perag_lock); |
530 | 755 | ||
531 | /* schedule periodic background inode reclaim */ | 756 | /* schedule periodic background inode reclaim */ |
532 | xfs_syncd_queue_reclaim(ip->i_mount); | 757 | xfs_reclaim_work_queue(ip->i_mount); |
533 | 758 | ||
534 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, | 759 | trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, |
535 | -1, _RET_IP_); | 760 | -1, _RET_IP_); |
@@ -577,7 +802,7 @@ __xfs_inode_clear_reclaim( | |||
577 | } | 802 | } |
578 | } | 803 | } |
579 | 804 | ||
580 | void | 805 | STATIC void |
581 | __xfs_inode_clear_reclaim_tag( | 806 | __xfs_inode_clear_reclaim_tag( |
582 | xfs_mount_t *mp, | 807 | xfs_mount_t *mp, |
583 | xfs_perag_t *pag, | 808 | xfs_perag_t *pag, |
@@ -787,9 +1012,9 @@ out: | |||
787 | /* | 1012 | /* |
788 | * We could return EAGAIN here to make reclaim rescan the inode tree in | 1013 | * We could return EAGAIN here to make reclaim rescan the inode tree in |
789 | * a short while. However, this just burns CPU time scanning the tree | 1014 | * a short while. However, this just burns CPU time scanning the tree |
790 | * waiting for IO to complete and xfssyncd never goes back to the idle | 1015 | * waiting for IO to complete and the reclaim work never goes back to |
791 | * state. Instead, return 0 to let the next scheduled background reclaim | 1016 | * the idle state. Instead, return 0 to let the next scheduled |
792 | * attempt to reclaim the inode again. | 1017 | * background reclaim attempt to reclaim the inode again. |
793 | */ | 1018 | */ |
794 | return 0; | 1019 | return 0; |
795 | } | 1020 | } |
@@ -800,7 +1025,7 @@ out: | |||
800 | * then a shutdown during a filesystem unmount reclaim walk will leak all | 1025 | * then a shutdown during a filesystem unmount reclaim walk will leak all |
801 | * the unreclaimed inodes. | 1026 | * the unreclaimed inodes. |
802 | */ | 1027 | */ |
803 | int | 1028 | STATIC int |
804 | xfs_reclaim_inodes_ag( | 1029 | xfs_reclaim_inodes_ag( |
805 | struct xfs_mount *mp, | 1030 | struct xfs_mount *mp, |
806 | int flags, | 1031 | int flags, |
@@ -945,7 +1170,7 @@ xfs_reclaim_inodes_nr( | |||
945 | int nr_to_scan) | 1170 | int nr_to_scan) |
946 | { | 1171 | { |
947 | /* kick background reclaimer and push the AIL */ | 1172 | /* kick background reclaimer and push the AIL */ |
948 | xfs_syncd_queue_reclaim(mp); | 1173 | xfs_reclaim_work_queue(mp); |
949 | xfs_ail_push_all(mp->m_ail); | 1174 | xfs_ail_push_all(mp->m_ail); |
950 | 1175 | ||
951 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); | 1176 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); |
@@ -971,3 +1196,146 @@ xfs_reclaim_inodes_count( | |||
971 | return reclaimable; | 1196 | return reclaimable; |
972 | } | 1197 | } |
973 | 1198 | ||
1199 | STATIC int | ||
1200 | xfs_inode_match_id( | ||
1201 | struct xfs_inode *ip, | ||
1202 | struct xfs_eofblocks *eofb) | ||
1203 | { | ||
1204 | if (eofb->eof_flags & XFS_EOF_FLAGS_UID && | ||
1205 | ip->i_d.di_uid != eofb->eof_uid) | ||
1206 | return 0; | ||
1207 | |||
1208 | if (eofb->eof_flags & XFS_EOF_FLAGS_GID && | ||
1209 | ip->i_d.di_gid != eofb->eof_gid) | ||
1210 | return 0; | ||
1211 | |||
1212 | if (eofb->eof_flags & XFS_EOF_FLAGS_PRID && | ||
1213 | xfs_get_projid(ip) != eofb->eof_prid) | ||
1214 | return 0; | ||
1215 | |||
1216 | return 1; | ||
1217 | } | ||
1218 | |||
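xfs_inode_match_id() is a flag-gated compare: each criterion applies only when its flag is set, so an empty mask matches everything. The same pattern, condensed into a self-contained sketch with hypothetical demo_* types:

struct demo_filter {
	unsigned int	flags;
#define DEMO_F_UID	(1 << 0)
#define DEMO_F_GID	(1 << 1)
	unsigned int	uid;
	unsigned int	gid;
};

struct demo_inode {
	unsigned int	uid;
	unsigned int	gid;
};

static int demo_match(const struct demo_inode *it,
		      const struct demo_filter *f)
{
	if ((f->flags & DEMO_F_UID) && it->uid != f->uid)
		return 0;	/* uid requested but does not match */
	if ((f->flags & DEMO_F_GID) && it->gid != f->gid)
		return 0;
	return 1;		/* every requested criterion matched */
}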
1219 | STATIC int | ||
1220 | xfs_inode_free_eofblocks( | ||
1221 | struct xfs_inode *ip, | ||
1222 | struct xfs_perag *pag, | ||
1223 | int flags, | ||
1224 | void *args) | ||
1225 | { | ||
1226 | int ret; | ||
1227 | struct xfs_eofblocks *eofb = args; | ||
1228 | |||
1229 | if (!xfs_can_free_eofblocks(ip, false)) { | ||
1230 | /* inode could be preallocated or append-only */ | ||
1231 | trace_xfs_inode_free_eofblocks_invalid(ip); | ||
1232 | xfs_inode_clear_eofblocks_tag(ip); | ||
1233 | return 0; | ||
1234 | } | ||
1235 | |||
1236 | /* | ||
1237 | * If the mapping is dirty the operation can block and wait for some | ||
1238 | * time. Unless we are waiting, skip it. | ||
1239 | */ | ||
1240 | if (!(flags & SYNC_WAIT) && | ||
1241 | mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY)) | ||
1242 | return 0; | ||
1243 | |||
1244 | if (eofb) { | ||
1245 | if (!xfs_inode_match_id(ip, eofb)) | ||
1246 | return 0; | ||
1247 | |||
1248 | /* skip the inode if the file size is too small */ | ||
1249 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && | ||
1250 | XFS_ISIZE(ip) < eofb->eof_min_file_size) | ||
1251 | return 0; | ||
1252 | } | ||
1253 | |||
1254 | ret = xfs_free_eofblocks(ip->i_mount, ip, true); | ||
1255 | |||
1256 | /* don't revisit the inode if we're not waiting */ | ||
1257 | if (ret == EAGAIN && !(flags & SYNC_WAIT)) | ||
1258 | ret = 0; | ||
1259 | |||
1260 | return ret; | ||
1261 | } | ||
1262 | |||
1263 | int | ||
1264 | xfs_icache_free_eofblocks( | ||
1265 | struct xfs_mount *mp, | ||
1266 | struct xfs_eofblocks *eofb) | ||
1267 | { | ||
1268 | int flags = SYNC_TRYLOCK; | ||
1269 | |||
1270 | if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC)) | ||
1271 | flags = SYNC_WAIT; | ||
1272 | |||
1273 | return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags, | ||
1274 | eofb, XFS_ICI_EOFBLOCKS_TAG); | ||
1275 | } | ||
1276 | |||
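Putting the pieces together: xfs_icache_free_eofblocks() is simply the tagged iterator applied to the xfs_inode_free_eofblocks() callback. A sketch of how a new per-inode scan would plug into the widened execute signature; demo_execute is hypothetical, while the iterator, flags, and tag are from this patch.

STATIC int
demo_execute(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			flags,
	void			*args)
{
	struct xfs_eofblocks	*eofb = args;	/* caller-supplied filter */

	/* ... filter against eofb, then do the per-inode work ... */
	/* return 0, EAGAIN to be revisited, or an error */
	return 0;
}

/* invocation, mirroring xfs_icache_free_eofblocks():
 *	error = xfs_inode_ag_iterator_tag(mp, demo_execute, SYNC_TRYLOCK,
 *					  eofb, XFS_ICI_EOFBLOCKS_TAG);
 */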
1277 | void | ||
1278 | xfs_inode_set_eofblocks_tag( | ||
1279 | xfs_inode_t *ip) | ||
1280 | { | ||
1281 | struct xfs_mount *mp = ip->i_mount; | ||
1282 | struct xfs_perag *pag; | ||
1283 | int tagged; | ||
1284 | |||
1285 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | ||
1286 | spin_lock(&pag->pag_ici_lock); | ||
1287 | trace_xfs_inode_set_eofblocks_tag(ip); | ||
1288 | |||
1289 | tagged = radix_tree_tagged(&pag->pag_ici_root, | ||
1290 | XFS_ICI_EOFBLOCKS_TAG); | ||
1291 | radix_tree_tag_set(&pag->pag_ici_root, | ||
1292 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | ||
1293 | XFS_ICI_EOFBLOCKS_TAG); | ||
1294 | if (!tagged) { | ||
1295 | /* propagate the eofblocks tag up into the perag radix tree */ | ||
1296 | spin_lock(&ip->i_mount->m_perag_lock); | ||
1297 | radix_tree_tag_set(&ip->i_mount->m_perag_tree, | ||
1298 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | ||
1299 | XFS_ICI_EOFBLOCKS_TAG); | ||
1300 | spin_unlock(&ip->i_mount->m_perag_lock); | ||
1301 | |||
1302 | /* kick off background trimming */ | ||
1303 | xfs_queue_eofblocks(ip->i_mount); | ||
1304 | |||
1305 | trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno, | ||
1306 | -1, _RET_IP_); | ||
1307 | } | ||
1308 | |||
1309 | spin_unlock(&pag->pag_ici_lock); | ||
1310 | xfs_perag_put(pag); | ||
1311 | } | ||
1312 | |||
1313 | void | ||
1314 | xfs_inode_clear_eofblocks_tag( | ||
1315 | xfs_inode_t *ip) | ||
1316 | { | ||
1317 | struct xfs_mount *mp = ip->i_mount; | ||
1318 | struct xfs_perag *pag; | ||
1319 | |||
1320 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | ||
1321 | spin_lock(&pag->pag_ici_lock); | ||
1322 | trace_xfs_inode_clear_eofblocks_tag(ip); | ||
1323 | |||
1324 | radix_tree_tag_clear(&pag->pag_ici_root, | ||
1325 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | ||
1326 | XFS_ICI_EOFBLOCKS_TAG); | ||
1327 | if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) { | ||
1328 | /* clear the eofblocks tag from the perag radix tree */ | ||
1329 | spin_lock(&ip->i_mount->m_perag_lock); | ||
1330 | radix_tree_tag_clear(&ip->i_mount->m_perag_tree, | ||
1331 | XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), | ||
1332 | XFS_ICI_EOFBLOCKS_TAG); | ||
1333 | spin_unlock(&ip->i_mount->m_perag_lock); | ||
1334 | trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno, | ||
1335 | -1, _RET_IP_); | ||
1336 | } | ||
1337 | |||
1338 | spin_unlock(&pag->pag_ici_lock); | ||
1339 | xfs_perag_put(pag); | ||
1340 | } | ||
1341 | |||
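The reason the first set and last clear are mirrored into m_perag_tree is cheap scan gating: a walker can skip whole AGs that hold no tagged inodes. Condensed consumer-side sketch, the same loop xfs_inode_ag_iterator_tag() uses earlier in this file:

xfs_agnumber_t		ag = 0;
struct xfs_perag	*pag;

/* visit only AGs containing at least one EOFBLOCKS-tagged inode */
while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_EOFBLOCKS_TAG))) {
	ag = pag->pag_agno + 1;
	/* ... gang-lookup the same tag in pag->pag_ici_root ... */
	xfs_perag_put(pag);
}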
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_icache.h index 941202e7ac6e..e0f138c70a2f 100644 --- a/fs/xfs/xfs_sync.h +++ b/fs/xfs/xfs_icache.h | |||
@@ -24,28 +24,30 @@ struct xfs_perag; | |||
24 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 24 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
25 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ | 25 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ |
26 | 26 | ||
27 | extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | 27 | int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, |
28 | uint flags, uint lock_flags, xfs_inode_t **ipp); | ||
28 | 29 | ||
29 | int xfs_syncd_init(struct xfs_mount *mp); | 30 | void xfs_reclaim_worker(struct work_struct *work); |
30 | void xfs_syncd_stop(struct xfs_mount *mp); | ||
31 | |||
32 | int xfs_quiesce_data(struct xfs_mount *mp); | ||
33 | void xfs_quiesce_attr(struct xfs_mount *mp); | ||
34 | |||
35 | void xfs_flush_inodes(struct xfs_inode *ip); | ||
36 | 31 | ||
37 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 32 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
38 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); | 33 | int xfs_reclaim_inodes_count(struct xfs_mount *mp); |
39 | void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); | 34 | void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); |
40 | 35 | ||
41 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 36 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
42 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | 37 | |
43 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 38 | void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); |
44 | struct xfs_inode *ip); | 39 | void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); |
40 | int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); | ||
41 | void xfs_eofblocks_worker(struct work_struct *); | ||
45 | 42 | ||
46 | int xfs_sync_inode_grab(struct xfs_inode *ip); | 43 | int xfs_sync_inode_grab(struct xfs_inode *ip); |
47 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 44 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
48 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 45 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, |
49 | int flags); | 46 | int flags, void *args), |
47 | int flags, void *args); | ||
48 | int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, | ||
49 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, | ||
50 | int flags, void *args), | ||
51 | int flags, void *args, int tag); | ||
50 | 52 | ||
51 | #endif | 53 | #endif |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c deleted file mode 100644 index 784a803383ec..000000000000 --- a/fs/xfs/xfs_iget.c +++ /dev/null | |||
@@ -1,705 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #include "xfs.h" | ||
19 | #include "xfs_fs.h" | ||
20 | #include "xfs_types.h" | ||
21 | #include "xfs_acl.h" | ||
22 | #include "xfs_log.h" | ||
23 | #include "xfs_inum.h" | ||
24 | #include "xfs_trans.h" | ||
25 | #include "xfs_sb.h" | ||
26 | #include "xfs_ag.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_bmap_btree.h" | ||
29 | #include "xfs_alloc_btree.h" | ||
30 | #include "xfs_ialloc_btree.h" | ||
31 | #include "xfs_dinode.h" | ||
32 | #include "xfs_inode.h" | ||
33 | #include "xfs_btree.h" | ||
34 | #include "xfs_ialloc.h" | ||
35 | #include "xfs_quota.h" | ||
36 | #include "xfs_utils.h" | ||
37 | #include "xfs_trans_priv.h" | ||
38 | #include "xfs_inode_item.h" | ||
39 | #include "xfs_bmap.h" | ||
40 | #include "xfs_trace.h" | ||
41 | |||
42 | |||
43 | /* | ||
44 | * Allocate and initialise an xfs_inode. | ||
45 | */ | ||
46 | STATIC struct xfs_inode * | ||
47 | xfs_inode_alloc( | ||
48 | struct xfs_mount *mp, | ||
49 | xfs_ino_t ino) | ||
50 | { | ||
51 | struct xfs_inode *ip; | ||
52 | |||
53 | /* | ||
54 | * If this didn't occur in transactions, we could use | ||
55 | * KM_MAYFAIL and return NULL here on ENOMEM. The code is | ||
56 | * set up to do this anyway. | ||
57 | */ | ||
58 | ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); | ||
59 | if (!ip) | ||
60 | return NULL; | ||
61 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | ||
62 | kmem_zone_free(xfs_inode_zone, ip); | ||
63 | return NULL; | ||
64 | } | ||
65 | |||
66 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
67 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
68 | ASSERT(!xfs_isiflocked(ip)); | ||
69 | ASSERT(ip->i_ino == 0); | ||
70 | |||
71 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
72 | |||
73 | /* initialise the xfs inode */ | ||
74 | ip->i_ino = ino; | ||
75 | ip->i_mount = mp; | ||
76 | memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); | ||
77 | ip->i_afp = NULL; | ||
78 | memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); | ||
79 | ip->i_flags = 0; | ||
80 | ip->i_delayed_blks = 0; | ||
81 | memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); | ||
82 | |||
83 | return ip; | ||
84 | } | ||
85 | |||
86 | STATIC void | ||
87 | xfs_inode_free_callback( | ||
88 | struct rcu_head *head) | ||
89 | { | ||
90 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
91 | struct xfs_inode *ip = XFS_I(inode); | ||
92 | |||
93 | kmem_zone_free(xfs_inode_zone, ip); | ||
94 | } | ||
95 | |||
96 | void | ||
97 | xfs_inode_free( | ||
98 | struct xfs_inode *ip) | ||
99 | { | ||
100 | switch (ip->i_d.di_mode & S_IFMT) { | ||
101 | case S_IFREG: | ||
102 | case S_IFDIR: | ||
103 | case S_IFLNK: | ||
104 | xfs_idestroy_fork(ip, XFS_DATA_FORK); | ||
105 | break; | ||
106 | } | ||
107 | |||
108 | if (ip->i_afp) | ||
109 | xfs_idestroy_fork(ip, XFS_ATTR_FORK); | ||
110 | |||
111 | if (ip->i_itemp) { | ||
112 | ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL)); | ||
113 | xfs_inode_item_destroy(ip); | ||
114 | ip->i_itemp = NULL; | ||
115 | } | ||
116 | |||
117 | /* asserts to verify all state is correct here */ | ||
118 | ASSERT(atomic_read(&ip->i_pincount) == 0); | ||
119 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | ||
120 | ASSERT(!xfs_isiflocked(ip)); | ||
121 | |||
122 | /* | ||
123 | * Because we use RCU freeing we need to ensure the inode always | ||
124 | * appears to be reclaimed with an invalid inode number when in the | ||
125 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
126 | * races. | ||
127 | */ | ||
128 | spin_lock(&ip->i_flags_lock); | ||
129 | ip->i_flags = XFS_IRECLAIM; | ||
130 | ip->i_ino = 0; | ||
131 | spin_unlock(&ip->i_flags_lock); | ||
132 | |||
133 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
134 | } | ||
135 | |||
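xfs_inode_free() shows the standard RCU-freeing discipline: invalidate the lookup key under its spinlock so concurrent RCU readers reject the object, then defer the actual free past the grace period. A generic sketch of that discipline, with a hypothetical demo_obj:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct demo_obj {
	spinlock_t	lock;
	unsigned long	key;		/* 0 means "being freed" */
	struct rcu_head	rcu;
};

static void demo_free_callback(struct rcu_head *head)
{
	kfree(container_of(head, struct demo_obj, rcu));
}

static void demo_free(struct demo_obj *obj)
{
	spin_lock(&obj->lock);
	obj->key = 0;	/* lookups under rcu_read_lock() now skip it */
	spin_unlock(&obj->lock);
	call_rcu(&obj->rcu, demo_free_callback);
}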
136 | /* | ||
137 | * Check the validity of the inode we just found in the cache | ||
138 | */ | ||
139 | static int | ||
140 | xfs_iget_cache_hit( | ||
141 | struct xfs_perag *pag, | ||
142 | struct xfs_inode *ip, | ||
143 | xfs_ino_t ino, | ||
144 | int flags, | ||
145 | int lock_flags) __releases(RCU) | ||
146 | { | ||
147 | struct inode *inode = VFS_I(ip); | ||
148 | struct xfs_mount *mp = ip->i_mount; | ||
149 | int error; | ||
150 | |||
151 | /* | ||
152 | * check for re-use of an inode within an RCU grace period due to the | ||
153 | * radix tree nodes not being updated yet. We monitor for this by | ||
154 | * setting the inode number to zero before freeing the inode structure. | ||
155 | * If the inode has been reallocated and set up, then the inode number | ||
156 | * will not match, so check for that, too. | ||
157 | */ | ||
158 | spin_lock(&ip->i_flags_lock); | ||
159 | if (ip->i_ino != ino) { | ||
160 | trace_xfs_iget_skip(ip); | ||
161 | XFS_STATS_INC(xs_ig_frecycle); | ||
162 | error = EAGAIN; | ||
163 | goto out_error; | ||
164 | } | ||
165 | |||
166 | |||
167 | /* | ||
168 | * If we are racing with another cache hit that is currently | ||
169 | * instantiating this inode or currently recycling it out of | ||
170 | * reclaimable state, wait for the initialisation to complete | ||
171 | * before continuing. | ||
172 | * | ||
173 | * XXX(hch): eventually we should do something equivalent to | ||
174 | * wait_on_inode to wait for these flags to be cleared | ||
175 | * instead of polling for it. | ||
176 | */ | ||
177 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { | ||
178 | trace_xfs_iget_skip(ip); | ||
179 | XFS_STATS_INC(xs_ig_frecycle); | ||
180 | error = EAGAIN; | ||
181 | goto out_error; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * If lookup is racing with unlink return an error immediately. | ||
186 | */ | ||
187 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | ||
188 | error = ENOENT; | ||
189 | goto out_error; | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
194 | * Need to carefully get it back into useable state. | ||
195 | */ | ||
196 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
197 | trace_xfs_iget_reclaim(ip); | ||
198 | |||
199 | /* | ||
200 | * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode | ||
201 | * from stomping over us while we recycle the inode. We can't | ||
202 | * clear the radix tree reclaimable tag yet as it requires | ||
203 | * pag_ici_lock to be held exclusive. | ||
204 | */ | ||
205 | ip->i_flags |= XFS_IRECLAIM; | ||
206 | |||
207 | spin_unlock(&ip->i_flags_lock); | ||
208 | rcu_read_unlock(); | ||
209 | |||
210 | error = -inode_init_always(mp->m_super, inode); | ||
211 | if (error) { | ||
212 | /* | ||
213 | * Re-initializing the inode failed, and we are in deep | ||
214 | * trouble. Try to re-add it to the reclaim list. | ||
215 | */ | ||
216 | rcu_read_lock(); | ||
217 | spin_lock(&ip->i_flags_lock); | ||
218 | |||
219 | ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); | ||
220 | ASSERT(ip->i_flags & XFS_IRECLAIMABLE); | ||
221 | trace_xfs_iget_reclaim_fail(ip); | ||
222 | goto out_error; | ||
223 | } | ||
224 | |||
225 | spin_lock(&pag->pag_ici_lock); | ||
226 | spin_lock(&ip->i_flags_lock); | ||
227 | |||
228 | /* | ||
229 | * Clear the per-lifetime state in the inode as we are now | ||
230 | * effectively a new inode and need to return to the initial | ||
231 | * state before reuse occurs. | ||
232 | */ | ||
233 | ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; | ||
234 | ip->i_flags |= XFS_INEW; | ||
235 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | ||
236 | inode->i_state = I_NEW; | ||
237 | |||
238 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
239 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
240 | |||
241 | spin_unlock(&ip->i_flags_lock); | ||
242 | spin_unlock(&pag->pag_ici_lock); | ||
243 | } else { | ||
244 | /* If the VFS inode is being torn down, pause and try again. */ | ||
245 | if (!igrab(inode)) { | ||
246 | trace_xfs_iget_skip(ip); | ||
247 | error = EAGAIN; | ||
248 | goto out_error; | ||
249 | } | ||
250 | |||
251 | /* We've got a live one. */ | ||
252 | spin_unlock(&ip->i_flags_lock); | ||
253 | rcu_read_unlock(); | ||
254 | trace_xfs_iget_hit(ip); | ||
255 | } | ||
256 | |||
257 | if (lock_flags != 0) | ||
258 | xfs_ilock(ip, lock_flags); | ||
259 | |||
260 | xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); | ||
261 | XFS_STATS_INC(xs_ig_found); | ||
262 | |||
263 | return 0; | ||
264 | |||
265 | out_error: | ||
266 | spin_unlock(&ip->i_flags_lock); | ||
267 | rcu_read_unlock(); | ||
268 | return error; | ||
269 | } | ||
270 | |||
271 | |||
272 | static int | ||
273 | xfs_iget_cache_miss( | ||
274 | struct xfs_mount *mp, | ||
275 | struct xfs_perag *pag, | ||
276 | xfs_trans_t *tp, | ||
277 | xfs_ino_t ino, | ||
278 | struct xfs_inode **ipp, | ||
279 | int flags, | ||
280 | int lock_flags) | ||
281 | { | ||
282 | struct xfs_inode *ip; | ||
283 | int error; | ||
284 | xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); | ||
285 | int iflags; | ||
286 | |||
287 | ip = xfs_inode_alloc(mp, ino); | ||
288 | if (!ip) | ||
289 | return ENOMEM; | ||
290 | |||
291 | error = xfs_iread(mp, tp, ip, flags); | ||
292 | if (error) | ||
293 | goto out_destroy; | ||
294 | |||
295 | trace_xfs_iget_miss(ip); | ||
296 | |||
297 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
298 | error = ENOENT; | ||
299 | goto out_destroy; | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * Preload the radix tree so we can insert safely under the | ||
304 | * write spinlock. Note that we cannot sleep inside the preload | ||
305 | * region. Since we can be called from transaction context, don't | ||
306 | * recurse into the file system. | ||
307 | */ | ||
308 | if (radix_tree_preload(GFP_NOFS)) { | ||
309 | error = EAGAIN; | ||
310 | goto out_destroy; | ||
311 | } | ||
312 | |||
313 | /* | ||
314 | * Because the inode hasn't been added to the radix-tree yet it can't | ||
315 | * be found by another thread, so we can do the non-sleeping lock here. | ||
316 | */ | ||
317 | if (lock_flags) { | ||
318 | if (!xfs_ilock_nowait(ip, lock_flags)) | ||
319 | BUG(); | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * These values must be set before inserting the inode into the radix | ||
324 | * tree as the moment it is inserted a concurrent lookup (allowed by the | ||
325 | * RCU locking mechanism) can find it and that lookup must see that this | ||
326 | * is an inode currently under construction (i.e. that XFS_INEW is set). | ||
327 | * The ip->i_flags_lock that protects the XFS_INEW flag forms the | ||
328 | * memory barrier that ensures this detection works correctly at lookup | ||
329 | * time. | ||
330 | */ | ||
331 | iflags = XFS_INEW; | ||
332 | if (flags & XFS_IGET_DONTCACHE) | ||
333 | iflags |= XFS_IDONTCACHE; | ||
334 | ip->i_udquot = ip->i_gdquot = NULL; | ||
335 | xfs_iflags_set(ip, iflags); | ||
336 | |||
337 | /* insert the new inode */ | ||
338 | spin_lock(&pag->pag_ici_lock); | ||
339 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | ||
340 | if (unlikely(error)) { | ||
341 | WARN_ON(error != -EEXIST); | ||
342 | XFS_STATS_INC(xs_ig_dup); | ||
343 | error = EAGAIN; | ||
344 | goto out_preload_end; | ||
345 | } | ||
346 | spin_unlock(&pag->pag_ici_lock); | ||
347 | radix_tree_preload_end(); | ||
348 | |||
349 | *ipp = ip; | ||
350 | return 0; | ||
351 | |||
352 | out_preload_end: | ||
353 | spin_unlock(&pag->pag_ici_lock); | ||
354 | radix_tree_preload_end(); | ||
355 | if (lock_flags) | ||
356 | xfs_iunlock(ip, lock_flags); | ||
357 | out_destroy: | ||
358 | __destroy_inode(VFS_I(ip)); | ||
359 | xfs_inode_free(ip); | ||
360 | return error; | ||
361 | } | ||
362 | |||
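The miss path uses the preload-then-insert idiom the comment describes: reserve radix-tree nodes outside the spinlock (GFP_NOFS, since we may be called in transaction context), then insert atomically under it. A generic sketch in negative-errno style; the parameters are hypothetical.

#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/errno.h>

static int demo_insert(struct radix_tree_root *root, spinlock_t *lock,
		       unsigned long index, void *item)
{
	int error;

	if (radix_tree_preload(GFP_NOFS))
		return -ENOMEM;		/* could not reserve nodes */

	spin_lock(lock);
	error = radix_tree_insert(root, index, item);
	spin_unlock(lock);
	radix_tree_preload_end();	/* always pairs with preload */
	return error;
}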
363 | /* | ||
364 | * Look up an inode by number in the given file system. | ||
365 | * The inode is looked up in the cache held in each AG. | ||
366 | * If the inode is found in the cache, initialise the vfs inode | ||
367 | * if necessary. | ||
368 | * | ||
369 | * If it is not in core, read it in from the file system's device, | ||
370 | * add it to the cache and initialise the vfs inode. | ||
371 | * | ||
372 | * The inode is locked according to the value of the lock_flags parameter. | ||
373 | * This flag parameter indicates how and if the inode's IO lock and inode lock | ||
374 | * should be taken. | ||
375 | * | ||
376 | * mp -- the mount point structure for the current file system. It points | ||
377 | * to the inode hash table. | ||
378 | * tp -- a pointer to the current transaction if there is one. This is | ||
379 | * simply passed through to the xfs_iread() call. | ||
380 | * ino -- the number of the inode desired. This is the unique identifier | ||
381 | * within the file system for the inode being requested. | ||
382 | * lock_flags -- flags indicating how to lock the inode. See the comment | ||
383 | * for xfs_ilock() for a list of valid values. | ||
384 | */ | ||
385 | int | ||
386 | xfs_iget( | ||
387 | xfs_mount_t *mp, | ||
388 | xfs_trans_t *tp, | ||
389 | xfs_ino_t ino, | ||
390 | uint flags, | ||
391 | uint lock_flags, | ||
392 | xfs_inode_t **ipp) | ||
393 | { | ||
394 | xfs_inode_t *ip; | ||
395 | int error; | ||
396 | xfs_perag_t *pag; | ||
397 | xfs_agino_t agino; | ||
398 | |||
399 | /* | ||
400 | * xfs_reclaim_inode() uses the ILOCK to ensure an inode | ||
401 | * doesn't get freed while it's being referenced during a | ||
402 | * radix tree traversal here. It assumes this function | ||
403 | * acquires only the ILOCK (and therefore it has no need to | ||
404 | * involve the IOLOCK in this synchronization). | ||
405 | */ | ||
406 | ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0); | ||
407 | |||
408 | /* reject inode numbers outside existing AGs */ | ||
409 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | ||
410 | return EINVAL; | ||
411 | |||
412 | /* get the perag structure and ensure that it's inode capable */ | ||
413 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); | ||
414 | agino = XFS_INO_TO_AGINO(mp, ino); | ||
415 | |||
416 | again: | ||
417 | error = 0; | ||
418 | rcu_read_lock(); | ||
419 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | ||
420 | |||
421 | if (ip) { | ||
422 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); | ||
423 | if (error) | ||
424 | goto out_error_or_again; | ||
425 | } else { | ||
426 | rcu_read_unlock(); | ||
427 | XFS_STATS_INC(xs_ig_missed); | ||
428 | |||
429 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | ||
430 | flags, lock_flags); | ||
431 | if (error) | ||
432 | goto out_error_or_again; | ||
433 | } | ||
434 | xfs_perag_put(pag); | ||
435 | |||
436 | *ipp = ip; | ||
437 | |||
438 | /* | ||
439 | * If we have a real type for an on-disk inode, we can set ops(&unlock) | ||
440 | * now. If it's a new inode being created, xfs_ialloc will handle it. | ||
441 | */ | ||
442 | if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) | ||
443 | xfs_setup_inode(ip); | ||
444 | return 0; | ||
445 | |||
446 | out_error_or_again: | ||
447 | if (error == EAGAIN) { | ||
448 | delay(1); | ||
449 | goto again; | ||
450 | } | ||
451 | xfs_perag_put(pag); | ||
452 | return error; | ||
453 | } | ||
454 | |||
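For reference, a hypothetical caller of xfs_iget() following the rules in the comment block above: no IOLOCK flags, an optional transaction, and a reference that must be dropped when done.

struct xfs_inode	*ip;
int			error;

/* look the inode up outside a transaction, taking the ilock shared */
error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip);
if (error)
	return error;

/* ... read-only access to the inode ... */

xfs_iunlock(ip, XFS_ILOCK_SHARED);
IRELE(ip);			/* drop the reference xfs_iget took */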
455 | /* | ||
456 | * This is a wrapper routine around the xfs_ilock() routine | ||
457 | * used to centralize some grungy code. It is used in places | ||
458 | * that wish to lock the inode solely for reading the extents. | ||
459 | * The reason these places can't just call xfs_ilock(SHARED) | ||
460 | * is that the inode lock also guards the bringing in of the | ||
461 | * extents from disk for a file in b-tree format. If the inode | ||
462 | * is in b-tree format, then we need to lock the inode exclusively | ||
463 | * until the extents are read in. Locking it exclusively all | ||
464 | * the time would limit our parallelism unnecessarily, though. | ||
465 | * What we do instead is check to see if the extents have been | ||
466 | * read in yet, and only lock the inode exclusively if they | ||
467 | * have not. | ||
468 | * | ||
469 | * The function returns a value which should be given to the | ||
470 | * corresponding xfs_iunlock_map_shared(). This value is | ||
471 | * the mode in which the lock was actually taken. | ||
472 | */ | ||
473 | uint | ||
474 | xfs_ilock_map_shared( | ||
475 | xfs_inode_t *ip) | ||
476 | { | ||
477 | uint lock_mode; | ||
478 | |||
479 | if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && | ||
480 | ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { | ||
481 | lock_mode = XFS_ILOCK_EXCL; | ||
482 | } else { | ||
483 | lock_mode = XFS_ILOCK_SHARED; | ||
484 | } | ||
485 | |||
486 | xfs_ilock(ip, lock_mode); | ||
487 | |||
488 | return lock_mode; | ||
489 | } | ||
490 | |||
491 | /* | ||
492 | * This is simply the unlock routine to go with xfs_ilock_map_shared(). | ||
493 | * All it does is call xfs_iunlock() with the given lock_mode. | ||
494 | */ | ||
495 | void | ||
496 | xfs_iunlock_map_shared( | ||
497 | xfs_inode_t *ip, | ||
498 | unsigned int lock_mode) | ||
499 | { | ||
500 | xfs_iunlock(ip, lock_mode); | ||
501 | } | ||
502 | |||
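Usage of the pair is symmetric: the caller keeps whatever mode was returned and hands it back on unlock. A short sketch:

uint	lock_mode;

lock_mode = xfs_ilock_map_shared(ip);	/* EXCL only if extents unread */
/* ... read the extent list ... */
xfs_iunlock_map_shared(ip, lock_mode);	/* drop the mode actually taken */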
503 | /* | ||
504 | * The xfs inode contains 2 locks: a multi-reader lock called the | ||
505 | * i_iolock and a multi-reader lock called the i_lock. This routine | ||
506 | * allows either or both of the locks to be obtained. | ||
507 | * | ||
508 | * The 2 locks should always be ordered so that the IO lock is | ||
509 | * obtained first in order to prevent deadlock. | ||
510 | * | ||
511 | * ip -- the inode being locked | ||
512 | * lock_flags -- this parameter indicates the inode's locks | ||
513 | * to be locked. It can be: | ||
514 | * XFS_IOLOCK_SHARED, | ||
515 | * XFS_IOLOCK_EXCL, | ||
516 | * XFS_ILOCK_SHARED, | ||
517 | * XFS_ILOCK_EXCL, | ||
518 | * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, | ||
519 | * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, | ||
520 | * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, | ||
521 | * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | ||
522 | */ | ||
523 | void | ||
524 | xfs_ilock( | ||
525 | xfs_inode_t *ip, | ||
526 | uint lock_flags) | ||
527 | { | ||
528 | /* | ||
529 | * You can't set both SHARED and EXCL for the same lock, | ||
530 | * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, | ||
531 | * and XFS_ILOCK_EXCL are valid values to set in lock_flags. | ||
532 | */ | ||
533 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | ||
534 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | ||
535 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | ||
536 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | ||
537 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | ||
538 | |||
539 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
540 | mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | ||
541 | else if (lock_flags & XFS_IOLOCK_SHARED) | ||
542 | mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | ||
543 | |||
544 | if (lock_flags & XFS_ILOCK_EXCL) | ||
545 | mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); | ||
546 | else if (lock_flags & XFS_ILOCK_SHARED) | ||
547 | mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); | ||
548 | |||
549 | trace_xfs_ilock(ip, lock_flags, _RET_IP_); | ||
550 | } | ||
551 | |||
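A sketch of the documented ordering for callers that need both locks: the IO lock is acquired first (xfs_ilock() itself takes them in that order when both flags are passed), and unlock takes the same flags.

xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
/* ... modify file data and inode metadata ... */
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);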
552 | /* | ||
553 | * This is just like xfs_ilock(), except that the caller | ||
554 | * is guaranteed not to sleep. It returns 1 if it gets | ||
555 | * the requested locks and 0 otherwise. If the IO lock is | ||
556 | * obtained but the inode lock cannot be, then the IO lock | ||
557 | * is dropped before returning. | ||
558 | * | ||
559 | * ip -- the inode being locked | ||
560 | * lock_flags -- this parameter indicates the inode's locks | ||
561 | * to be locked. See the comment for xfs_ilock() for a list | ||
562 | * of valid values. | ||
563 | */ | ||
564 | int | ||
565 | xfs_ilock_nowait( | ||
566 | xfs_inode_t *ip, | ||
567 | uint lock_flags) | ||
568 | { | ||
569 | /* | ||
570 | * You can't set both SHARED and EXCL for the same lock, | ||
571 | * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, | ||
572 | * and XFS_ILOCK_EXCL are valid values to set in lock_flags. | ||
573 | */ | ||
574 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | ||
575 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | ||
576 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | ||
577 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | ||
578 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | ||
579 | |||
580 | if (lock_flags & XFS_IOLOCK_EXCL) { | ||
581 | if (!mrtryupdate(&ip->i_iolock)) | ||
582 | goto out; | ||
583 | } else if (lock_flags & XFS_IOLOCK_SHARED) { | ||
584 | if (!mrtryaccess(&ip->i_iolock)) | ||
585 | goto out; | ||
586 | } | ||
587 | if (lock_flags & XFS_ILOCK_EXCL) { | ||
588 | if (!mrtryupdate(&ip->i_lock)) | ||
589 | goto out_undo_iolock; | ||
590 | } else if (lock_flags & XFS_ILOCK_SHARED) { | ||
591 | if (!mrtryaccess(&ip->i_lock)) | ||
592 | goto out_undo_iolock; | ||
593 | } | ||
594 | trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); | ||
595 | return 1; | ||
596 | |||
597 | out_undo_iolock: | ||
598 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
599 | mrunlock_excl(&ip->i_iolock); | ||
600 | else if (lock_flags & XFS_IOLOCK_SHARED) | ||
601 | mrunlock_shared(&ip->i_iolock); | ||
602 | out: | ||
603 | return 0; | ||
604 | } | ||
605 | |||
606 | /* | ||
607 | * xfs_iunlock() is used to drop the inode locks acquired with | ||
608 | * xfs_ilock() and xfs_ilock_nowait(). The caller must pass | ||
609 | * in the flags given to xfs_ilock() or xfs_ilock_nowait() so | ||
610 | * that we know which locks to drop. | ||
611 | * | ||
612 | * ip -- the inode being unlocked | ||
613 | * lock_flags -- this parameter indicates the inode's locks | ||
614 | * to be unlocked. See the comment for xfs_ilock() for a list | ||
615 | * of valid values for this parameter. | ||
616 | * | ||
617 | */ | ||
618 | void | ||
619 | xfs_iunlock( | ||
620 | xfs_inode_t *ip, | ||
621 | uint lock_flags) | ||
622 | { | ||
623 | /* | ||
624 | * You can't set both SHARED and EXCL for the same lock, | ||
625 | * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, | ||
626 | * and XFS_ILOCK_EXCL are valid values to set in lock_flags. | ||
627 | */ | ||
628 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | ||
629 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | ||
630 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | ||
631 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | ||
632 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | ||
633 | ASSERT(lock_flags != 0); | ||
634 | |||
635 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
636 | mrunlock_excl(&ip->i_iolock); | ||
637 | else if (lock_flags & XFS_IOLOCK_SHARED) | ||
638 | mrunlock_shared(&ip->i_iolock); | ||
639 | |||
640 | if (lock_flags & XFS_ILOCK_EXCL) | ||
641 | mrunlock_excl(&ip->i_lock); | ||
642 | else if (lock_flags & XFS_ILOCK_SHARED) | ||
643 | mrunlock_shared(&ip->i_lock); | ||
644 | |||
645 | trace_xfs_iunlock(ip, lock_flags, _RET_IP_); | ||
646 | } | ||
647 | |||
648 | /* | ||
649 | * Give up write locks. The i/o lock cannot be held nested | ||
650 | * if it is being demoted. | ||
651 | */ | ||
652 | void | ||
653 | xfs_ilock_demote( | ||
654 | xfs_inode_t *ip, | ||
655 | uint lock_flags) | ||
656 | { | ||
657 | ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); | ||
658 | ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); | ||
659 | |||
660 | if (lock_flags & XFS_ILOCK_EXCL) | ||
661 | mrdemote(&ip->i_lock); | ||
662 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
663 | mrdemote(&ip->i_iolock); | ||
664 | |||
665 | trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); | ||
666 | } | ||
667 | |||
668 | #ifdef DEBUG | ||
669 | int | ||
670 | xfs_isilocked( | ||
671 | xfs_inode_t *ip, | ||
672 | uint lock_flags) | ||
673 | { | ||
674 | if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { | ||
675 | if (!(lock_flags & XFS_ILOCK_SHARED)) | ||
676 | return !!ip->i_lock.mr_writer; | ||
677 | return rwsem_is_locked(&ip->i_lock.mr_lock); | ||
678 | } | ||
679 | |||
680 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { | ||
681 | if (!(lock_flags & XFS_IOLOCK_SHARED)) | ||
682 | return !!ip->i_iolock.mr_writer; | ||
683 | return rwsem_is_locked(&ip->i_iolock.mr_lock); | ||
684 | } | ||
685 | |||
686 | ASSERT(0); | ||
687 | return 0; | ||
688 | } | ||
689 | #endif | ||
690 | |||
691 | void | ||
692 | __xfs_iflock( | ||
693 | struct xfs_inode *ip) | ||
694 | { | ||
695 | wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); | ||
696 | DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); | ||
697 | |||
698 | do { | ||
699 | prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
700 | if (xfs_isiflocked(ip)) | ||
701 | io_schedule(); | ||
702 | } while (!xfs_iflock_nowait(ip)); | ||
703 | |||
704 | finish_wait(wq, &wait.wait); | ||
705 | } | ||
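__xfs_iflock() is the open-coded wait-on-bit loop: sleep on the bit's waitqueue until woken, then retry the non-blocking acquire. The same shape in a generic sketch over a hypothetical flags word:

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/bitops.h>

static void demo_lock_bit(unsigned long *flags, int bit)
{
	wait_queue_head_t *wq = bit_waitqueue(flags, bit);
	DEFINE_WAIT_BIT(wait, flags, bit);

	do {
		prepare_to_wait_exclusive(wq, &wait.wait,
					  TASK_UNINTERRUPTIBLE);
		if (test_bit(bit, flags))
			io_schedule();	/* holder still has it; sleep */
	} while (test_and_set_bit(bit, flags));	/* 0 => we acquired it */

	finish_wait(wq, &wait.wait);
}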
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1938b41ee9f5..66282dcb821b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "xfs_filestream.h" | 45 | #include "xfs_filestream.h" |
46 | #include "xfs_vnodeops.h" | 46 | #include "xfs_vnodeops.h" |
47 | #include "xfs_trace.h" | 47 | #include "xfs_trace.h" |
48 | #include "xfs_icache.h" | ||
48 | 49 | ||
49 | kmem_zone_t *xfs_ifork_zone; | 50 | kmem_zone_t *xfs_ifork_zone; |
50 | kmem_zone_t *xfs_inode_zone; | 51 | kmem_zone_t *xfs_inode_zone; |
@@ -74,6 +75,256 @@ xfs_get_extsz_hint( | |||
74 | return 0; | 75 | return 0; |
75 | } | 76 | } |
76 | 77 | ||
78 | /* | ||
79 | * This is a wrapper routine around the xfs_ilock() routine used to centralize | ||
80 | * some grungy code. It is used in places that wish to lock the inode solely | ||
81 | * for reading the extents. The reason these places can't just call | ||
82 | * xfs_ilock(SHARED) is that the inode lock also guards the bringing in of the | ||
83 | * extents from disk for a file in b-tree format. If the inode is in b-tree | ||
84 | * format, then we need to lock the inode exclusively until the extents are read | ||
85 | * in. Locking it exclusively all the time would limit our parallelism | ||
86 | * unnecessarily, though. What we do instead is check to see if the extents | ||
87 | * have been read in yet, and only lock the inode exclusively if they have not. | ||
88 | * | ||
89 | * The function returns a value which should be given to the corresponding | ||
90 | * xfs_iunlock_map_shared(). This value is the mode in which the lock was | ||
91 | * actually taken. | ||
92 | */ | ||
93 | uint | ||
94 | xfs_ilock_map_shared( | ||
95 | xfs_inode_t *ip) | ||
96 | { | ||
97 | uint lock_mode; | ||
98 | |||
99 | if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && | ||
100 | ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { | ||
101 | lock_mode = XFS_ILOCK_EXCL; | ||
102 | } else { | ||
103 | lock_mode = XFS_ILOCK_SHARED; | ||
104 | } | ||
105 | |||
106 | xfs_ilock(ip, lock_mode); | ||
107 | |||
108 | return lock_mode; | ||
109 | } | ||
110 | |||
111 | /* | ||
112 | * This is simply the unlock routine to go with xfs_ilock_map_shared(). | ||
113 | * All it does is call xfs_iunlock() with the given lock_mode. | ||
114 | */ | ||
115 | void | ||
116 | xfs_iunlock_map_shared( | ||
117 | xfs_inode_t *ip, | ||
118 | unsigned int lock_mode) | ||
119 | { | ||
120 | xfs_iunlock(ip, lock_mode); | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * The xfs inode contains 2 locks: a multi-reader lock called the | ||
125 | * i_iolock and a multi-reader lock called the i_lock. This routine | ||
126 | * allows either or both of the locks to be obtained. | ||
127 | * | ||
128 | * The 2 locks should always be ordered so that the IO lock is | ||
129 | * obtained first in order to prevent deadlock. | ||
130 | * | ||
131 | * ip -- the inode being locked | ||
132 | * lock_flags -- this parameter indicates the inode's locks | ||
133 | * to be locked. It can be: | ||
134 | * XFS_IOLOCK_SHARED, | ||
135 | * XFS_IOLOCK_EXCL, | ||
136 | * XFS_ILOCK_SHARED, | ||
137 | * XFS_ILOCK_EXCL, | ||
138 | * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, | ||
139 | * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, | ||
140 | * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, | ||
141 | * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | ||
142 | */ | ||
143 | void | ||
144 | xfs_ilock( | ||
145 | xfs_inode_t *ip, | ||
146 | uint lock_flags) | ||
147 | { | ||
148 | trace_xfs_ilock(ip, lock_flags, _RET_IP_); | ||
149 | |||
150 | /* | ||
151 | * You can't set both SHARED and EXCL for the same lock, | ||
152 | * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, | ||
153 | * and XFS_ILOCK_EXCL are valid values to set in lock_flags. | ||
154 | */ | ||
155 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | ||
156 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | ||
157 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | ||
158 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | ||
159 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | ||
160 | |||
161 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
162 | mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | ||
163 | else if (lock_flags & XFS_IOLOCK_SHARED) | ||
164 | mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | ||
165 | |||
166 | if (lock_flags & XFS_ILOCK_EXCL) | ||
167 | mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); | ||
168 | else if (lock_flags & XFS_ILOCK_SHARED) | ||
169 | mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * This is just like xfs_ilock(), except that the caller | ||
174 | * is guaranteed not to sleep. It returns 1 if it gets | ||
175 | * the requested locks and 0 otherwise. If the IO lock is | ||
176 | * obtained but the inode lock cannot be, then the IO lock | ||
177 | * is dropped before returning. | ||
178 | * | ||
179 | * ip -- the inode being locked | ||
180 | * lock_flags -- this parameter indicates the inode's locks | ||
181 | * to be locked. See the comment for xfs_ilock() for a list | ||
182 | * of valid values. | ||
183 | */ | ||
184 | int | ||
185 | xfs_ilock_nowait( | ||
186 | xfs_inode_t *ip, | ||
187 | uint lock_flags) | ||
188 | { | ||
189 | trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_); | ||
190 | |||
191 | /* | ||
192 | * You can't set both SHARED and EXCL for the same lock, | ||
193 | * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, | ||
194 | * and XFS_ILOCK_EXCL are valid values to set in lock_flags. | ||
195 | */ | ||
196 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | ||
197 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | ||
198 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | ||
199 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | ||
200 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | ||
201 | |||
202 | if (lock_flags & XFS_IOLOCK_EXCL) { | ||
203 | if (!mrtryupdate(&ip->i_iolock)) | ||
204 | goto out; | ||
205 | } else if (lock_flags & XFS_IOLOCK_SHARED) { | ||
206 | if (!mrtryaccess(&ip->i_iolock)) | ||
207 | goto out; | ||
208 | } | ||
209 | if (lock_flags & XFS_ILOCK_EXCL) { | ||
210 | if (!mrtryupdate(&ip->i_lock)) | ||
211 | goto out_undo_iolock; | ||
212 | } else if (lock_flags & XFS_ILOCK_SHARED) { | ||
213 | if (!mrtryaccess(&ip->i_lock)) | ||
214 | goto out_undo_iolock; | ||
215 | } | ||
216 | return 1; | ||
217 | |||
218 | out_undo_iolock: | ||
219 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
220 | mrunlock_excl(&ip->i_iolock); | ||
221 | else if (lock_flags & XFS_IOLOCK_SHARED) | ||
222 | mrunlock_shared(&ip->i_iolock); | ||
223 | out: | ||
224 | return 0; | ||
225 | } | ||
226 | |||
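
Because xfs_ilock_nowait() drops the IO lock itself when the inode lock cannot be taken, a caller never has partial-lock cleanup to do. A sketch of the non-blocking pattern (hypothetical caller; XFS's internal positive-errno convention assumed):

    static int example_try_read(struct xfs_inode *ip)
    {
            if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED))
                    return EAGAIN;  /* nothing is held on failure */
            /* ... sample inode state under both shared locks ... */
            xfs_iunlock(ip, XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED);
            return 0;
    }
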
227 | /* | ||
228 | * xfs_iunlock() is used to drop the inode locks acquired with | ||
229 | * xfs_ilock() and xfs_ilock_nowait(). The caller must pass | ||
230 | * in the flags given to xfs_ilock() or xfs_ilock_nowait() so | ||
231 | * that we know which locks to drop. | ||
232 | * | ||
233 | * ip -- the inode being unlocked | ||
234 | * lock_flags -- this parameter indicates the inode's locks | ||
235 | * to be unlocked. See the comment for xfs_ilock() for a list | ||
236 | * of valid values for this parameter. | ||
237 | * | ||
238 | */ | ||
239 | void | ||
240 | xfs_iunlock( | ||
241 | xfs_inode_t *ip, | ||
242 | uint lock_flags) | ||
243 | { | ||
244 | /* | ||
245 | * You can't set both SHARED and EXCL for the same lock, | ||
246 | * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, | ||
247 | * and XFS_ILOCK_EXCL are valid values to set in lock_flags. | ||
248 | */ | ||
249 | ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != | ||
250 | (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); | ||
251 | ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != | ||
252 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | ||
253 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); | ||
254 | ASSERT(lock_flags != 0); | ||
255 | |||
256 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
257 | mrunlock_excl(&ip->i_iolock); | ||
258 | else if (lock_flags & XFS_IOLOCK_SHARED) | ||
259 | mrunlock_shared(&ip->i_iolock); | ||
260 | |||
261 | if (lock_flags & XFS_ILOCK_EXCL) | ||
262 | mrunlock_excl(&ip->i_lock); | ||
263 | else if (lock_flags & XFS_ILOCK_SHARED) | ||
264 | mrunlock_shared(&ip->i_lock); | ||
265 | |||
266 | trace_xfs_iunlock(ip, lock_flags, _RET_IP_); | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * Give up write locks. The i/o lock cannot be held nested | ||
271 | * if it is being demoted. | ||
272 | */ | ||
273 | void | ||
274 | xfs_ilock_demote( | ||
275 | xfs_inode_t *ip, | ||
276 | uint lock_flags) | ||
277 | { | ||
278 | ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); | ||
279 | ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); | ||
280 | |||
281 | if (lock_flags & XFS_ILOCK_EXCL) | ||
282 | mrdemote(&ip->i_lock); | ||
283 | if (lock_flags & XFS_IOLOCK_EXCL) | ||
284 | mrdemote(&ip->i_iolock); | ||
285 | |||
286 | trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); | ||
287 | } | ||
288 | |||
289 | #ifdef DEBUG | ||
290 | int | ||
291 | xfs_isilocked( | ||
292 | xfs_inode_t *ip, | ||
293 | uint lock_flags) | ||
294 | { | ||
295 | if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) { | ||
296 | if (!(lock_flags & XFS_ILOCK_SHARED)) | ||
297 | return !!ip->i_lock.mr_writer; | ||
298 | return rwsem_is_locked(&ip->i_lock.mr_lock); | ||
299 | } | ||
300 | |||
301 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { | ||
302 | if (!(lock_flags & XFS_IOLOCK_SHARED)) | ||
303 | return !!ip->i_iolock.mr_writer; | ||
304 | return rwsem_is_locked(&ip->i_iolock.mr_lock); | ||
305 | } | ||
306 | |||
307 | ASSERT(0); | ||
308 | return 0; | ||
309 | } | ||
310 | #endif | ||
311 | |||
312 | void | ||
313 | __xfs_iflock( | ||
314 | struct xfs_inode *ip) | ||
315 | { | ||
316 | wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); | ||
317 | DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); | ||
318 | |||
319 | do { | ||
320 | prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
321 | if (xfs_isiflocked(ip)) | ||
322 | io_schedule(); | ||
323 | } while (!xfs_iflock_nowait(ip)); | ||
324 | |||
325 | finish_wait(wq, &wait.wait); | ||
326 | } | ||
327 | |||
77 | #ifdef DEBUG | 328 | #ifdef DEBUG |
78 | /* | 329 | /* |
79 | * Make sure that the extents in the given memory buffer | 330 | * Make sure that the extents in the given memory buffer |
@@ -131,6 +382,65 @@ xfs_inobp_check( | |||
131 | } | 382 | } |
132 | #endif | 383 | #endif |
133 | 384 | ||
385 | static void | ||
386 | xfs_inode_buf_verify( | ||
387 | struct xfs_buf *bp) | ||
388 | { | ||
389 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
390 | int i; | ||
391 | int ni; | ||
392 | |||
393 | /* | ||
394 | * Validate the magic number and version of every inode in the buffer | ||
395 | */ | ||
396 | ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; | ||
397 | for (i = 0; i < ni; i++) { | ||
398 | int di_ok; | ||
399 | xfs_dinode_t *dip; | ||
400 | |||
401 | dip = (struct xfs_dinode *)xfs_buf_offset(bp, | ||
402 | (i << mp->m_sb.sb_inodelog)); | ||
403 | di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && | ||
404 | XFS_DINODE_GOOD_VERSION(dip->di_version); | ||
405 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
406 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
407 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
408 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
409 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, | ||
410 | mp, dip); | ||
411 | #ifdef DEBUG | ||
412 | xfs_emerg(mp, | ||
413 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", | ||
414 | (unsigned long long)bp->b_bn, i, | ||
415 | be16_to_cpu(dip->di_magic)); | ||
416 | ASSERT(0); | ||
417 | #endif | ||
418 | } | ||
419 | } | ||
420 | xfs_inobp_check(mp, bp); | ||
421 | } | ||
422 | |||
423 | |||
424 | static void | ||
425 | xfs_inode_buf_read_verify( | ||
426 | struct xfs_buf *bp) | ||
427 | { | ||
428 | xfs_inode_buf_verify(bp); | ||
429 | } | ||
430 | |||
431 | static void | ||
432 | xfs_inode_buf_write_verify( | ||
433 | struct xfs_buf *bp) | ||
434 | { | ||
435 | xfs_inode_buf_verify(bp); | ||
436 | } | ||
437 | |||
438 | const struct xfs_buf_ops xfs_inode_buf_ops = { | ||
439 | .verify_read = xfs_inode_buf_read_verify, | ||
440 | .verify_write = xfs_inode_buf_write_verify, | ||
441 | }; | ||
442 | |||
443 | |||
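
The ops table above is the general shape of the buffer verifier mechanism this series introduces: the read verifier runs after IO completion, the write verifier before submission, and both report problems via xfs_buf_ioerror(). A hedged sketch of the same shape for some other metadata type (all names hypothetical):

    static void example_buf_verify(struct xfs_buf *bp)
    {
            /*
             * Validate magic, version and internal consistency of the
             * on-disk structure; on failure, flag the buffer with
             * xfs_buf_ioerror(bp, EFSCORRUPTED) as done above.
             */
    }

    static void example_buf_read_verify(struct xfs_buf *bp)
    {
            example_buf_verify(bp);
    }

    static void example_buf_write_verify(struct xfs_buf *bp)
    {
            example_buf_verify(bp);
    }

    const struct xfs_buf_ops example_buf_ops = {
            .verify_read = example_buf_read_verify,
            .verify_write = example_buf_write_verify,
    };
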
134 | /* | 444 | /* |
135 | * This routine is called to map an inode to the buffer containing the on-disk | 445 | * This routine is called to map an inode to the buffer containing the on-disk |
136 | * version of the inode. It returns a pointer to the buffer containing the | 446 | * version of the inode. It returns a pointer to the buffer containing the |
@@ -145,71 +455,33 @@ xfs_imap_to_bp( | |||
145 | struct xfs_mount *mp, | 455 | struct xfs_mount *mp, |
146 | struct xfs_trans *tp, | 456 | struct xfs_trans *tp, |
147 | struct xfs_imap *imap, | 457 | struct xfs_imap *imap, |
148 | struct xfs_dinode **dipp, | 458 | struct xfs_dinode **dipp, |
149 | struct xfs_buf **bpp, | 459 | struct xfs_buf **bpp, |
150 | uint buf_flags, | 460 | uint buf_flags, |
151 | uint iget_flags) | 461 | uint iget_flags) |
152 | { | 462 | { |
153 | struct xfs_buf *bp; | 463 | struct xfs_buf *bp; |
154 | int error; | 464 | int error; |
155 | int i; | ||
156 | int ni; | ||
157 | 465 | ||
158 | buf_flags |= XBF_UNMAPPED; | 466 | buf_flags |= XBF_UNMAPPED; |
159 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | 467 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, |
160 | (int)imap->im_len, buf_flags, &bp); | 468 | (int)imap->im_len, buf_flags, &bp, |
469 | &xfs_inode_buf_ops); | ||
161 | if (error) { | 470 | if (error) { |
162 | if (error != EAGAIN) { | 471 | if (error == EAGAIN) { |
163 | xfs_warn(mp, | ||
164 | "%s: xfs_trans_read_buf() returned error %d.", | ||
165 | __func__, error); | ||
166 | } else { | ||
167 | ASSERT(buf_flags & XBF_TRYLOCK); | 472 | ASSERT(buf_flags & XBF_TRYLOCK); |
473 | return error; | ||
168 | } | 474 | } |
169 | return error; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Validate the magic number and version of every inode in the buffer | ||
174 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
175 | */ | ||
176 | #ifdef DEBUG | ||
177 | ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; | ||
178 | #else /* usual case */ | ||
179 | ni = 1; | ||
180 | #endif | ||
181 | 475 | ||
182 | for (i = 0; i < ni; i++) { | 476 | if (error == EFSCORRUPTED && |
183 | int di_ok; | 477 | (iget_flags & XFS_IGET_UNTRUSTED)) |
184 | xfs_dinode_t *dip; | 478 | return XFS_ERROR(EINVAL); |
185 | 479 | ||
186 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 480 | xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", |
187 | (i << mp->m_sb.sb_inodelog)); | 481 | __func__, error); |
188 | di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && | 482 | return error; |
189 | XFS_DINODE_GOOD_VERSION(dip->di_version); | ||
190 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
191 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
192 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
193 | if (iget_flags & XFS_IGET_UNTRUSTED) { | ||
194 | xfs_trans_brelse(tp, bp); | ||
195 | return XFS_ERROR(EINVAL); | ||
196 | } | ||
197 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, | ||
198 | mp, dip); | ||
199 | #ifdef DEBUG | ||
200 | xfs_emerg(mp, | ||
201 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", | ||
202 | (unsigned long long)imap->im_blkno, i, | ||
203 | be16_to_cpu(dip->di_magic)); | ||
204 | ASSERT(0); | ||
205 | #endif | ||
206 | xfs_trans_brelse(tp, bp); | ||
207 | return XFS_ERROR(EFSCORRUPTED); | ||
208 | } | ||
209 | } | 483 | } |
210 | 484 | ||
211 | xfs_inobp_check(mp, bp); | ||
212 | |||
213 | *bpp = bp; | 485 | *bpp = bp; |
214 | *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); | 486 | *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); |
215 | return 0; | 487 | return 0; |
@@ -853,16 +1125,16 @@ xfs_iread_extents( | |||
853 | * set according to the contents of the given cred structure. | 1125 | * set according to the contents of the given cred structure. |
854 | * | 1126 | * |
855 | * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() | 1127 | * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() |
856 | * has a free inode available, call xfs_iget() | 1128 | * has a free inode available, call xfs_iget() to obtain the in-core |
857 | * to obtain the in-core version of the allocated inode. Finally, | 1129 | * version of the allocated inode. Finally, fill in the inode and |
858 | * fill in the inode and log its initial contents. In this case, | 1130 | * log its initial contents. In this case, ialloc_context would be |
859 | * ialloc_context would be set to NULL and call_again set to false. | 1131 | * set to NULL. |
860 | * | 1132 | * |
861 | * If xfs_dialloc() does not have an available inode, | 1133 | * If xfs_dialloc() does not have an available inode, it will replenish |
862 | * it will replenish its supply by doing an allocation. Since we can | 1134 | * its supply by doing an allocation. Since we can only do one |
863 | * only do one allocation within a transaction without deadlocks, we | 1135 | * allocation within a transaction without deadlocks, we must commit |
864 | * must commit the current transaction before returning the inode itself. | 1136 | * the current transaction before returning the inode itself. |
865 | * In this case, therefore, we will set call_again to true and return. | 1137 | * In this case, therefore, we will set ialloc_context and return. |
866 | * The caller should then commit the current transaction, start a new | 1138 | * The caller should then commit the current transaction, start a new |
867 | * transaction, and call xfs_ialloc() again to actually get the inode. | 1139 | * transaction, and call xfs_ialloc() again to actually get the inode. |
868 | * | 1140 | * |
@@ -1514,6 +1786,18 @@ xfs_ifree_cluster( | |||
1514 | 1786 | ||
1515 | if (!bp) | 1787 | if (!bp) |
1516 | return ENOMEM; | 1788 | return ENOMEM; |
1789 | |||
1790 | /* | ||
1791 | * This buffer may not have been correctly initialised as we | ||
1792 | * didn't read it from disk. That's not important because we are | ||
1793 | * only using it to mark the buffer as stale in the log, and to | ||
1794 | * attach stale cached inodes on it. That means it will never be | ||
1795 | * dispatched for IO. If it is, we want to know about it, and we | ||
1796 | * want it to fail. We can achieve this by adding a write | ||
1797 | * verifier to the buffer. | ||
1798 | */ | ||
1799 | bp->b_ops = &xfs_inode_buf_ops; | ||
1800 | |||
1517 | /* | 1801 | /* |
1518 | * Walk the inodes already attached to the buffer and mark them | 1802 | * Walk the inodes already attached to the buffer and mark them |
1519 | * stale. These will all have the flush locks held, so an | 1803 | * stale. These will all have the flush locks held, so an |
@@ -3661,3 +3945,40 @@ xfs_iext_irec_update_extoffs( | |||
3661 | ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; | 3945 | ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; |
3662 | } | 3946 | } |
3663 | } | 3947 | } |
3948 | |||
3949 | /* | ||
3950 | * Test whether it is appropriate to check an inode for and free post EOF | ||
3951 | * blocks. The 'force' parameter determines whether we should also consider | ||
3952 | * regular files that are marked preallocated or append-only. | ||
3953 | */ | ||
3954 | bool | ||
3955 | xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) | ||
3956 | { | ||
3957 | /* prealloc/delalloc exists only on regular files */ | ||
3958 | if (!S_ISREG(ip->i_d.di_mode)) | ||
3959 | return false; | ||
3960 | |||
3961 | /* | ||
3962 | * Zero sized files with no cached pages and delalloc blocks will not | ||
3963 | * have speculative prealloc/delalloc blocks to remove. | ||
3964 | */ | ||
3965 | if (VFS_I(ip)->i_size == 0 && | ||
3966 | VN_CACHED(VFS_I(ip)) == 0 && | ||
3967 | ip->i_delayed_blks == 0) | ||
3968 | return false; | ||
3969 | |||
3970 | /* If we haven't read in the extent list, then don't do it now. */ | ||
3971 | if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) | ||
3972 | return false; | ||
3973 | |||
3974 | /* | ||
3975 | * Do not free real preallocated or append-only files unless the file | ||
3976 | * has delalloc blocks and we are forced to remove them. | ||
3977 | */ | ||
3978 | if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) | ||
3979 | if (!force || ip->i_delayed_blks == 0) | ||
3980 | return false; | ||
3981 | |||
3982 | return true; | ||
3983 | } | ||
3984 | |||
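
A hedged sketch of how a caller would use this predicate (the caller shown is hypothetical; the real consumers are the EOF-blocks scanner and inactivation paths added elsewhere in this series):

    static void example_trim_inode(struct xfs_inode *ip, bool force)
    {
            /*
             * force=true also considers PREALLOC/APPEND files, but only
             * when they still carry delalloc blocks, per the test above.
             */
            if (!xfs_can_free_eofblocks(ip, force))
                    return;
            /* ... free the speculative post-EOF allocation ... */
    }
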
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 94b32f906e79..22baf6ea4fac 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -496,11 +496,10 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) | |||
496 | (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ | 496 | (((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \ |
497 | ((pip)->i_d.di_mode & S_ISGID)) | 497 | ((pip)->i_d.di_mode & S_ISGID)) |
498 | 498 | ||
499 | |||
499 | /* | 500 | /* |
500 | * xfs_iget.c prototypes. | 501 | * xfs_inode.c prototypes. |
501 | */ | 502 | */ |
502 | int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, | ||
503 | uint, uint, xfs_inode_t **); | ||
504 | void xfs_ilock(xfs_inode_t *, uint); | 503 | void xfs_ilock(xfs_inode_t *, uint); |
505 | int xfs_ilock_nowait(xfs_inode_t *, uint); | 504 | int xfs_ilock_nowait(xfs_inode_t *, uint); |
506 | void xfs_iunlock(xfs_inode_t *, uint); | 505 | void xfs_iunlock(xfs_inode_t *, uint); |
@@ -508,11 +507,6 @@ void xfs_ilock_demote(xfs_inode_t *, uint); | |||
508 | int xfs_isilocked(xfs_inode_t *, uint); | 507 | int xfs_isilocked(xfs_inode_t *, uint); |
509 | uint xfs_ilock_map_shared(xfs_inode_t *); | 508 | uint xfs_ilock_map_shared(xfs_inode_t *); |
510 | void xfs_iunlock_map_shared(xfs_inode_t *, uint); | 509 | void xfs_iunlock_map_shared(xfs_inode_t *, uint); |
511 | void xfs_inode_free(struct xfs_inode *ip); | ||
512 | |||
513 | /* | ||
514 | * xfs_inode.c prototypes. | ||
515 | */ | ||
516 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, | 510 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, |
517 | xfs_nlink_t, xfs_dev_t, prid_t, int, | 511 | xfs_nlink_t, xfs_dev_t, prid_t, int, |
518 | struct xfs_buf **, xfs_inode_t **); | 512 | struct xfs_buf **, xfs_inode_t **); |
@@ -591,6 +585,7 @@ void xfs_iext_irec_compact(xfs_ifork_t *); | |||
591 | void xfs_iext_irec_compact_pages(xfs_ifork_t *); | 585 | void xfs_iext_irec_compact_pages(xfs_ifork_t *); |
592 | void xfs_iext_irec_compact_full(xfs_ifork_t *); | 586 | void xfs_iext_irec_compact_full(xfs_ifork_t *); |
593 | void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); | 587 | void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); |
588 | bool xfs_can_free_eofblocks(struct xfs_inode *, bool); | ||
594 | 589 | ||
595 | #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) | 590 | #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) |
596 | 591 | ||
@@ -603,5 +598,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | |||
603 | extern struct kmem_zone *xfs_ifork_zone; | 598 | extern struct kmem_zone *xfs_ifork_zone; |
604 | extern struct kmem_zone *xfs_inode_zone; | 599 | extern struct kmem_zone *xfs_inode_zone; |
605 | extern struct kmem_zone *xfs_ili_zone; | 600 | extern struct kmem_zone *xfs_ili_zone; |
601 | extern const struct xfs_buf_ops xfs_inode_buf_ops; | ||
606 | 602 | ||
607 | #endif /* __XFS_INODE_H__ */ | 603 | #endif /* __XFS_INODE_H__ */ |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index c1df3c623de2..c1c3ef88a260 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include "xfs_inode_item.h" | 42 | #include "xfs_inode_item.h" |
43 | #include "xfs_export.h" | 43 | #include "xfs_export.h" |
44 | #include "xfs_trace.h" | 44 | #include "xfs_trace.h" |
45 | #include "xfs_icache.h" | ||
45 | 46 | ||
46 | #include <linux/capability.h> | 47 | #include <linux/capability.h> |
47 | #include <linux/dcache.h> | 48 | #include <linux/dcache.h> |
@@ -1602,6 +1603,26 @@ xfs_file_ioctl( | |||
1602 | error = xfs_errortag_clearall(mp, 1); | 1603 | error = xfs_errortag_clearall(mp, 1); |
1603 | return -error; | 1604 | return -error; |
1604 | 1605 | ||
1606 | case XFS_IOC_FREE_EOFBLOCKS: { | ||
1607 | struct xfs_eofblocks eofb; | ||
1608 | |||
1609 | if (copy_from_user(&eofb, arg, sizeof(eofb))) | ||
1610 | return -XFS_ERROR(EFAULT); | ||
1611 | |||
1612 | if (eofb.eof_version != XFS_EOFBLOCKS_VERSION) | ||
1613 | return -XFS_ERROR(EINVAL); | ||
1614 | |||
1615 | if (eofb.eof_flags & ~XFS_EOF_FLAGS_VALID) | ||
1616 | return -XFS_ERROR(EINVAL); | ||
1617 | |||
1618 | if (memchr_inv(&eofb.pad32, 0, sizeof(eofb.pad32)) || | ||
1619 | memchr_inv(eofb.pad64, 0, sizeof(eofb.pad64))) | ||
1620 | return -XFS_ERROR(EINVAL); | ||
1621 | |||
1622 | error = xfs_icache_free_eofblocks(mp, &eofb); | ||
1623 | return -error; | ||
1624 | } | ||
1625 | |||
1605 | default: | 1626 | default: |
1606 | return -ENOTTY; | 1627 | return -ENOTTY; |
1607 | } | 1628 | } |
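
From userspace the new ioctl is driven with a zeroed, versioned structure. The field and constant names below come straight from the validation code above, but the header exporting them to userspace is an assumption, so treat this as a sketch:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <xfs/xfs_fs.h>         /* assumed to export the new ABI */

    static int free_eofblocks(const char *path)
    {
            struct xfs_eofblocks eofb;
            int fd, ret;

            fd = open(path, O_RDONLY);
            if (fd < 0)
                    return -1;
            memset(&eofb, 0, sizeof(eofb)); /* pad fields must be zero */
            eofb.eof_version = XFS_EOFBLOCKS_VERSION;
            eofb.eof_flags = 0;             /* no filtering: scan everything */
            ret = ioctl(fd, XFS_IOC_FREE_EOFBLOCKS, &eofb);
            close(fd);
            return ret;
    }
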
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7f537663365b..add06b4e9a63 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include "xfs_utils.h" | 41 | #include "xfs_utils.h" |
42 | #include "xfs_iomap.h" | 42 | #include "xfs_iomap.h" |
43 | #include "xfs_trace.h" | 43 | #include "xfs_trace.h" |
44 | #include "xfs_icache.h" | ||
44 | 45 | ||
45 | 46 | ||
46 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ | 47 | #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ |
@@ -373,7 +374,7 @@ xfs_iomap_write_delay( | |||
373 | xfs_extlen_t extsz; | 374 | xfs_extlen_t extsz; |
374 | int nimaps; | 375 | int nimaps; |
375 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; | 376 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; |
376 | int prealloc, flushed = 0; | 377 | int prealloc; |
377 | int error; | 378 | int error; |
378 | 379 | ||
379 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 380 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
@@ -434,31 +435,29 @@ retry: | |||
434 | } | 435 | } |
435 | 436 | ||
436 | /* | 437 | /* |
437 | * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. For | 438 | * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. Retry |
438 | * ENOSPC, * flush all other inodes with delalloc blocks to free up | ||
439 | * some of the excess reserved metadata space. For both cases, retry | ||
440 | * without EOF preallocation. | 439 | * without EOF preallocation. |
441 | */ | 440 | */ |
442 | if (nimaps == 0) { | 441 | if (nimaps == 0) { |
443 | trace_xfs_delalloc_enospc(ip, offset, count); | 442 | trace_xfs_delalloc_enospc(ip, offset, count); |
444 | if (flushed) | 443 | if (prealloc) { |
445 | return XFS_ERROR(error ? error : ENOSPC); | 444 | prealloc = 0; |
446 | 445 | error = 0; | |
447 | if (error == ENOSPC) { | 446 | goto retry; |
448 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
449 | xfs_flush_inodes(ip); | ||
450 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
451 | } | 447 | } |
452 | 448 | return XFS_ERROR(error ? error : ENOSPC); | |
453 | flushed = 1; | ||
454 | error = 0; | ||
455 | prealloc = 0; | ||
456 | goto retry; | ||
457 | } | 449 | } |
458 | 450 | ||
459 | if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) | 451 | if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) |
460 | return xfs_alert_fsblock_zero(ip, &imap[0]); | 452 | return xfs_alert_fsblock_zero(ip, &imap[0]); |
461 | 453 | ||
454 | /* | ||
455 | * Tag the inode as speculatively preallocated so we can reclaim this | ||
456 | * space on demand, if necessary. | ||
457 | */ | ||
458 | if (prealloc) | ||
459 | xfs_inode_set_eofblocks_tag(ip); | ||
460 | |||
462 | *ret_imap = imap[0]; | 461 | *ret_imap = imap[0]; |
463 | return 0; | 462 | return 0; |
464 | } | 463 | } |
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 4e00cf091d2c..d82efaa2ac73 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "xfs_vnodeops.h" | 38 | #include "xfs_vnodeops.h" |
39 | #include "xfs_inode_item.h" | 39 | #include "xfs_inode_item.h" |
40 | #include "xfs_trace.h" | 40 | #include "xfs_trace.h" |
41 | #include "xfs_icache.h" | ||
41 | 42 | ||
42 | #include <linux/capability.h> | 43 | #include <linux/capability.h> |
43 | #include <linux/xattr.h> | 44 | #include <linux/xattr.h> |
@@ -779,8 +780,8 @@ xfs_setattr_size( | |||
779 | * care about here. | 780 | * care about here. |
780 | */ | 781 | */ |
781 | if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { | 782 | if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { |
782 | error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0, | 783 | error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
783 | FI_NONE); | 784 | ip->i_d.di_size, newsize); |
784 | if (error) | 785 | if (error) |
785 | goto out_unlock; | 786 | goto out_unlock; |
786 | } | 787 | } |
@@ -854,6 +855,9 @@ xfs_setattr_size( | |||
854 | * and do not wait the usual (long) time for writeout. | 855 | * and do not wait the usual (long) time for writeout. |
855 | */ | 856 | */ |
856 | xfs_iflags_set(ip, XFS_ITRUNCATED); | 857 | xfs_iflags_set(ip, XFS_ITRUNCATED); |
858 | |||
859 | /* A truncate down always removes post-EOF blocks. */ | ||
860 | xfs_inode_clear_eofblocks_tag(ip); | ||
857 | } | 861 | } |
858 | 862 | ||
859 | if (mask & ATTR_CTIME) { | 863 | if (mask & ATTR_CTIME) { |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 01d10a66e302..2ea7d402188d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include "xfs_error.h" | 34 | #include "xfs_error.h" |
35 | #include "xfs_btree.h" | 35 | #include "xfs_btree.h" |
36 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
37 | #include "xfs_icache.h" | ||
37 | 38 | ||
38 | STATIC int | 39 | STATIC int |
39 | xfs_internal_inum( | 40 | xfs_internal_inum( |
@@ -395,7 +396,8 @@ xfs_bulkstat( | |||
395 | if (xfs_inobt_maskn(chunkidx, nicluster) | 396 | if (xfs_inobt_maskn(chunkidx, nicluster) |
396 | & ~r.ir_free) | 397 | & ~r.ir_free) |
397 | xfs_btree_reada_bufs(mp, agno, | 398 | xfs_btree_reada_bufs(mp, agno, |
398 | agbno, nbcluster); | 399 | agbno, nbcluster, |
400 | &xfs_inode_buf_ops); | ||
399 | } | 401 | } |
400 | irbp->ir_startino = r.ir_startino; | 402 | irbp->ir_startino = r.ir_startino; |
401 | irbp->ir_freecount = r.ir_freecount; | 403 | irbp->ir_freecount = r.ir_freecount; |
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 828662f70d64..fe7e4df85a7b 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/kernel.h> | 44 | #include <linux/kernel.h> |
45 | #include <linux/blkdev.h> | 45 | #include <linux/blkdev.h> |
46 | #include <linux/slab.h> | 46 | #include <linux/slab.h> |
47 | #include <linux/crc32c.h> | ||
47 | #include <linux/module.h> | 48 | #include <linux/module.h> |
48 | #include <linux/mutex.h> | 49 | #include <linux/mutex.h> |
49 | #include <linux/file.h> | 50 | #include <linux/file.h> |
@@ -118,6 +119,7 @@ | |||
118 | #define xfs_rotorstep xfs_params.rotorstep.val | 119 | #define xfs_rotorstep xfs_params.rotorstep.val |
119 | #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val | 120 | #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val |
120 | #define xfs_fstrm_centisecs xfs_params.fstrm_timer.val | 121 | #define xfs_fstrm_centisecs xfs_params.fstrm_timer.val |
122 | #define xfs_eofb_secs xfs_params.eofb_timer.val | ||
121 | 123 | ||
122 | #define current_cpu() (raw_smp_processor_id()) | 124 | #define current_cpu() (raw_smp_processor_id()) |
123 | #define current_pid() (current->pid) | 125 | #define current_pid() (current->pid) |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4dad756962d0..46bd9d52ab51 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -34,6 +34,8 @@ | |||
34 | #include "xfs_dinode.h" | 34 | #include "xfs_dinode.h" |
35 | #include "xfs_inode.h" | 35 | #include "xfs_inode.h" |
36 | #include "xfs_trace.h" | 36 | #include "xfs_trace.h" |
37 | #include "xfs_fsops.h" | ||
38 | #include "xfs_cksum.h" | ||
37 | 39 | ||
38 | kmem_zone_t *xfs_log_ticket_zone; | 40 | kmem_zone_t *xfs_log_ticket_zone; |
39 | 41 | ||
@@ -458,7 +460,8 @@ xfs_log_reserve( | |||
458 | tic->t_trans_type = t_type; | 460 | tic->t_trans_type = t_type; |
459 | *ticp = tic; | 461 | *ticp = tic; |
460 | 462 | ||
461 | xlog_grant_push_ail(log, tic->t_unit_res * tic->t_cnt); | 463 | xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt |
464 | : tic->t_unit_res); | ||
462 | 465 | ||
463 | trace_xfs_log_reserve(log, tic); | 466 | trace_xfs_log_reserve(log, tic); |
464 | 467 | ||
@@ -679,25 +682,29 @@ out: | |||
679 | } | 682 | } |
680 | 683 | ||
681 | /* | 684 | /* |
682 | * Finish the recovery of the file system. This is separate from | 685 | * Finish the recovery of the file system. This is separate from the |
683 | * the xfs_log_mount() call, because it depends on the code in | 686 | * xfs_log_mount() call, because it depends on the code in xfs_mountfs() to read |
684 | * xfs_mountfs() to read in the root and real-time bitmap inodes | 687 | * in the root and real-time bitmap inodes between calling xfs_log_mount() and |
685 | * between calling xfs_log_mount() and here. | 688 | * here. |
686 | * | 689 | * |
687 | * mp - ubiquitous xfs mount point structure | 690 | * If we finish recovery successfully, start the background log work. If we are |
691 | * not doing recovery, then we have a RO filesystem and we don't need to start | ||
692 | * it. | ||
688 | */ | 693 | */ |
689 | int | 694 | int |
690 | xfs_log_mount_finish(xfs_mount_t *mp) | 695 | xfs_log_mount_finish(xfs_mount_t *mp) |
691 | { | 696 | { |
692 | int error; | 697 | int error = 0; |
693 | 698 | ||
694 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) | 699 | if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { |
695 | error = xlog_recover_finish(mp->m_log); | 700 | error = xlog_recover_finish(mp->m_log); |
696 | else { | 701 | if (!error) |
697 | error = 0; | 702 | xfs_log_work_queue(mp); |
703 | } else { | ||
698 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); | 704 | ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); |
699 | } | 705 | } |
700 | 706 | ||
707 | |||
701 | return error; | 708 | return error; |
702 | } | 709 | } |
703 | 710 | ||
@@ -850,15 +857,49 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
850 | } /* xfs_log_unmount_write */ | 857 | } /* xfs_log_unmount_write */ |
851 | 858 | ||
852 | /* | 859 | /* |
853 | * Deallocate log structures for unmount/relocation. | 860 | * Empty the log for unmount/freeze. |
861 | * | ||
862 | * To do this, we first need to shut down the background log work so it is not | ||
863 | * trying to cover the log as we clean up. We then need to unpin all objects in | ||
864 | * the log so we can then flush them out. Once they have completed their IO and | ||
865 | * run the callbacks removing themselves from the AIL, we can write the unmount | ||
866 | * record. | ||
867 | */ | ||
868 | void | ||
869 | xfs_log_quiesce( | ||
870 | struct xfs_mount *mp) | ||
871 | { | ||
872 | cancel_delayed_work_sync(&mp->m_log->l_work); | ||
873 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
874 | |||
875 | /* | ||
876 | * The superblock buffer is uncached and while xfs_ail_push_all_sync() | ||
877 | * will push it, xfs_wait_buftarg() will not wait for it. Further, | ||
878 | * xfs_buf_iowait() cannot be used because it was pushed with the | ||
879 | * XBF_ASYNC flag set, so we need to use a lock/unlock pair to wait for | ||
880 | * the IO to complete. | ||
881 | */ | ||
882 | xfs_ail_push_all_sync(mp->m_ail); | ||
883 | xfs_wait_buftarg(mp->m_ddev_targp); | ||
884 | xfs_buf_lock(mp->m_sb_bp); | ||
885 | xfs_buf_unlock(mp->m_sb_bp); | ||
886 | |||
887 | xfs_log_unmount_write(mp); | ||
888 | } | ||
889 | |||
890 | /* | ||
891 | * Shut down and release the AIL and Log. | ||
854 | * | 892 | * |
855 | * We need to stop the aild from running before we destroy | 893 | * During unmount, we need to ensure we flush all the dirty metadata objects |
856 | * and deallocate the log as the aild references the log. | 894 | * from the AIL so that the log is empty before we write the unmount record to |
895 | * the log. Once this is done, we can tear down the AIL and the log. | ||
857 | */ | 896 | */ |
858 | void | 897 | void |
859 | xfs_log_unmount(xfs_mount_t *mp) | 898 | xfs_log_unmount( |
899 | struct xfs_mount *mp) | ||
860 | { | 900 | { |
861 | cancel_delayed_work_sync(&mp->m_sync_work); | 901 | xfs_log_quiesce(mp); |
902 | |||
862 | xfs_trans_ail_destroy(mp); | 903 | xfs_trans_ail_destroy(mp); |
863 | xlog_dealloc_log(mp->m_log); | 904 | xlog_dealloc_log(mp->m_log); |
864 | } | 905 | } |
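
The lock/unlock pair in xfs_log_quiesce() deserves a note: as its comment says, the async superblock write cannot be waited on with xfs_buf_iowait(), but buffer IO holds the buffer lock until completion, so cycling the lock acts as a completion barrier. A standalone sketch of the idiom:

    /*
     * Sketch: taking and releasing the buffer lock cannot succeed until
     * any in-flight IO holding it has completed, so the pair waits for
     * that IO without needing xfs_buf_iowait().
     */
    static void example_wait_buf_io(struct xfs_buf *bp)
    {
            xfs_buf_lock(bp);
            xfs_buf_unlock(bp);
    }
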
@@ -1090,8 +1131,7 @@ xlog_iodone(xfs_buf_t *bp) | |||
1090 | * with it being freed after writing the unmount record to the | 1131 | * with it being freed after writing the unmount record to the |
1091 | * log. | 1132 | * log. |
1092 | */ | 1133 | */ |
1093 | 1134 | } | |
1094 | } /* xlog_iodone */ | ||
1095 | 1135 | ||
1096 | /* | 1136 | /* |
1097 | * Return size of each in-core log record buffer. | 1137 | * Return size of each in-core log record buffer. |
@@ -1161,6 +1201,40 @@ done: | |||
1161 | } /* xlog_get_iclog_buffer_size */ | 1201 | } /* xlog_get_iclog_buffer_size */ |
1162 | 1202 | ||
1163 | 1203 | ||
1204 | void | ||
1205 | xfs_log_work_queue( | ||
1206 | struct xfs_mount *mp) | ||
1207 | { | ||
1208 | queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work, | ||
1209 | msecs_to_jiffies(xfs_syncd_centisecs * 10)); | ||
1210 | } | ||
1211 | |||
1212 | /* | ||
1213 | * Every sync period we need to unpin all items in the AIL and push them to | ||
1214 | * disk. If there is nothing dirty, then we might need to cover the log to | ||
1215 | * indicate that the filesystem is idle. | ||
1216 | */ | ||
1217 | void | ||
1218 | xfs_log_worker( | ||
1219 | struct work_struct *work) | ||
1220 | { | ||
1221 | struct xlog *log = container_of(to_delayed_work(work), | ||
1222 | struct xlog, l_work); | ||
1223 | struct xfs_mount *mp = log->l_mp; | ||
1224 | |||
1225 | /* dgc: errors ignored - not fatal and nowhere to report them */ | ||
1226 | if (xfs_log_need_covered(mp)) | ||
1227 | xfs_fs_log_dummy(mp); | ||
1228 | else | ||
1229 | xfs_log_force(mp, 0); | ||
1230 | |||
1231 | /* start pushing all the metadata that is currently dirty */ | ||
1232 | xfs_ail_push_all(mp->m_ail); | ||
1233 | |||
1234 | /* queue us up again */ | ||
1235 | xfs_log_work_queue(mp); | ||
1236 | } | ||
1237 | |||
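
xfs_log_worker() closes the loop by calling xfs_log_work_queue() on itself, giving a periodic, self-rearming job. The generic delayed-work pattern it follows, as a sketch with hypothetical names:

    struct example_ctx {
            struct workqueue_struct *wq;
            struct delayed_work     work;
            unsigned int            period_ms;
    };

    static void example_worker(struct work_struct *work)
    {
            struct example_ctx *ctx = container_of(to_delayed_work(work),
                                                   struct example_ctx, work);

            /* ... periodic maintenance on ctx ... */

            /* queue ourselves up again, exactly as xfs_log_worker() does */
            queue_delayed_work(ctx->wq, &ctx->work,
                               msecs_to_jiffies(ctx->period_ms));
    }
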
1164 | /* | 1238 | /* |
1165 | * This routine initializes some of the log structure for a given mount point. | 1239 | * This routine initializes some of the log structure for a given mount point. |
1166 | * Its primary purpose is to fill in enough, so recovery can occur. However, | 1240 | * Its primary purpose is to fill in enough, so recovery can occur. However, |
@@ -1195,6 +1269,7 @@ xlog_alloc_log( | |||
1195 | log->l_logBBsize = num_bblks; | 1269 | log->l_logBBsize = num_bblks; |
1196 | log->l_covered_state = XLOG_STATE_COVER_IDLE; | 1270 | log->l_covered_state = XLOG_STATE_COVER_IDLE; |
1197 | log->l_flags |= XLOG_ACTIVE_RECOVERY; | 1271 | log->l_flags |= XLOG_ACTIVE_RECOVERY; |
1272 | INIT_DELAYED_WORK(&log->l_work, xfs_log_worker); | ||
1198 | 1273 | ||
1199 | log->l_prev_block = -1; | 1274 | log->l_prev_block = -1; |
1200 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ | 1275 | /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ |
@@ -1417,6 +1492,84 @@ xlog_grant_push_ail( | |||
1417 | } | 1492 | } |
1418 | 1493 | ||
1419 | /* | 1494 | /* |
1495 | * Stamp cycle number in every block | ||
1496 | */ | ||
1497 | STATIC void | ||
1498 | xlog_pack_data( | ||
1499 | struct xlog *log, | ||
1500 | struct xlog_in_core *iclog, | ||
1501 | int roundoff) | ||
1502 | { | ||
1503 | int i, j, k; | ||
1504 | int size = iclog->ic_offset + roundoff; | ||
1505 | __be32 cycle_lsn; | ||
1506 | xfs_caddr_t dp; | ||
1507 | |||
1508 | cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); | ||
1509 | |||
1510 | dp = iclog->ic_datap; | ||
1511 | for (i = 0; i < BTOBB(size); i++) { | ||
1512 | if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) | ||
1513 | break; | ||
1514 | iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; | ||
1515 | *(__be32 *)dp = cycle_lsn; | ||
1516 | dp += BBSIZE; | ||
1517 | } | ||
1518 | |||
1519 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | ||
1520 | xlog_in_core_2_t *xhdr = iclog->ic_data; | ||
1521 | |||
1522 | for ( ; i < BTOBB(size); i++) { | ||
1523 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | ||
1524 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | ||
1525 | xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; | ||
1526 | *(__be32 *)dp = cycle_lsn; | ||
1527 | dp += BBSIZE; | ||
1528 | } | ||
1529 | |||
1530 | for (i = 1; i < log->l_iclog_heads; i++) | ||
1531 | xhdr[i].hic_xheader.xh_cycle = cycle_lsn; | ||
1532 | } | ||
1533 | } | ||
1534 | |||
1535 | /* | ||
1536 | * Calculate the checksum for a log buffer. | ||
1537 | * | ||
1538 | * This is a little more complicated than it should be because the various | ||
1539 | * headers and the actual data are non-contiguous. | ||
1540 | */ | ||
1541 | __le32 | ||
1542 | xlog_cksum( | ||
1543 | struct xlog *log, | ||
1544 | struct xlog_rec_header *rhead, | ||
1545 | char *dp, | ||
1546 | int size) | ||
1547 | { | ||
1548 | __uint32_t crc; | ||
1549 | |||
1550 | /* first generate the crc for the record header ... */ | ||
1551 | crc = xfs_start_cksum((char *)rhead, | ||
1552 | sizeof(struct xlog_rec_header), | ||
1553 | offsetof(struct xlog_rec_header, h_crc)); | ||
1554 | |||
1555 | /* ... then for additional cycle data for v2 logs ... */ | ||
1556 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | ||
1557 | union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead; | ||
1558 | int i; | ||
1559 | |||
1560 | for (i = 1; i < log->l_iclog_heads; i++) { | ||
1561 | crc = crc32c(crc, &xhdr[i].hic_xheader, | ||
1562 | sizeof(struct xlog_rec_ext_header)); | ||
1563 | } | ||
1564 | } | ||
1565 | |||
1566 | /* ... and finally for the payload */ | ||
1567 | crc = crc32c(crc, dp, size); | ||
1568 | |||
1569 | return xfs_end_cksum(crc); | ||
1570 | } | ||
1571 | |||
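
The structure of xlog_cksum() is worth calling out: the CRC is seeded over the record header (with the CRC field itself excluded via the offset passed to xfs_start_cksum()), extended over each extra v2 cycle header, then over the payload, and finalised once at the end. A reduced sketch of chaining crc32c() over non-contiguous regions (seeding and finalisation shown as plain inversion for illustration; the real xfs_start_cksum()/xfs_end_cksum() helpers wrap this):

    static __uint32_t example_cksum(const void *hdr, size_t hdr_len,
                                    const void *payload, size_t payload_len)
    {
            __uint32_t crc = crc32c(~0U, hdr, hdr_len);     /* seed */

            crc = crc32c(crc, payload, payload_len);        /* extend */
            return ~crc;                                    /* finalise */
    }
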
1572 | /* | ||
1420 | * The bdstrat callback function for log bufs. This gives us a central | 1573 | * The bdstrat callback function for log bufs. This gives us a central |
1421 | * place to trap bufs in case we get hit by a log I/O error and need to | 1574 | * place to trap bufs in case we get hit by a log I/O error and need to |
1422 | * shutdown. Actually, in practice, even when we didn't get a log error, | 1575 | * shutdown. Actually, in practice, even when we didn't get a log error, |
@@ -1476,7 +1629,6 @@ xlog_sync( | |||
1476 | struct xlog *log, | 1629 | struct xlog *log, |
1477 | struct xlog_in_core *iclog) | 1630 | struct xlog_in_core *iclog) |
1478 | { | 1631 | { |
1479 | xfs_caddr_t dptr; /* pointer to byte sized element */ | ||
1480 | xfs_buf_t *bp; | 1632 | xfs_buf_t *bp; |
1481 | int i; | 1633 | int i; |
1482 | uint count; /* byte count of bwrite */ | 1634 | uint count; /* byte count of bwrite */ |
@@ -1485,6 +1637,7 @@ xlog_sync( | |||
1485 | int split = 0; /* split write into two regions */ | 1637 | int split = 0; /* split write into two regions */ |
1486 | int error; | 1638 | int error; |
1487 | int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); | 1639 | int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); |
1640 | int size; | ||
1488 | 1641 | ||
1489 | XFS_STATS_INC(xs_log_writes); | 1642 | XFS_STATS_INC(xs_log_writes); |
1490 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); | 1643 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); |
@@ -1515,13 +1668,10 @@ xlog_sync( | |||
1515 | xlog_pack_data(log, iclog, roundoff); | 1668 | xlog_pack_data(log, iclog, roundoff); |
1516 | 1669 | ||
1517 | /* real byte length */ | 1670 | /* real byte length */ |
1518 | if (v2) { | 1671 | size = iclog->ic_offset; |
1519 | iclog->ic_header.h_len = | 1672 | if (v2) |
1520 | cpu_to_be32(iclog->ic_offset + roundoff); | 1673 | size += roundoff; |
1521 | } else { | 1674 | iclog->ic_header.h_len = cpu_to_be32(size); |
1522 | iclog->ic_header.h_len = | ||
1523 | cpu_to_be32(iclog->ic_offset); | ||
1524 | } | ||
1525 | 1675 | ||
1526 | bp = iclog->ic_bp; | 1676 | bp = iclog->ic_bp; |
1527 | XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); | 1677 | XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); |
@@ -1530,12 +1680,36 @@ xlog_sync( | |||
1530 | 1680 | ||
1531 | /* Do we need to split this write into 2 parts? */ | 1681 | /* Do we need to split this write into 2 parts? */ |
1532 | if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { | 1682 | if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { |
1683 | char *dptr; | ||
1684 | |||
1533 | split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); | 1685 | split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); |
1534 | count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); | 1686 | count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); |
1535 | iclog->ic_bwritecnt = 2; /* split into 2 writes */ | 1687 | iclog->ic_bwritecnt = 2; |
1688 | |||
1689 | /* | ||
1690 | * Bump the cycle numbers at the start of each block in the | ||
1691 | * part of the iclog that ends up in the buffer that gets | ||
1692 | * written to the start of the log. | ||
1693 | * | ||
1694 | * Watch out for the header magic number case, though. | ||
1695 | */ | ||
1696 | dptr = (char *)&iclog->ic_header + count; | ||
1697 | for (i = 0; i < split; i += BBSIZE) { | ||
1698 | __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); | ||
1699 | if (++cycle == XLOG_HEADER_MAGIC_NUM) | ||
1700 | cycle++; | ||
1701 | *(__be32 *)dptr = cpu_to_be32(cycle); | ||
1702 | |||
1703 | dptr += BBSIZE; | ||
1704 | } | ||
1536 | } else { | 1705 | } else { |
1537 | iclog->ic_bwritecnt = 1; | 1706 | iclog->ic_bwritecnt = 1; |
1538 | } | 1707 | } |
1708 | |||
1709 | /* calculate the checksum */ | ||
1710 | iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header, | ||
1711 | iclog->ic_datap, size); | ||
1712 | |||
1539 | bp->b_io_length = BTOBB(count); | 1713 | bp->b_io_length = BTOBB(count); |
1540 | bp->b_fspriv = iclog; | 1714 | bp->b_fspriv = iclog; |
1541 | XFS_BUF_ZEROFLAGS(bp); | 1715 | XFS_BUF_ZEROFLAGS(bp); |
@@ -1589,19 +1763,6 @@ xlog_sync( | |||
1589 | bp->b_flags |= XBF_SYNCIO; | 1763 | bp->b_flags |= XBF_SYNCIO; |
1590 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) | 1764 | if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) |
1591 | bp->b_flags |= XBF_FUA; | 1765 | bp->b_flags |= XBF_FUA; |
1592 | dptr = bp->b_addr; | ||
1593 | /* | ||
1594 | * Bump the cycle numbers at the start of each block | ||
1595 | * since this part of the buffer is at the start of | ||
1596 | * a new cycle. Watch out for the header magic number | ||
1597 | * case, though. | ||
1598 | */ | ||
1599 | for (i = 0; i < split; i += BBSIZE) { | ||
1600 | be32_add_cpu((__be32 *)dptr, 1); | ||
1601 | if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM) | ||
1602 | be32_add_cpu((__be32 *)dptr, 1); | ||
1603 | dptr += BBSIZE; | ||
1604 | } | ||
1605 | 1766 | ||
1606 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); | 1767 | ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); |
1607 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); | 1768 | ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); |
@@ -1618,7 +1779,6 @@ xlog_sync( | |||
1618 | return 0; | 1779 | return 0; |
1619 | } /* xlog_sync */ | 1780 | } /* xlog_sync */ |
1620 | 1781 | ||
1621 | |||
1622 | /* | 1782 | /* |
1623 | * Deallocate a log structure | 1783 | * Deallocate a log structure |
1624 | */ | 1784 | */ |
@@ -3713,3 +3873,4 @@ xlog_iclogs_empty( | |||
3713 | } while (iclog != log->l_iclog); | 3873 | } while (iclog != log->l_iclog); |
3714 | return 1; | 3874 | return 1; |
3715 | } | 3875 | } |
3876 | |||
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 748d312850e2..5caee96059df 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -181,5 +181,9 @@ int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | |||
181 | xfs_lsn_t *commit_lsn, int flags); | 181 | xfs_lsn_t *commit_lsn, int flags); |
182 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | 182 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); |
183 | 183 | ||
184 | void xfs_log_work_queue(struct xfs_mount *mp); | ||
185 | void xfs_log_worker(struct work_struct *work); | ||
186 | void xfs_log_quiesce(struct xfs_mount *mp); | ||
187 | |||
184 | #endif | 188 | #endif |
185 | #endif /* __XFS_LOG_H__ */ | 189 | #endif /* __XFS_LOG_H__ */ |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 18a801d76a42..16d8d12ea3b4 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -139,7 +139,6 @@ static inline uint xlog_get_client_id(__be32 i) | |||
139 | /* | 139 | /* |
140 | * Flags for log structure | 140 | * Flags for log structure |
141 | */ | 141 | */ |
142 | #define XLOG_CHKSUM_MISMATCH 0x1 /* used only during recovery */ | ||
143 | #define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */ | 142 | #define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */ |
144 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ | 143 | #define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ |
145 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being | 144 | #define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being |
@@ -291,7 +290,7 @@ typedef struct xlog_rec_header { | |||
291 | __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ | 290 | __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ |
292 | __be64 h_lsn; /* lsn of this LR : 8 */ | 291 | __be64 h_lsn; /* lsn of this LR : 8 */ |
293 | __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ | 292 | __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ |
294 | __be32 h_chksum; /* may not be used; non-zero if used : 4 */ | 293 | __le32 h_crc; /* crc of log record : 4 */ |
295 | __be32 h_prev_block; /* block number to previous LR : 4 */ | 294 | __be32 h_prev_block; /* block number to previous LR : 4 */ |
296 | __be32 h_num_logops; /* number of log operations in this LR : 4 */ | 295 | __be32 h_num_logops; /* number of log operations in this LR : 4 */ |
297 | __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; | 296 | __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; |
@@ -495,6 +494,7 @@ struct xlog { | |||
495 | struct xfs_buf *l_xbuf; /* extra buffer for log | 494 | struct xfs_buf *l_xbuf; /* extra buffer for log |
496 | * wrapping */ | 495 | * wrapping */ |
497 | struct xfs_buftarg *l_targ; /* buftarg of log */ | 496 | struct xfs_buftarg *l_targ; /* buftarg of log */ |
497 | struct delayed_work l_work; /* background flush work */ | ||
498 | uint l_flags; | 498 | uint l_flags; |
499 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | 499 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ |
500 | struct list_head *l_buf_cancel_table; | 500 | struct list_head *l_buf_cancel_table; |
@@ -554,11 +554,9 @@ xlog_recover( | |||
554 | extern int | 554 | extern int |
555 | xlog_recover_finish( | 555 | xlog_recover_finish( |
556 | struct xlog *log); | 556 | struct xlog *log); |
557 | extern void | 557 | |
558 | xlog_pack_data( | 558 | extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, |
559 | struct xlog *log, | 559 | char *dp, int size); |
560 | struct xlog_in_core *iclog, | ||
561 | int); | ||
562 | 560 | ||
563 | extern kmem_zone_t *xfs_log_ticket_zone; | 561 | extern kmem_zone_t *xfs_log_ticket_zone; |
564 | struct xlog_ticket * | 562 | struct xlog_ticket * |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d308749fabf1..96fcbb85ff83 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -41,7 +41,9 @@ | |||
41 | #include "xfs_trans_priv.h" | 41 | #include "xfs_trans_priv.h" |
42 | #include "xfs_quota.h" | 42 | #include "xfs_quota.h" |
43 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
44 | #include "xfs_cksum.h" | ||
44 | #include "xfs_trace.h" | 45 | #include "xfs_trace.h" |
46 | #include "xfs_icache.h" | ||
45 | 47 | ||
46 | STATIC int | 48 | STATIC int |
47 | xlog_find_zeroed( | 49 | xlog_find_zeroed( |
@@ -2143,7 +2145,7 @@ xlog_recover_buffer_pass2( | |||
2143 | buf_flags |= XBF_UNMAPPED; | 2145 | buf_flags |= XBF_UNMAPPED; |
2144 | 2146 | ||
2145 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, | 2147 | bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, |
2146 | buf_flags); | 2148 | buf_flags, NULL); |
2147 | if (!bp) | 2149 | if (!bp) |
2148 | return XFS_ERROR(ENOMEM); | 2150 | return XFS_ERROR(ENOMEM); |
2149 | error = bp->b_error; | 2151 | error = bp->b_error; |
@@ -2236,7 +2238,8 @@ xlog_recover_inode_pass2( | |||
2236 | } | 2238 | } |
2237 | trace_xfs_log_recover_inode_recover(log, in_f); | 2239 | trace_xfs_log_recover_inode_recover(log, in_f); |
2238 | 2240 | ||
2239 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0); | 2241 | bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, |
2242 | NULL); | ||
2240 | if (!bp) { | 2243 | if (!bp) { |
2241 | error = ENOMEM; | 2244 | error = ENOMEM; |
2242 | goto error; | 2245 | goto error; |
@@ -2547,7 +2550,8 @@ xlog_recover_dquot_pass2( | |||
2547 | ASSERT(dq_f->qlf_len == 1); | 2550 | ASSERT(dq_f->qlf_len == 1); |
2548 | 2551 | ||
2549 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, | 2552 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, |
2550 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp); | 2553 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, |
2554 | NULL); | ||
2551 | if (error) | 2555 | if (error) |
2552 | return error; | 2556 | return error; |
2553 | 2557 | ||
@@ -3213,80 +3217,58 @@ xlog_recover_process_iunlinks( | |||
3213 | mp->m_dmevmask = mp_dmevmask; | 3217 | mp->m_dmevmask = mp_dmevmask; |
3214 | } | 3218 | } |
3215 | 3219 | ||
3216 | |||
3217 | #ifdef DEBUG | ||
3218 | STATIC void | ||
3219 | xlog_pack_data_checksum( | ||
3220 | struct xlog *log, | ||
3221 | struct xlog_in_core *iclog, | ||
3222 | int size) | ||
3223 | { | ||
3224 | int i; | ||
3225 | __be32 *up; | ||
3226 | uint chksum = 0; | ||
3227 | |||
3228 | up = (__be32 *)iclog->ic_datap; | ||
3229 | /* divide length by 4 to get # words */ | ||
3230 | for (i = 0; i < (size >> 2); i++) { | ||
3231 | chksum ^= be32_to_cpu(*up); | ||
3232 | up++; | ||
3233 | } | ||
3234 | iclog->ic_header.h_chksum = cpu_to_be32(chksum); | ||
3235 | } | ||
3236 | #else | ||
3237 | #define xlog_pack_data_checksum(log, iclog, size) | ||
3238 | #endif | ||
3239 | |||
3240 | /* | 3220 | /* |
3241 | * Stamp cycle number in every block | 3221 | * Unpack the log buffer data and crc check it. If the check fails, issue a
3222 | * warning if and only if the CRC in the header is non-zero. This makes the | ||
3223 | * check an advisory warning, and the zero CRC check will prevent failure | ||
3224 | * warnings from being emitted when upgrading the kernel from one that does not | ||
3225 | * add CRCs by default. | ||
3226 | * | ||
3227 | * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log | ||
3228 | * corruption failure. | ||
3242 | */ | 3229 | */ |
3243 | void | 3230 | STATIC int |
3244 | xlog_pack_data( | 3231 | xlog_unpack_data_crc( |
3245 | struct xlog *log, | 3232 | struct xlog_rec_header *rhead, |
3246 | struct xlog_in_core *iclog, | 3233 | xfs_caddr_t dp, |
3247 | int roundoff) | 3234 | struct xlog *log) |
3248 | { | 3235 | { |
3249 | int i, j, k; | 3236 | __le32 crc; |
3250 | int size = iclog->ic_offset + roundoff; | 3237 | |
3251 | __be32 cycle_lsn; | 3238 | crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); |
3252 | xfs_caddr_t dp; | 3239 | if (crc != rhead->h_crc) { |
3253 | 3240 | if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { | |
3254 | xlog_pack_data_checksum(log, iclog, size); | 3241 | xfs_alert(log->l_mp, |
3255 | 3242 | "log record CRC mismatch: found 0x%x, expected 0x%x.\n", | |
3256 | cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); | 3243 | le32_to_cpu(rhead->h_crc), |
3257 | 3244 | le32_to_cpu(crc)); | |
3258 | dp = iclog->ic_datap; | 3245 | xfs_hex_dump(dp, 32); |
3259 | for (i = 0; i < BTOBB(size) && | ||
3260 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { | ||
3261 | iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; | ||
3262 | *(__be32 *)dp = cycle_lsn; | ||
3263 | dp += BBSIZE; | ||
3264 | } | ||
3265 | |||
3266 | if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { | ||
3267 | xlog_in_core_2_t *xhdr = iclog->ic_data; | ||
3268 | |||
3269 | for ( ; i < BTOBB(size); i++) { | ||
3270 | j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | ||
3271 | k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); | ||
3272 | xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; | ||
3273 | *(__be32 *)dp = cycle_lsn; | ||
3274 | dp += BBSIZE; | ||
3275 | } | 3246 | } |
3276 | 3247 | ||
3277 | for (i = 1; i < log->l_iclog_heads; i++) { | 3248 | /* |
3278 | xhdr[i].hic_xheader.xh_cycle = cycle_lsn; | 3249 | * If we've detected a log record corruption, then we can't |
3279 | } | 3250 | * recover past this point. Abort recovery if we are enforcing |
3251 | * CRC protection by punting an error back up the stack. | ||
3252 | */ | ||
3253 | if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) | ||
3254 | return EFSCORRUPTED; | ||
3280 | } | 3255 | } |
3256 | |||
3257 | return 0; | ||
3281 | } | 3258 | } |
3282 | 3259 | ||
3283 | STATIC void | 3260 | STATIC int |
3284 | xlog_unpack_data( | 3261 | xlog_unpack_data( |
3285 | struct xlog_rec_header *rhead, | 3262 | struct xlog_rec_header *rhead, |
3286 | xfs_caddr_t dp, | 3263 | xfs_caddr_t dp, |
3287 | struct xlog *log) | 3264 | struct xlog *log) |
3288 | { | 3265 | { |
3289 | int i, j, k; | 3266 | int i, j, k; |
3267 | int error; | ||
3268 | |||
3269 | error = xlog_unpack_data_crc(rhead, dp, log); | ||
3270 | if (error) | ||
3271 | return error; | ||
3290 | 3272 | ||
3291 | for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && | 3273 | for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && |
3292 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { | 3274 | i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { |
@@ -3303,6 +3285,8 @@ xlog_unpack_data( | |||
3303 | dp += BBSIZE; | 3285 | dp += BBSIZE; |
3304 | } | 3286 | } |
3305 | } | 3287 | } |
3288 | |||
3289 | return 0; | ||
3306 | } | 3290 | } |
3307 | 3291 | ||
3308 | STATIC int | 3292 | STATIC int |
@@ -3434,9 +3418,13 @@ xlog_do_recovery_pass( | |||
3434 | if (error) | 3418 | if (error) |
3435 | goto bread_err2; | 3419 | goto bread_err2; |
3436 | 3420 | ||
3437 | xlog_unpack_data(rhead, offset, log); | 3421 | error = xlog_unpack_data(rhead, offset, log); |
3438 | if ((error = xlog_recover_process_data(log, | 3422 | if (error) |
3439 | rhash, rhead, offset, pass))) | 3423 | goto bread_err2; |
3424 | |||
3425 | error = xlog_recover_process_data(log, | ||
3426 | rhash, rhead, offset, pass); | ||
3427 | if (error) | ||
3440 | goto bread_err2; | 3428 | goto bread_err2; |
3441 | blk_no += bblks + hblks; | 3429 | blk_no += bblks + hblks; |
3442 | } | 3430 | } |
@@ -3546,9 +3534,14 @@ xlog_do_recovery_pass( | |||
3546 | if (error) | 3534 | if (error) |
3547 | goto bread_err2; | 3535 | goto bread_err2; |
3548 | } | 3536 | } |
3549 | xlog_unpack_data(rhead, offset, log); | 3537 | |
3550 | if ((error = xlog_recover_process_data(log, rhash, | 3538 | error = xlog_unpack_data(rhead, offset, log); |
3551 | rhead, offset, pass))) | 3539 | if (error) |
3540 | goto bread_err2; | ||
3541 | |||
3542 | error = xlog_recover_process_data(log, rhash, | ||
3543 | rhead, offset, pass); | ||
3544 | if (error) | ||
3552 | goto bread_err2; | 3545 | goto bread_err2; |
3553 | blk_no += bblks; | 3546 | blk_no += bblks; |
3554 | } | 3547 | } |
@@ -3573,9 +3566,13 @@ xlog_do_recovery_pass( | |||
3573 | if (error) | 3566 | if (error) |
3574 | goto bread_err2; | 3567 | goto bread_err2; |
3575 | 3568 | ||
3576 | xlog_unpack_data(rhead, offset, log); | 3569 | error = xlog_unpack_data(rhead, offset, log); |
3577 | if ((error = xlog_recover_process_data(log, rhash, | 3570 | if (error) |
3578 | rhead, offset, pass))) | 3571 | goto bread_err2; |
3572 | |||
3573 | error = xlog_recover_process_data(log, rhash, | ||
3574 | rhead, offset, pass); | ||
3575 | if (error) | ||
3579 | goto bread_err2; | 3576 | goto bread_err2; |
3580 | blk_no += bblks + hblks; | 3577 | blk_no += bblks + hblks; |
3581 | } | 3578 | } |
@@ -3689,13 +3686,14 @@ xlog_do_recover( | |||
3689 | 3686 | ||
3690 | /* | 3687 | /* |
3691 | * Now that we've finished replaying all buffer and inode | 3688 | * Now that we've finished replaying all buffer and inode |
3692 | * updates, re-read in the superblock. | 3689 | * updates, re-read in the superblock and reverify it. |
3693 | */ | 3690 | */ |
3694 | bp = xfs_getsb(log->l_mp, 0); | 3691 | bp = xfs_getsb(log->l_mp, 0); |
3695 | XFS_BUF_UNDONE(bp); | 3692 | XFS_BUF_UNDONE(bp); |
3696 | ASSERT(!(XFS_BUF_ISWRITE(bp))); | 3693 | ASSERT(!(XFS_BUF_ISWRITE(bp))); |
3697 | XFS_BUF_READ(bp); | 3694 | XFS_BUF_READ(bp); |
3698 | XFS_BUF_UNASYNC(bp); | 3695 | XFS_BUF_UNASYNC(bp); |
3696 | bp->b_ops = &xfs_sb_buf_ops; | ||
3699 | xfsbdstrat(log->l_mp, bp); | 3697 | xfsbdstrat(log->l_mp, bp); |
3700 | error = xfs_buf_iowait(bp); | 3698 | error = xfs_buf_iowait(bp); |
3701 | if (error) { | 3699 | if (error) { |
@@ -3707,7 +3705,7 @@ xlog_do_recover( | |||
3707 | 3705 | ||
3708 | /* Convert superblock from on-disk format */ | 3706 | /* Convert superblock from on-disk format */ |
3709 | sbp = &log->l_mp->m_sb; | 3707 | sbp = &log->l_mp->m_sb; |
3710 | xfs_sb_from_disk(log->l_mp, XFS_BUF_TO_SBP(bp)); | 3708 | xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); |
3711 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); | 3709 | ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); |
3712 | ASSERT(xfs_sb_good_version(sbp)); | 3710 | ASSERT(xfs_sb_good_version(sbp)); |
3713 | xfs_buf_relse(bp); | 3711 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b2bd3a0e6376..da508463ff10 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include "xfs_fsops.h" | 42 | #include "xfs_fsops.h" |
43 | #include "xfs_utils.h" | 43 | #include "xfs_utils.h" |
44 | #include "xfs_trace.h" | 44 | #include "xfs_trace.h" |
45 | #include "xfs_icache.h" | ||
45 | 46 | ||
46 | 47 | ||
47 | #ifdef HAVE_PERCPU_SB | 48 | #ifdef HAVE_PERCPU_SB |
@@ -303,9 +304,8 @@ STATIC int | |||
303 | xfs_mount_validate_sb( | 304 | xfs_mount_validate_sb( |
304 | xfs_mount_t *mp, | 305 | xfs_mount_t *mp, |
305 | xfs_sb_t *sbp, | 306 | xfs_sb_t *sbp, |
306 | int flags) | 307 | bool check_inprogress) |
307 | { | 308 | { |
308 | int loud = !(flags & XFS_MFSI_QUIET); | ||
309 | 309 | ||
310 | /* | 310 | /* |
311 | * If the log device and data device have the | 311 | * If the log device and data device have the |
@@ -315,21 +315,18 @@ xfs_mount_validate_sb( | |||
315 | * a volume filesystem in a non-volume manner. | 315 | * a volume filesystem in a non-volume manner. |
316 | */ | 316 | */ |
317 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { | 317 | if (sbp->sb_magicnum != XFS_SB_MAGIC) { |
318 | if (loud) | 318 | xfs_warn(mp, "bad magic number"); |
319 | xfs_warn(mp, "bad magic number"); | ||
320 | return XFS_ERROR(EWRONGFS); | 319 | return XFS_ERROR(EWRONGFS); |
321 | } | 320 | } |
322 | 321 | ||
323 | if (!xfs_sb_good_version(sbp)) { | 322 | if (!xfs_sb_good_version(sbp)) { |
324 | if (loud) | 323 | xfs_warn(mp, "bad version"); |
325 | xfs_warn(mp, "bad version"); | ||
326 | return XFS_ERROR(EWRONGFS); | 324 | return XFS_ERROR(EWRONGFS); |
327 | } | 325 | } |
328 | 326 | ||
329 | if (unlikely( | 327 | if (unlikely( |
330 | sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { | 328 | sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { |
331 | if (loud) | 329 | xfs_warn(mp, |
332 | xfs_warn(mp, | ||
333 | "filesystem is marked as having an external log; " | 330 | "filesystem is marked as having an external log; " |
334 | "specify logdev on the mount command line."); | 331 | "specify logdev on the mount command line."); |
335 | return XFS_ERROR(EINVAL); | 332 | return XFS_ERROR(EINVAL); |
@@ -337,8 +334,7 @@ xfs_mount_validate_sb( | |||
337 | 334 | ||
338 | if (unlikely( | 335 | if (unlikely( |
339 | sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { | 336 | sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { |
340 | if (loud) | 337 | xfs_warn(mp, |
341 | xfs_warn(mp, | ||
342 | "filesystem is marked as having an internal log; " | 338 | "filesystem is marked as having an internal log; " |
343 | "do not specify logdev on the mount command line."); | 339 | "do not specify logdev on the mount command line."); |
344 | return XFS_ERROR(EINVAL); | 340 | return XFS_ERROR(EINVAL); |
@@ -372,8 +368,7 @@ xfs_mount_validate_sb( | |||
372 | sbp->sb_dblocks == 0 || | 368 | sbp->sb_dblocks == 0 || |
373 | sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || | 369 | sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || |
374 | sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { | 370 | sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { |
375 | if (loud) | 371 | XFS_CORRUPTION_ERROR("SB sanity check failed", |
376 | XFS_CORRUPTION_ERROR("SB sanity check failed", | ||
377 | XFS_ERRLEVEL_LOW, mp, sbp); | 372 | XFS_ERRLEVEL_LOW, mp, sbp); |
378 | return XFS_ERROR(EFSCORRUPTED); | 373 | return XFS_ERROR(EFSCORRUPTED); |
379 | } | 374 | } |
@@ -382,12 +377,10 @@ xfs_mount_validate_sb( | |||
382 | * Until this is fixed only page-sized or smaller data blocks work. | 377 | * Until this is fixed only page-sized or smaller data blocks work. |
383 | */ | 378 | */ |
384 | if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { | 379 | if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { |
385 | if (loud) { | 380 | xfs_warn(mp, |
386 | xfs_warn(mp, | ||
387 | "File system with blocksize %d bytes. " | 381 | "File system with blocksize %d bytes. " |
388 | "Only pagesize (%ld) or less will currently work.", | 382 | "Only pagesize (%ld) or less will currently work.", |
389 | sbp->sb_blocksize, PAGE_SIZE); | 383 | sbp->sb_blocksize, PAGE_SIZE); |
390 | } | ||
391 | return XFS_ERROR(ENOSYS); | 384 | return XFS_ERROR(ENOSYS); |
392 | } | 385 | } |
393 | 386 | ||
@@ -401,23 +394,20 @@ xfs_mount_validate_sb( | |||
401 | case 2048: | 394 | case 2048: |
402 | break; | 395 | break; |
403 | default: | 396 | default: |
404 | if (loud) | 397 | xfs_warn(mp, "inode size of %d bytes not supported", |
405 | xfs_warn(mp, "inode size of %d bytes not supported", | ||
406 | sbp->sb_inodesize); | 398 | sbp->sb_inodesize); |
407 | return XFS_ERROR(ENOSYS); | 399 | return XFS_ERROR(ENOSYS); |
408 | } | 400 | } |
409 | 401 | ||
410 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || | 402 | if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || |
411 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { | 403 | xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { |
412 | if (loud) | 404 | xfs_warn(mp, |
413 | xfs_warn(mp, | ||
414 | "file system too large to be mounted on this system."); | 405 | "file system too large to be mounted on this system."); |
415 | return XFS_ERROR(EFBIG); | 406 | return XFS_ERROR(EFBIG); |
416 | } | 407 | } |
417 | 408 | ||
418 | if (unlikely(sbp->sb_inprogress)) { | 409 | if (check_inprogress && sbp->sb_inprogress) { |
419 | if (loud) | 410 | xfs_warn(mp, "Offline file system operation in progress!"); |
420 | xfs_warn(mp, "file system busy"); | ||
421 | return XFS_ERROR(EFSCORRUPTED); | 411 | return XFS_ERROR(EFSCORRUPTED); |
422 | } | 412 | } |
423 | 413 | ||
@@ -425,9 +415,7 @@ xfs_mount_validate_sb( | |||
425 | * Version 1 directory format has never worked on Linux. | 415 | * Version 1 directory format has never worked on Linux. |
426 | */ | 416 | */ |
427 | if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { | 417 | if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { |
428 | if (loud) | 418 | xfs_warn(mp, "file system using version 1 directory format"); |
429 | xfs_warn(mp, | ||
430 | "file system using version 1 directory format"); | ||
431 | return XFS_ERROR(ENOSYS); | 419 | return XFS_ERROR(ENOSYS); |
432 | } | 420 | } |
433 | 421 | ||
@@ -520,11 +508,9 @@ out_unwind: | |||
520 | 508 | ||
521 | void | 509 | void |
522 | xfs_sb_from_disk( | 510 | xfs_sb_from_disk( |
523 | struct xfs_mount *mp, | 511 | struct xfs_sb *to, |
524 | xfs_dsb_t *from) | 512 | xfs_dsb_t *from) |
525 | { | 513 | { |
526 | struct xfs_sb *to = &mp->m_sb; | ||
527 | |||
528 | to->sb_magicnum = be32_to_cpu(from->sb_magicnum); | 514 | to->sb_magicnum = be32_to_cpu(from->sb_magicnum); |
529 | to->sb_blocksize = be32_to_cpu(from->sb_blocksize); | 515 | to->sb_blocksize = be32_to_cpu(from->sb_blocksize); |
530 | to->sb_dblocks = be64_to_cpu(from->sb_dblocks); | 516 | to->sb_dblocks = be64_to_cpu(from->sb_dblocks); |
@@ -626,6 +612,72 @@ xfs_sb_to_disk( | |||
626 | } | 612 | } |
627 | } | 613 | } |
628 | 614 | ||
615 | static void | ||
616 | xfs_sb_verify( | ||
617 | struct xfs_buf *bp) | ||
618 | { | ||
619 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
620 | struct xfs_sb sb; | ||
621 | int error; | ||
622 | |||
623 | xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); | ||
624 | |||
625 | /* | ||
626 | * Only check the in progress field for the primary superblock as | ||
627 | * mkfs.xfs doesn't clear it from secondary superblocks. | ||
628 | */ | ||
629 | error = xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR); | ||
630 | if (error) | ||
631 | xfs_buf_ioerror(bp, error); | ||
632 | } | ||
633 | |||
634 | static void | ||
635 | xfs_sb_read_verify( | ||
636 | struct xfs_buf *bp) | ||
637 | { | ||
638 | xfs_sb_verify(bp); | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | * We may be probed for a filesystem match, so we may not want to emit | ||
643 | * messages when the superblock buffer is not actually an XFS superblock. | ||
644 | * If we find an XFS superblock, then run a normal, noisy mount because we are | ||
645 | * really going to mount it and want to know about errors. | ||
646 | */ | ||
647 | static void | ||
648 | xfs_sb_quiet_read_verify( | ||
649 | struct xfs_buf *bp) | ||
650 | { | ||
651 | struct xfs_sb sb; | ||
652 | |||
653 | xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); | ||
654 | |||
655 | if (sb.sb_magicnum == XFS_SB_MAGIC) { | ||
656 | /* XFS filesystem, verify noisily! */ | ||
657 | xfs_sb_read_verify(bp); | ||
658 | return; | ||
659 | } | ||
660 | /* quietly fail */ | ||
661 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
662 | } | ||
663 | |||
664 | static void | ||
665 | xfs_sb_write_verify( | ||
666 | struct xfs_buf *bp) | ||
667 | { | ||
668 | xfs_sb_verify(bp); | ||
669 | } | ||
670 | |||
671 | const struct xfs_buf_ops xfs_sb_buf_ops = { | ||
672 | .verify_read = xfs_sb_read_verify, | ||
673 | .verify_write = xfs_sb_write_verify, | ||
674 | }; | ||
675 | |||
676 | static const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { | ||
677 | .verify_read = xfs_sb_quiet_read_verify, | ||
678 | .verify_write = xfs_sb_write_verify, | ||
679 | }; | ||
680 | |||
629 | /* | 681 | /* |
630 | * xfs_readsb | 682 | * xfs_readsb |
631 | * | 683 | * |
@@ -651,26 +703,27 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
651 | 703 | ||
652 | reread: | 704 | reread: |
653 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, | 705 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, |
654 | BTOBB(sector_size), 0); | 706 | BTOBB(sector_size), 0, |
707 | loud ? &xfs_sb_buf_ops | ||
708 | : &xfs_sb_quiet_buf_ops); | ||
655 | if (!bp) { | 709 | if (!bp) { |
656 | if (loud) | 710 | if (loud) |
657 | xfs_warn(mp, "SB buffer read failed"); | 711 | xfs_warn(mp, "SB buffer read failed"); |
658 | return EIO; | 712 | return EIO; |
659 | } | 713 | } |
660 | 714 | if (bp->b_error) { | |
661 | /* | 715 | error = bp->b_error; |
662 | * Initialize the mount structure from the superblock. | ||
663 | * But first do some basic consistency checking. | ||
664 | */ | ||
665 | xfs_sb_from_disk(mp, XFS_BUF_TO_SBP(bp)); | ||
666 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); | ||
667 | if (error) { | ||
668 | if (loud) | 716 | if (loud) |
669 | xfs_warn(mp, "SB validate failed"); | 717 | xfs_warn(mp, "SB validate failed"); |
670 | goto release_buf; | 718 | goto release_buf; |
671 | } | 719 | } |
672 | 720 | ||
673 | /* | 721 | /* |
722 | * Initialize the mount structure from the superblock. | ||
723 | */ | ||
724 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); | ||
725 | |||
726 | /* | ||
674 | * We must be able to do sector-sized and sector-aligned IO. | 727 | * We must be able to do sector-sized and sector-aligned IO. |
675 | */ | 728 | */ |
676 | if (sector_size > mp->m_sb.sb_sectsize) { | 729 | if (sector_size > mp->m_sb.sb_sectsize) { |
@@ -1001,7 +1054,7 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
1001 | } | 1054 | } |
1002 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, | 1055 | bp = xfs_buf_read_uncached(mp->m_ddev_targp, |
1003 | d - XFS_FSS_TO_BB(mp, 1), | 1056 | d - XFS_FSS_TO_BB(mp, 1), |
1004 | XFS_FSS_TO_BB(mp, 1), 0); | 1057 | XFS_FSS_TO_BB(mp, 1), 0, NULL); |
1005 | if (!bp) { | 1058 | if (!bp) { |
1006 | xfs_warn(mp, "last sector read failed"); | 1059 | xfs_warn(mp, "last sector read failed"); |
1007 | return EIO; | 1060 | return EIO; |
@@ -1016,7 +1069,7 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
1016 | } | 1069 | } |
1017 | bp = xfs_buf_read_uncached(mp->m_logdev_targp, | 1070 | bp = xfs_buf_read_uncached(mp->m_logdev_targp, |
1018 | d - XFS_FSB_TO_BB(mp, 1), | 1071 | d - XFS_FSB_TO_BB(mp, 1), |
1019 | XFS_FSB_TO_BB(mp, 1), 0); | 1072 | XFS_FSB_TO_BB(mp, 1), 0, NULL); |
1020 | if (!bp) { | 1073 | if (!bp) { |
1021 | xfs_warn(mp, "log device read failed"); | 1074 | xfs_warn(mp, "log device read failed"); |
1022 | return EIO; | 1075 | return EIO; |
@@ -1427,6 +1480,8 @@ xfs_unmountfs( | |||
1427 | __uint64_t resblks; | 1480 | __uint64_t resblks; |
1428 | int error; | 1481 | int error; |
1429 | 1482 | ||
1483 | cancel_delayed_work_sync(&mp->m_eofblocks_work); | ||
1484 | |||
1430 | xfs_qm_unmount_quotas(mp); | 1485 | xfs_qm_unmount_quotas(mp); |
1431 | xfs_rtunmount_inodes(mp); | 1486 | xfs_rtunmount_inodes(mp); |
1432 | IRELE(mp->m_rootip); | 1487 | IRELE(mp->m_rootip); |
@@ -1450,21 +1505,16 @@ xfs_unmountfs( | |||
1450 | 1505 | ||
1451 | /* | 1506 | /* |
1452 | * And reclaim all inodes. At this point there should be no dirty | 1507 | * And reclaim all inodes. At this point there should be no dirty |
1453 | * inode, and none should be pinned or locked, but use synchronous | 1508 | * inodes and none should be pinned or locked, but use synchronous |
1454 | * reclaim just to be sure. | 1509 | * reclaim just to be sure. We can stop background inode reclaim |
1510 | * here as well if it is still running. | ||
1455 | */ | 1511 | */ |
1512 | cancel_delayed_work_sync(&mp->m_reclaim_work); | ||
1456 | xfs_reclaim_inodes(mp, SYNC_WAIT); | 1513 | xfs_reclaim_inodes(mp, SYNC_WAIT); |
1457 | 1514 | ||
1458 | xfs_qm_unmount(mp); | 1515 | xfs_qm_unmount(mp); |
1459 | 1516 | ||
1460 | /* | 1517 | /* |
1461 | * Flush out the log synchronously so that we know for sure | ||
1462 | * that nothing is pinned. This is important because bflush() | ||
1463 | * will skip pinned buffers. | ||
1464 | */ | ||
1465 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
1466 | |||
1467 | /* | ||
1468 | * Unreserve any blocks we have so that when we unmount we don't account | 1518 | * Unreserve any blocks we have so that when we unmount we don't account |
1469 | * the reserved free space as used. This is really only necessary for | 1519 | * the reserved free space as used. This is really only necessary for |
1470 | * lazy superblock counting because it trusts the incore superblock | 1520 | * lazy superblock counting because it trusts the incore superblock |
@@ -1489,23 +1539,6 @@ xfs_unmountfs( | |||
1489 | xfs_warn(mp, "Unable to update superblock counters. " | 1539 | xfs_warn(mp, "Unable to update superblock counters. " |
1490 | "Freespace may not be correct on next mount."); | 1540 | "Freespace may not be correct on next mount."); |
1491 | 1541 | ||
1492 | /* | ||
1493 | * At this point we might have modified the superblock again and thus | ||
1494 | * added an item to the AIL, thus flush it again. | ||
1495 | */ | ||
1496 | xfs_ail_push_all_sync(mp->m_ail); | ||
1497 | xfs_wait_buftarg(mp->m_ddev_targp); | ||
1498 | |||
1499 | /* | ||
1500 | * The superblock buffer is uncached and xfsaild_push() will lock and | ||
1501 | * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait() | ||
1502 | * here but a lock on the superblock buffer will block until iodone() | ||
1503 | * has completed. | ||
1504 | */ | ||
1505 | xfs_buf_lock(mp->m_sb_bp); | ||
1506 | xfs_buf_unlock(mp->m_sb_bp); | ||
1507 | |||
1508 | xfs_log_unmount_write(mp); | ||
1509 | xfs_log_unmount(mp); | 1542 | xfs_log_unmount(mp); |
1510 | xfs_uuid_unmount(mp); | 1543 | xfs_uuid_unmount(mp); |
1511 | 1544 | ||
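
The xfs_buf_ops tables added above attach read/write verifiers to a buffer, so validation runs once at I/O completion instead of in every caller, and the quiet probe variant simply swaps in a table whose read verifier checks the magic before deciding whether to warn. A self-contained sketch of the same pattern; demo_buf, demo_buf_ops and DEMO_MAGIC are invented for illustration and are not the kernel structures:

#include <stdio.h>

struct demo_buf;

struct demo_buf_ops {
	void (*verify_read)(struct demo_buf *bp);
	void (*verify_write)(struct demo_buf *bp);
};

struct demo_buf {
	unsigned int magic;
	int error;
	const struct demo_buf_ops *ops;
};

#define DEMO_MAGIC 0x58465342	/* "XFSB" */

static void demo_verify(struct demo_buf *bp)
{
	if (bp->magic != DEMO_MAGIC)
		bp->error = -1;	/* a real verifier would warn loudly here */
}

static const struct demo_buf_ops demo_ops = {
	.verify_read  = demo_verify,
	.verify_write = demo_verify,
};

static int demo_read(struct demo_buf *bp)
{
	/* ... fill bp from disk ... */
	if (bp->ops && bp->ops->verify_read)
		bp->ops->verify_read(bp);	/* runs once, at I/O completion */
	return bp->error;
}

int main(void)
{
	struct demo_buf good = { .magic = DEMO_MAGIC, .ops = &demo_ops };
	struct demo_buf bad  = { .magic = 0,          .ops = &demo_ops };

	printf("good=%d bad=%d\n", demo_read(&good), demo_read(&bad));
	return 0;
}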
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index deee09e534dc..bab8314507e4 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -51,8 +51,6 @@ typedef struct xfs_trans_reservations { | |||
51 | 51 | ||
52 | #else /* __KERNEL__ */ | 52 | #else /* __KERNEL__ */ |
53 | 53 | ||
54 | #include "xfs_sync.h" | ||
55 | |||
56 | struct xlog; | 54 | struct xlog; |
57 | struct xfs_inode; | 55 | struct xfs_inode; |
58 | struct xfs_mru_cache; | 56 | struct xfs_mru_cache; |
@@ -197,9 +195,9 @@ typedef struct xfs_mount { | |||
197 | struct mutex m_icsb_mutex; /* balancer sync lock */ | 195 | struct mutex m_icsb_mutex; /* balancer sync lock */ |
198 | #endif | 196 | #endif |
199 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 197 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
200 | struct delayed_work m_sync_work; /* background sync work */ | ||
201 | struct delayed_work m_reclaim_work; /* background inode reclaim */ | 198 | struct delayed_work m_reclaim_work; /* background inode reclaim */ |
202 | struct work_struct m_flush_work; /* background inode flush */ | 199 | struct delayed_work m_eofblocks_work; /* background eof blocks |
200 | trimming */ | ||
203 | __int64_t m_update_flags; /* sb flags we need to update | 201 | __int64_t m_update_flags; /* sb flags we need to update |
204 | on the next remount,rw */ | 202 | on the next remount,rw */ |
205 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 203 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |
@@ -209,6 +207,9 @@ typedef struct xfs_mount { | |||
209 | struct workqueue_struct *m_data_workqueue; | 207 | struct workqueue_struct *m_data_workqueue; |
210 | struct workqueue_struct *m_unwritten_workqueue; | 208 | struct workqueue_struct *m_unwritten_workqueue; |
211 | struct workqueue_struct *m_cil_workqueue; | 209 | struct workqueue_struct *m_cil_workqueue; |
210 | struct workqueue_struct *m_reclaim_workqueue; | ||
211 | struct workqueue_struct *m_log_workqueue; | ||
212 | struct workqueue_struct *m_eofblocks_workqueue; | ||
212 | } xfs_mount_t; | 213 | } xfs_mount_t; |
213 | 214 | ||
214 | /* | 215 | /* |
@@ -387,7 +388,9 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *); | |||
387 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); | 388 | extern void xfs_mod_sb(struct xfs_trans *, __int64_t); |
388 | extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, | 389 | extern int xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t, |
389 | xfs_agnumber_t *); | 390 | xfs_agnumber_t *); |
390 | extern void xfs_sb_from_disk(struct xfs_mount *, struct xfs_dsb *); | 391 | extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); |
391 | extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); | 392 | extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); |
392 | 393 | ||
394 | extern const struct xfs_buf_ops xfs_sb_buf_ops; | ||
395 | |||
393 | #endif /* __XFS_MOUNT_H__ */ | 396 | #endif /* __XFS_MOUNT_H__ */ |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 2e86fa0cfc0d..60eff4763156 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | #include "xfs_qm.h" | 41 | #include "xfs_qm.h" |
42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
43 | #include "xfs_icache.h" | ||
43 | 44 | ||
44 | /* | 45 | /* |
45 | * The global quota manager. There is only one of these for the entire | 46 | * The global quota manager. There is only one of these for the entire |
@@ -891,7 +892,8 @@ xfs_qm_dqiter_bufs( | |||
891 | while (blkcnt--) { | 892 | while (blkcnt--) { |
892 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, | 893 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, |
893 | XFS_FSB_TO_DADDR(mp, bno), | 894 | XFS_FSB_TO_DADDR(mp, bno), |
894 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); | 895 | mp->m_quotainfo->qi_dqchunklen, 0, &bp, |
896 | &xfs_dquot_buf_ops); | ||
895 | if (error) | 897 | if (error) |
896 | break; | 898 | break; |
897 | 899 | ||
@@ -978,7 +980,8 @@ xfs_qm_dqiterate( | |||
978 | while (rablkcnt--) { | 980 | while (rablkcnt--) { |
979 | xfs_buf_readahead(mp->m_ddev_targp, | 981 | xfs_buf_readahead(mp->m_ddev_targp, |
980 | XFS_FSB_TO_DADDR(mp, rablkno), | 982 | XFS_FSB_TO_DADDR(mp, rablkno), |
981 | mp->m_quotainfo->qi_dqchunklen); | 983 | mp->m_quotainfo->qi_dqchunklen, |
984 | NULL); | ||
982 | rablkno++; | 985 | rablkno++; |
983 | } | 986 | } |
984 | } | 987 | } |
@@ -1453,7 +1456,7 @@ xfs_qm_dqreclaim_one( | |||
1453 | int error; | 1456 | int error; |
1454 | 1457 | ||
1455 | if (!xfs_dqlock_nowait(dqp)) | 1458 | if (!xfs_dqlock_nowait(dqp)) |
1456 | goto out_busy; | 1459 | goto out_move_tail; |
1457 | 1460 | ||
1458 | /* | 1461 | /* |
1459 | * This dquot has acquired a reference in the meantime, so remove it from | 1462 | * This dquot has acquired a reference in the meantime, so remove it from |
@@ -1476,7 +1479,7 @@ xfs_qm_dqreclaim_one( | |||
1476 | * getting flushed to disk, we don't want to reclaim it. | 1479 | * getting flushed to disk, we don't want to reclaim it. |
1477 | */ | 1480 | */ |
1478 | if (!xfs_dqflock_nowait(dqp)) | 1481 | if (!xfs_dqflock_nowait(dqp)) |
1479 | goto out_busy; | 1482 | goto out_unlock_move_tail; |
1480 | 1483 | ||
1481 | if (XFS_DQ_IS_DIRTY(dqp)) { | 1484 | if (XFS_DQ_IS_DIRTY(dqp)) { |
1482 | struct xfs_buf *bp = NULL; | 1485 | struct xfs_buf *bp = NULL; |
@@ -1487,7 +1490,7 @@ xfs_qm_dqreclaim_one( | |||
1487 | if (error) { | 1490 | if (error) { |
1488 | xfs_warn(mp, "%s: dquot %p flush failed", | 1491 | xfs_warn(mp, "%s: dquot %p flush failed", |
1489 | __func__, dqp); | 1492 | __func__, dqp); |
1490 | goto out_busy; | 1493 | goto out_unlock_move_tail; |
1491 | } | 1494 | } |
1492 | 1495 | ||
1493 | xfs_buf_delwri_queue(bp, buffer_list); | 1496 | xfs_buf_delwri_queue(bp, buffer_list); |
@@ -1496,7 +1499,7 @@ xfs_qm_dqreclaim_one( | |||
1496 | * Give the dquot another try on the freelist, as the | 1499 | * Give the dquot another try on the freelist, as the |
1497 | * flushing will take some time. | 1500 | * flushing will take some time. |
1498 | */ | 1501 | */ |
1499 | goto out_busy; | 1502 | goto out_unlock_move_tail; |
1500 | } | 1503 | } |
1501 | xfs_dqfunlock(dqp); | 1504 | xfs_dqfunlock(dqp); |
1502 | 1505 | ||
@@ -1515,14 +1518,13 @@ xfs_qm_dqreclaim_one( | |||
1515 | XFS_STATS_INC(xs_qm_dqreclaims); | 1518 | XFS_STATS_INC(xs_qm_dqreclaims); |
1516 | return; | 1519 | return; |
1517 | 1520 | ||
1518 | out_busy: | ||
1519 | xfs_dqunlock(dqp); | ||
1520 | |||
1521 | /* | 1521 | /* |
1522 | * Move the dquot to the tail of the list so that we don't spin on it. | 1522 | * Move the dquot to the tail of the list so that we don't spin on it. |
1523 | */ | 1523 | */ |
1524 | out_unlock_move_tail: | ||
1525 | xfs_dqunlock(dqp); | ||
1526 | out_move_tail: | ||
1524 | list_move_tail(&dqp->q_lru, &qi->qi_lru_list); | 1527 | list_move_tail(&dqp->q_lru, &qi->qi_lru_list); |
1525 | |||
1526 | trace_xfs_dqreclaim_busy(dqp); | 1528 | trace_xfs_dqreclaim_busy(dqp); |
1527 | XFS_STATS_INC(xs_qm_dqreclaim_misses); | 1529 | XFS_STATS_INC(xs_qm_dqreclaim_misses); |
1528 | } | 1530 | } |
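
The relabelled exits in xfs_qm_dqreclaim_one() above are the staged-unwind idiom: each label undoes exactly what has been taken so far, so the trylock-failure path skips the unlock that the later paths need. A compilable sketch of the same shape with a plain pthread mutex standing in for the dquot lock:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int busy_count;

static void reclaim_one(int can_lock, int can_flush)
{
	if (!can_lock)
		goto out_move_tail;		/* nothing held yet */

	pthread_mutex_lock(&lock);
	if (!can_flush)
		goto out_unlock_move_tail;	/* must drop the lock first */

	/* ... actual reclaim work ... */
	pthread_mutex_unlock(&lock);
	return;

out_unlock_move_tail:
	pthread_mutex_unlock(&lock);
out_move_tail:
	busy_count++;		/* move to tail of the LRU in the real code */
}

int main(void)
{
	reclaim_one(0, 0);
	reclaim_one(1, 0);
	printf("busy=%d\n", busy_count);
	return 0;
}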
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 858a3b186110..5f53e75409b8 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | #include "xfs_qm.h" | 41 | #include "xfs_qm.h" |
42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
43 | #include "xfs_icache.h" | ||
43 | 44 | ||
44 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); | 45 | STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); |
45 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, | 46 | STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, |
@@ -845,7 +846,8 @@ STATIC int | |||
845 | xfs_dqrele_inode( | 846 | xfs_dqrele_inode( |
846 | struct xfs_inode *ip, | 847 | struct xfs_inode *ip, |
847 | struct xfs_perag *pag, | 848 | struct xfs_perag *pag, |
848 | int flags) | 849 | int flags, |
850 | void *args) | ||
849 | { | 851 | { |
850 | /* skip quota inodes */ | 852 | /* skip quota inodes */ |
851 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || | 853 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || |
@@ -881,5 +883,5 @@ xfs_qm_dqrele_all_inodes( | |||
881 | uint flags) | 883 | uint flags) |
882 | { | 884 | { |
883 | ASSERT(mp->m_quotainfo); | 885 | ASSERT(mp->m_quotainfo); |
884 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); | 886 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); |
885 | } | 887 | } |
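
Adding a void *args parameter to the per-inode callback, as the xfs_dqrele_inode() hunk does, lets one iterator serve callers that need private state; callers that need none, like xfs_qm_dqrele_all_inodes() above, just pass NULL. A small sketch of the pattern with invented names (item, walk_all, count_cb):

#include <stdio.h>

struct item { int id; };

typedef int (*walk_fn)(struct item *it, int flags, void *args);

static int walk_all(struct item *items, int n, walk_fn fn, int flags, void *args)
{
	int i, error;

	for (i = 0; i < n; i++) {
		error = fn(&items[i], flags, args);
		if (error)
			return error;
	}
	return 0;
}

static int count_cb(struct item *it, int flags, void *args)
{
	(void)it; (void)flags;
	(*(int *)args)++;		/* private state travels via args */
	return 0;
}

int main(void)
{
	struct item items[3] = { {1}, {2}, {3} };
	int count = 0;

	walk_all(items, 3, count_cb, 0, &count);
	printf("visited %d items\n", count);
	return 0;
}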
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ca28a4ba4b54..98dc670d3ee0 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "xfs_utils.h" | 38 | #include "xfs_utils.h" |
39 | #include "xfs_trace.h" | 39 | #include "xfs_trace.h" |
40 | #include "xfs_buf.h" | 40 | #include "xfs_buf.h" |
41 | #include "xfs_icache.h" | ||
41 | 42 | ||
42 | 43 | ||
43 | /* | 44 | /* |
@@ -869,7 +870,7 @@ xfs_rtbuf_get( | |||
869 | ASSERT(map.br_startblock != NULLFSBLOCK); | 870 | ASSERT(map.br_startblock != NULLFSBLOCK); |
870 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 871 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
871 | XFS_FSB_TO_DADDR(mp, map.br_startblock), | 872 | XFS_FSB_TO_DADDR(mp, map.br_startblock), |
872 | mp->m_bsize, 0, &bp); | 873 | mp->m_bsize, 0, &bp, NULL); |
873 | if (error) | 874 | if (error) |
874 | return error; | 875 | return error; |
875 | ASSERT(!xfs_buf_geterror(bp)); | 876 | ASSERT(!xfs_buf_geterror(bp)); |
@@ -1872,9 +1873,14 @@ xfs_growfs_rt( | |||
1872 | */ | 1873 | */ |
1873 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, | 1874 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, |
1874 | XFS_FSB_TO_BB(mp, nrblocks - 1), | 1875 | XFS_FSB_TO_BB(mp, nrblocks - 1), |
1875 | XFS_FSB_TO_BB(mp, 1), 0); | 1876 | XFS_FSB_TO_BB(mp, 1), 0, NULL); |
1876 | if (!bp) | 1877 | if (!bp) |
1877 | return EIO; | 1878 | return EIO; |
1879 | if (bp->b_error) { | ||
1880 | error = bp->b_error; | ||
1881 | xfs_buf_relse(bp); | ||
1882 | return error; | ||
1883 | } | ||
1878 | xfs_buf_relse(bp); | 1884 | xfs_buf_relse(bp); |
1879 | 1885 | ||
1880 | /* | 1886 | /* |
@@ -2219,9 +2225,11 @@ xfs_rtmount_init( | |||
2219 | } | 2225 | } |
2220 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, | 2226 | bp = xfs_buf_read_uncached(mp->m_rtdev_targp, |
2221 | d - XFS_FSB_TO_BB(mp, 1), | 2227 | d - XFS_FSB_TO_BB(mp, 1), |
2222 | XFS_FSB_TO_BB(mp, 1), 0); | 2228 | XFS_FSB_TO_BB(mp, 1), 0, NULL); |
2223 | if (!bp) { | 2229 | if (!bp || bp->b_error) { |
2224 | xfs_warn(mp, "realtime device size check failed"); | 2230 | xfs_warn(mp, "realtime device size check failed"); |
2231 | if (bp) | ||
2232 | xfs_buf_relse(bp); | ||
2225 | return EIO; | 2233 | return EIO; |
2226 | } | 2234 | } |
2227 | xfs_buf_relse(bp); | 2235 | xfs_buf_relse(bp); |
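
Both rtalloc hunks add the same check: with verifiers attached, an uncached read can now hand back a buffer whose b_error is set, and that buffer must still be released on the failure path. A toy sketch of the check-then-release shape, with malloc/free standing in for the buffer cache:

#include <stdio.h>
#include <stdlib.h>

struct buf { int b_error; };

static struct buf *read_uncached(int fail)
{
	struct buf *bp = calloc(1, sizeof(*bp));

	if (bp && fail)
		bp->b_error = 5;	/* EIO-style error set by a verifier */
	return bp;
}

static int size_check(int fail)
{
	struct buf *bp = read_uncached(fail);
	int error;

	if (!bp)
		return 5;		/* read failed outright */
	error = bp->b_error;		/* capture before releasing */
	free(bp);			/* release on both paths */
	return error;
}

int main(void)
{
	printf("%d %d\n", size_check(0), size_check(1));
	return 0;
}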
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index f429d9d5d325..a05b45175fb0 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -81,6 +81,7 @@ struct xfs_mount; | |||
81 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ | 81 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ |
82 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ | 82 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ |
83 | #define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ | 83 | #define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ |
84 | #define XFS_SB_VERSION2_CRCBIT 0x00000100 /* metadata CRCs */ | ||
84 | 85 | ||
85 | #define XFS_SB_VERSION2_OKREALFBITS \ | 86 | #define XFS_SB_VERSION2_OKREALFBITS \ |
86 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ | 87 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ |
@@ -503,6 +504,12 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) | |||
503 | (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); | 504 | (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); |
504 | } | 505 | } |
505 | 506 | ||
507 | static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp) | ||
508 | { | ||
509 | return (xfs_sb_version_hasmorebits(sbp) && | ||
510 | (sbp->sb_features2 & XFS_SB_VERSION2_CRCBIT)); | ||
511 | } | ||
512 | |||
506 | /* | 513 | /* |
507 | * end of superblock version macros | 514 | * end of superblock version macros |
508 | */ | 515 | */ |
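
The new xfs_sb_version_hascrc() follows the existing V2 feature-bit pattern: a features2 bit is only meaningful when the MOREBITS version flag says the field is populated at all. A standalone sketch of the two-level test; the bit names and layout here are made up for illustration:

#include <stdio.h>

#define V2_MOREBITS	0x001	/* the features2 field is valid at all */
#define V2_CRCBIT	0x100	/* hypothetical metadata-CRC feature bit */

struct sb { unsigned int versionbits; unsigned int features2; };

static int has_morebits(const struct sb *sbp)
{
	return sbp->versionbits & V2_MOREBITS;
}

static int has_crc(const struct sb *sbp)
{
	/* never trust features2 unless MOREBITS says it is populated */
	return has_morebits(sbp) && (sbp->features2 & V2_CRCBIT);
}

int main(void)
{
	struct sb with    = { V2_MOREBITS, V2_CRCBIT };
	struct sb without = { 0,           V2_CRCBIT };	/* stale bits ignored */

	printf("%d %d\n", has_crc(&with), has_crc(&without));
	return 0;
}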
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 26a09bd7f975..ab8839b26272 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -49,7 +49,7 @@ | |||
49 | #include "xfs_extfree_item.h" | 49 | #include "xfs_extfree_item.h" |
50 | #include "xfs_mru_cache.h" | 50 | #include "xfs_mru_cache.h" |
51 | #include "xfs_inode_item.h" | 51 | #include "xfs_inode_item.h" |
52 | #include "xfs_sync.h" | 52 | #include "xfs_icache.h" |
53 | #include "xfs_trace.h" | 53 | #include "xfs_trace.h" |
54 | 54 | ||
55 | #include <linux/namei.h> | 55 | #include <linux/namei.h> |
@@ -863,8 +863,30 @@ xfs_init_mount_workqueues( | |||
863 | WQ_MEM_RECLAIM, 0, mp->m_fsname); | 863 | WQ_MEM_RECLAIM, 0, mp->m_fsname); |
864 | if (!mp->m_cil_workqueue) | 864 | if (!mp->m_cil_workqueue) |
865 | goto out_destroy_unwritten; | 865 | goto out_destroy_unwritten; |
866 | |||
867 | mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", | ||
868 | WQ_NON_REENTRANT, 0, mp->m_fsname); | ||
869 | if (!mp->m_reclaim_workqueue) | ||
870 | goto out_destroy_cil; | ||
871 | |||
872 | mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", | ||
873 | WQ_NON_REENTRANT, 0, mp->m_fsname); | ||
874 | if (!mp->m_log_workqueue) | ||
875 | goto out_destroy_reclaim; | ||
876 | |||
877 | mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", | ||
878 | WQ_NON_REENTRANT, 0, mp->m_fsname); | ||
879 | if (!mp->m_eofblocks_workqueue) | ||
880 | goto out_destroy_log; | ||
881 | |||
866 | return 0; | 882 | return 0; |
867 | 883 | ||
884 | out_destroy_log: | ||
885 | destroy_workqueue(mp->m_log_workqueue); | ||
886 | out_destroy_reclaim: | ||
887 | destroy_workqueue(mp->m_reclaim_workqueue); | ||
888 | out_destroy_cil: | ||
889 | destroy_workqueue(mp->m_cil_workqueue); | ||
868 | out_destroy_unwritten: | 890 | out_destroy_unwritten: |
869 | destroy_workqueue(mp->m_unwritten_workqueue); | 891 | destroy_workqueue(mp->m_unwritten_workqueue); |
870 | out_destroy_data_iodone_queue: | 892 | out_destroy_data_iodone_queue: |
@@ -877,11 +899,32 @@ STATIC void | |||
877 | xfs_destroy_mount_workqueues( | 899 | xfs_destroy_mount_workqueues( |
878 | struct xfs_mount *mp) | 900 | struct xfs_mount *mp) |
879 | { | 901 | { |
902 | destroy_workqueue(mp->m_eofblocks_workqueue); | ||
903 | destroy_workqueue(mp->m_log_workqueue); | ||
904 | destroy_workqueue(mp->m_reclaim_workqueue); | ||
880 | destroy_workqueue(mp->m_cil_workqueue); | 905 | destroy_workqueue(mp->m_cil_workqueue); |
881 | destroy_workqueue(mp->m_data_workqueue); | 906 | destroy_workqueue(mp->m_data_workqueue); |
882 | destroy_workqueue(mp->m_unwritten_workqueue); | 907 | destroy_workqueue(mp->m_unwritten_workqueue); |
883 | } | 908 | } |
884 | 909 | ||
910 | /* | ||
911 | * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK | ||
912 | * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting | ||
913 | * for IO to complete so that we effectively throttle multiple callers to the | ||
914 | * rate at which IO is completing. | ||
915 | */ | ||
916 | void | ||
917 | xfs_flush_inodes( | ||
918 | struct xfs_mount *mp) | ||
919 | { | ||
920 | struct super_block *sb = mp->m_super; | ||
921 | |||
922 | if (down_read_trylock(&sb->s_umount)) { | ||
923 | sync_inodes_sb(sb); | ||
924 | up_read(&sb->s_umount); | ||
925 | } | ||
926 | } | ||
927 | |||
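
xfs_flush_inodes() above deliberately trylocks s_umount: if an unmount or remount already holds it, skipping the flush is safer than blocking behind it. The same guard in a compilable pthread sketch, with a process-local rwlock standing in for the VFS lock:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t umount_lock = PTHREAD_RWLOCK_INITIALIZER;

static void flush_inodes(void)
{
	/* back off rather than wait if a writer (unmount) holds the lock */
	if (pthread_rwlock_tryrdlock(&umount_lock) == 0) {
		puts("syncing inodes");	/* sync_inodes_sb() in the real code */
		pthread_rwlock_unlock(&umount_lock);
	} else {
		puts("umount in progress, skipping flush");
	}
}

int main(void)
{
	flush_inodes();
	return 0;
}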
885 | /* Catch misguided souls that try to use this interface on XFS */ | 928 | /* Catch misguided souls that try to use this interface on XFS */ |
886 | STATIC struct inode * | 929 | STATIC struct inode * |
887 | xfs_fs_alloc_inode( | 930 | xfs_fs_alloc_inode( |
@@ -1006,9 +1049,8 @@ xfs_fs_put_super( | |||
1006 | struct xfs_mount *mp = XFS_M(sb); | 1049 | struct xfs_mount *mp = XFS_M(sb); |
1007 | 1050 | ||
1008 | xfs_filestream_unmount(mp); | 1051 | xfs_filestream_unmount(mp); |
1009 | cancel_delayed_work_sync(&mp->m_sync_work); | ||
1010 | xfs_unmountfs(mp); | 1052 | xfs_unmountfs(mp); |
1011 | xfs_syncd_stop(mp); | 1053 | |
1012 | xfs_freesb(mp); | 1054 | xfs_freesb(mp); |
1013 | xfs_icsb_destroy_counters(mp); | 1055 | xfs_icsb_destroy_counters(mp); |
1014 | xfs_destroy_mount_workqueues(mp); | 1056 | xfs_destroy_mount_workqueues(mp); |
@@ -1023,7 +1065,6 @@ xfs_fs_sync_fs( | |||
1023 | int wait) | 1065 | int wait) |
1024 | { | 1066 | { |
1025 | struct xfs_mount *mp = XFS_M(sb); | 1067 | struct xfs_mount *mp = XFS_M(sb); |
1026 | int error; | ||
1027 | 1068 | ||
1028 | /* | 1069 | /* |
1029 | * Doing anything during the async pass would be counterproductive. | 1070 | * Doing anything during the async pass would be counterproductive. |
@@ -1031,17 +1072,14 @@ xfs_fs_sync_fs( | |||
1031 | if (!wait) | 1072 | if (!wait) |
1032 | return 0; | 1073 | return 0; |
1033 | 1074 | ||
1034 | error = xfs_quiesce_data(mp); | 1075 | xfs_log_force(mp, XFS_LOG_SYNC); |
1035 | if (error) | ||
1036 | return -error; | ||
1037 | |||
1038 | if (laptop_mode) { | 1076 | if (laptop_mode) { |
1039 | /* | 1077 | /* |
1040 | * The disk must be active because we're syncing. | 1078 | * The disk must be active because we're syncing. |
1041 | * We schedule xfssyncd now (now that the disk is | 1079 | * We schedule log work now (now that the disk is |
1042 | * active) instead of later (when it might not be). | 1080 | * active) instead of later (when it might not be). |
1043 | */ | 1081 | */ |
1044 | flush_delayed_work(&mp->m_sync_work); | 1082 | flush_delayed_work(&mp->m_log->l_work); |
1045 | } | 1083 | } |
1046 | 1084 | ||
1047 | return 0; | 1085 | return 0; |
@@ -1118,6 +1156,48 @@ xfs_restore_resvblks(struct xfs_mount *mp) | |||
1118 | xfs_reserve_blocks(mp, &resblks, NULL); | 1156 | xfs_reserve_blocks(mp, &resblks, NULL); |
1119 | } | 1157 | } |
1120 | 1158 | ||
1159 | /* | ||
1160 | * Trigger writeback of all the dirty metadata in the file system. | ||
1161 | * | ||
1162 | * This ensures that the metadata is written to its location on disk rather | ||
1163 | * than just existing in transactions in the log. This means after a quiesce | ||
1164 | * there is no log replay required to write the inodes to disk - this is the | ||
1165 | * primary difference between a sync and a quiesce. | ||
1166 | * | ||
1167 | * Note: xfs_log_quiesce() stops background log work - the callers must ensure | ||
1168 | * it is started again when appropriate. | ||
1169 | */ | ||
1170 | void | ||
1171 | xfs_quiesce_attr( | ||
1172 | struct xfs_mount *mp) | ||
1173 | { | ||
1174 | int error = 0; | ||
1175 | |||
1176 | /* wait for all modifications to complete */ | ||
1177 | while (atomic_read(&mp->m_active_trans) > 0) | ||
1178 | delay(100); | ||
1179 | |||
1180 | /* force the log to unpin objects from the now complete transactions */ | ||
1181 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
1182 | |||
1183 | /* reclaim inodes to do any IO before the freeze completes */ | ||
1184 | xfs_reclaim_inodes(mp, 0); | ||
1185 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
1186 | |||
1187 | /* Push the superblock and write an unmount record */ | ||
1188 | error = xfs_log_sbcount(mp); | ||
1189 | if (error) | ||
1190 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " | ||
1191 | "Frozen image may not be consistent."); | ||
1192 | /* | ||
1193 | * Just warn here till VFS can correctly support | ||
1194 | * read-only remount without racing. | ||
1195 | */ | ||
1196 | WARN_ON(atomic_read(&mp->m_active_trans) != 0); | ||
1197 | |||
1198 | xfs_log_quiesce(mp); | ||
1199 | } | ||
1200 | |||
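
The new xfs_quiesce_attr() drains activity in a fixed order: wait for in-flight transactions, force the log so they are unpinned, reclaim twice (an async pass to start I/O, a SYNC_WAIT pass to wait for it), then log the superblock counters and quiesce the log. A toy single-threaded sketch of that ordering, with an atomic counter standing in for m_active_trans; the step bodies are placeholders:

#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int active_trans;

static void quiesce(void)
{
	/* 1. wait for all modifications to complete */
	while (atomic_load(&active_trans) > 0)
		usleep(100 * 1000);	/* delay(100) in the kernel version */

	/* 2. force the log to unpin the now-complete transactions */
	puts("log force (sync)");

	/* 3. reclaim: async pass starts I/O, waiting pass completes it */
	puts("reclaim inodes (async)");
	puts("reclaim inodes (wait)");

	/* 4. finally write sb counters and the unmount record */
	puts("log sb counters + quiesce log");
}

int main(void)
{
	quiesce();
	return 0;
}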
1121 | STATIC int | 1201 | STATIC int |
1122 | xfs_fs_remount( | 1202 | xfs_fs_remount( |
1123 | struct super_block *sb, | 1203 | struct super_block *sb, |
@@ -1198,20 +1278,18 @@ xfs_fs_remount( | |||
1198 | * value if it is non-zero, otherwise go with the default. | 1278 | * value if it is non-zero, otherwise go with the default. |
1199 | */ | 1279 | */ |
1200 | xfs_restore_resvblks(mp); | 1280 | xfs_restore_resvblks(mp); |
1281 | xfs_log_work_queue(mp); | ||
1201 | } | 1282 | } |
1202 | 1283 | ||
1203 | /* rw -> ro */ | 1284 | /* rw -> ro */ |
1204 | if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { | 1285 | if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { |
1205 | /* | 1286 | /* |
1206 | * After we have synced the data but before we sync the | 1287 | * Before we sync the metadata, we need to free up the reserve |
1207 | * metadata, we need to free up the reserve block pool so that | 1288 | * block pool so that the used block count in the superblock on |
1208 | * the used block count in the superblock on disk is correct at | 1289 | * disk is correct at the end of the remount. Stash the current |
1209 | * the end of the remount. Stash the current reserve pool size | 1290 | * reserve pool size so that if we get remounted rw, we can |
1210 | * so that if we get remounted rw, we can return it to the same | 1291 | * return it to the same size. |
1211 | * size. | ||
1212 | */ | 1292 | */ |
1213 | |||
1214 | xfs_quiesce_data(mp); | ||
1215 | xfs_save_resvblks(mp); | 1293 | xfs_save_resvblks(mp); |
1216 | xfs_quiesce_attr(mp); | 1294 | xfs_quiesce_attr(mp); |
1217 | mp->m_flags |= XFS_MOUNT_RDONLY; | 1295 | mp->m_flags |= XFS_MOUNT_RDONLY; |
@@ -1243,6 +1321,7 @@ xfs_fs_unfreeze( | |||
1243 | struct xfs_mount *mp = XFS_M(sb); | 1321 | struct xfs_mount *mp = XFS_M(sb); |
1244 | 1322 | ||
1245 | xfs_restore_resvblks(mp); | 1323 | xfs_restore_resvblks(mp); |
1324 | xfs_log_work_queue(mp); | ||
1246 | return 0; | 1325 | return 0; |
1247 | } | 1326 | } |
1248 | 1327 | ||
@@ -1321,6 +1400,8 @@ xfs_fs_fill_super( | |||
1321 | spin_lock_init(&mp->m_sb_lock); | 1400 | spin_lock_init(&mp->m_sb_lock); |
1322 | mutex_init(&mp->m_growlock); | 1401 | mutex_init(&mp->m_growlock); |
1323 | atomic_set(&mp->m_active_trans, 0); | 1402 | atomic_set(&mp->m_active_trans, 0); |
1403 | INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); | ||
1404 | INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); | ||
1324 | 1405 | ||
1325 | mp->m_super = sb; | 1406 | mp->m_super = sb; |
1326 | sb->s_fs_info = mp; | 1407 | sb->s_fs_info = mp; |
@@ -1371,10 +1452,6 @@ xfs_fs_fill_super( | |||
1371 | /* | 1452 | /* |
1372 | * we must configure the block size in the superblock before we run the | 1453 | * we must configure the block size in the superblock before we run the |
1373 | * full mount process as the mount process can lookup and cache inodes. | 1454 | * full mount process as the mount process can lookup and cache inodes. |
1374 | * For the same reason we must also initialise the syncd and register | ||
1375 | * the inode cache shrinker so that inodes can be reclaimed during | ||
1376 | * operations like a quotacheck that iterate all inodes in the | ||
1377 | * filesystem. | ||
1378 | */ | 1455 | */ |
1379 | sb->s_magic = XFS_SB_MAGIC; | 1456 | sb->s_magic = XFS_SB_MAGIC; |
1380 | sb->s_blocksize = mp->m_sb.sb_blocksize; | 1457 | sb->s_blocksize = mp->m_sb.sb_blocksize; |
@@ -1384,13 +1461,9 @@ xfs_fs_fill_super( | |||
1384 | sb->s_time_gran = 1; | 1461 | sb->s_time_gran = 1; |
1385 | set_posix_acl_flag(sb); | 1462 | set_posix_acl_flag(sb); |
1386 | 1463 | ||
1387 | error = xfs_syncd_init(mp); | ||
1388 | if (error) | ||
1389 | goto out_filestream_unmount; | ||
1390 | |||
1391 | error = xfs_mountfs(mp); | 1464 | error = xfs_mountfs(mp); |
1392 | if (error) | 1465 | if (error) |
1393 | goto out_syncd_stop; | 1466 | goto out_filestream_unmount; |
1394 | 1467 | ||
1395 | root = igrab(VFS_I(mp->m_rootip)); | 1468 | root = igrab(VFS_I(mp->m_rootip)); |
1396 | if (!root) { | 1469 | if (!root) { |
@@ -1408,8 +1481,7 @@ xfs_fs_fill_super( | |||
1408 | } | 1481 | } |
1409 | 1482 | ||
1410 | return 0; | 1483 | return 0; |
1411 | out_syncd_stop: | 1484 | |
1412 | xfs_syncd_stop(mp); | ||
1413 | out_filestream_unmount: | 1485 | out_filestream_unmount: |
1414 | xfs_filestream_unmount(mp); | 1486 | xfs_filestream_unmount(mp); |
1415 | out_free_sb: | 1487 | out_free_sb: |
@@ -1429,7 +1501,6 @@ out_destroy_workqueues: | |||
1429 | out_unmount: | 1501 | out_unmount: |
1430 | xfs_filestream_unmount(mp); | 1502 | xfs_filestream_unmount(mp); |
1431 | xfs_unmountfs(mp); | 1503 | xfs_unmountfs(mp); |
1432 | xfs_syncd_stop(mp); | ||
1433 | goto out_free_sb; | 1504 | goto out_free_sb; |
1434 | } | 1505 | } |
1435 | 1506 | ||
@@ -1625,16 +1696,6 @@ STATIC int __init | |||
1625 | xfs_init_workqueues(void) | 1696 | xfs_init_workqueues(void) |
1626 | { | 1697 | { |
1627 | /* | 1698 | /* |
1628 | * We never want the same work item to run twice; reclaiming inodes | ||
1629 | * or idling the log is not going to get any faster by multiple CPUs | ||
1630 | * competing for resources. Use the default large max_active value | ||
1631 | * so that even lots of filesystems can perform these tasks in parallel. | ||
1632 | */ | ||
1633 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0); | ||
1634 | if (!xfs_syncd_wq) | ||
1635 | return -ENOMEM; | ||
1636 | |||
1637 | /* | ||
1638 | * The allocation workqueue can be used in memory reclaim situations | 1699 | * The allocation workqueue can be used in memory reclaim situations |
1639 | * (writepage path), and parallelism is only limited by the number of | 1700 | * (writepage path), and parallelism is only limited by the number of |
1640 | * AGs in all the filesystems mounted. Hence use the default large | 1701 | * AGs in all the filesystems mounted. Hence use the default large |
@@ -1642,20 +1703,15 @@ xfs_init_workqueues(void) | |||
1642 | */ | 1703 | */ |
1643 | xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); | 1704 | xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0); |
1644 | if (!xfs_alloc_wq) | 1705 | if (!xfs_alloc_wq) |
1645 | goto out_destroy_syncd; | 1706 | return -ENOMEM; |
1646 | 1707 | ||
1647 | return 0; | 1708 | return 0; |
1648 | |||
1649 | out_destroy_syncd: | ||
1650 | destroy_workqueue(xfs_syncd_wq); | ||
1651 | return -ENOMEM; | ||
1652 | } | 1709 | } |
1653 | 1710 | ||
1654 | STATIC void | 1711 | STATIC void |
1655 | xfs_destroy_workqueues(void) | 1712 | xfs_destroy_workqueues(void) |
1656 | { | 1713 | { |
1657 | destroy_workqueue(xfs_alloc_wq); | 1714 | destroy_workqueue(xfs_alloc_wq); |
1658 | destroy_workqueue(xfs_syncd_wq); | ||
1659 | } | 1715 | } |
1660 | 1716 | ||
1661 | STATIC int __init | 1717 | STATIC int __init |
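
The workqueue additions in xfs_init_mount_workqueues() above grow the classic allocation ladder: each failure label tears down only what the earlier steps created, strictly in reverse order, and teardown at unmount mirrors it. A self-contained sketch with malloc standing in for alloc_workqueue() and invented field names:

#include <stdlib.h>

struct mount_demo { void *a, *b, *c; };

static int init_resources(struct mount_demo *mp)
{
	mp->a = malloc(16);
	if (!mp->a)
		goto out;
	mp->b = malloc(16);
	if (!mp->b)
		goto out_free_a;
	mp->c = malloc(16);
	if (!mp->c)
		goto out_free_b;
	return 0;

out_free_b:			/* unwind in reverse order of setup */
	free(mp->b);
out_free_a:
	free(mp->a);
out:
	return -1;
}

int main(void)
{
	struct mount_demo mp = { 0 };

	if (init_resources(&mp))
		return 1;
	free(mp.c); free(mp.b); free(mp.a);	/* destroy in reverse, like unmount */
	return 0;
}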
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 9de4a920ba05..bbe3d15a7904 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h | |||
@@ -74,6 +74,7 @@ struct block_device; | |||
74 | 74 | ||
75 | extern __uint64_t xfs_max_file_offset(unsigned int); | 75 | extern __uint64_t xfs_max_file_offset(unsigned int); |
76 | 76 | ||
77 | extern void xfs_flush_inodes(struct xfs_mount *mp); | ||
77 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); | 78 | extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); |
78 | extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *); | 79 | extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *); |
79 | extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *); | 80 | extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *); |
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index ee2d2adaa438..2801b5ce6cdb 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c | |||
@@ -202,6 +202,15 @@ static ctl_table xfs_table[] = { | |||
202 | .extra1 = &xfs_params.fstrm_timer.min, | 202 | .extra1 = &xfs_params.fstrm_timer.min, |
203 | .extra2 = &xfs_params.fstrm_timer.max, | 203 | .extra2 = &xfs_params.fstrm_timer.max, |
204 | }, | 204 | }, |
205 | { | ||
206 | .procname = "speculative_prealloc_lifetime", | ||
207 | .data = &xfs_params.eofb_timer.val, | ||
208 | .maxlen = sizeof(int), | ||
209 | .mode = 0644, | ||
210 | .proc_handler = proc_dointvec_minmax, | ||
211 | .extra1 = &xfs_params.eofb_timer.min, | ||
212 | .extra2 = &xfs_params.eofb_timer.max, | ||
213 | }, | ||
205 | /* please keep this the last entry */ | 214 | /* please keep this the last entry */ |
206 | #ifdef CONFIG_PROC_FS | 215 | #ifdef CONFIG_PROC_FS |
207 | { | 216 | { |
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index b9937d450f8e..bd8e157c20ef 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h | |||
@@ -47,6 +47,7 @@ typedef struct xfs_param { | |||
47 | xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ | 47 | xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ |
48 | xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ | 48 | xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ |
49 | xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */ | 49 | xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */ |
50 | xfs_sysctl_val_t eofb_timer; /* Interval between eofb scan wakeups */ | ||
50 | } xfs_param_t; | 51 | } xfs_param_t; |
51 | 52 | ||
52 | /* | 53 | /* |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7d36ccf57f93..2e137d4a85ae 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -96,6 +96,8 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full); | |||
96 | DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); | 96 | DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); |
97 | DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); | 97 | DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); |
98 | DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); | 98 | DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); |
99 | DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list); | ||
100 | DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list); | ||
99 | 101 | ||
100 | DECLARE_EVENT_CLASS(xfs_perag_class, | 102 | DECLARE_EVENT_CLASS(xfs_perag_class, |
101 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, | 103 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, |
@@ -130,6 +132,8 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); | |||
130 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); | 132 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); |
131 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); | 133 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); |
132 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); | 134 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); |
135 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks); | ||
136 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks); | ||
133 | 137 | ||
134 | TRACE_EVENT(xfs_attr_list_node_descend, | 138 | TRACE_EVENT(xfs_attr_list_node_descend, |
135 | TP_PROTO(struct xfs_attr_list_context *ctx, | 139 | TP_PROTO(struct xfs_attr_list_context *ctx, |
@@ -585,6 +589,10 @@ DEFINE_INODE_EVENT(xfs_update_time); | |||
585 | DEFINE_INODE_EVENT(xfs_dquot_dqalloc); | 589 | DEFINE_INODE_EVENT(xfs_dquot_dqalloc); |
586 | DEFINE_INODE_EVENT(xfs_dquot_dqdetach); | 590 | DEFINE_INODE_EVENT(xfs_dquot_dqdetach); |
587 | 591 | ||
592 | DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag); | ||
593 | DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); | ||
594 | DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); | ||
595 | |||
588 | DECLARE_EVENT_CLASS(xfs_iref_class, | 596 | DECLARE_EVENT_CLASS(xfs_iref_class, |
589 | TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), | 597 | TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), |
590 | TP_ARGS(ip, caller_ip), | 598 | TP_ARGS(ip, caller_ip), |
@@ -1496,8 +1504,42 @@ DEFINE_DIR2_EVENT(xfs_dir2_node_replace); | |||
1496 | DEFINE_DIR2_EVENT(xfs_dir2_node_removename); | 1504 | DEFINE_DIR2_EVENT(xfs_dir2_node_removename); |
1497 | DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); | 1505 | DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); |
1498 | 1506 | ||
1507 | DECLARE_EVENT_CLASS(xfs_attr_class, | ||
1508 | TP_PROTO(struct xfs_da_args *args), | ||
1509 | TP_ARGS(args), | ||
1510 | TP_STRUCT__entry( | ||
1511 | __field(dev_t, dev) | ||
1512 | __field(xfs_ino_t, ino) | ||
1513 | __dynamic_array(char, name, args->namelen) | ||
1514 | __field(int, namelen) | ||
1515 | __field(int, valuelen) | ||
1516 | __field(xfs_dahash_t, hashval) | ||
1517 | __field(int, op_flags) | ||
1518 | ), | ||
1519 | TP_fast_assign( | ||
1520 | __entry->dev = VFS_I(args->dp)->i_sb->s_dev; | ||
1521 | __entry->ino = args->dp->i_ino; | ||
1522 | if (args->namelen) | ||
1523 | memcpy(__get_str(name), args->name, args->namelen); | ||
1524 | __entry->namelen = args->namelen; | ||
1525 | __entry->valuelen = args->valuelen; | ||
1526 | __entry->hashval = args->hashval; | ||
1527 | __entry->op_flags = args->op_flags; | ||
1528 | ), | ||
1529 | TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d " | ||
1530 | "hashval 0x%x op_flags %s", | ||
1531 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1532 | __entry->ino, | ||
1533 | __entry->namelen, | ||
1534 | __entry->namelen ? __get_str(name) : NULL, | ||
1535 | __entry->namelen, | ||
1536 | __entry->valuelen, | ||
1537 | __entry->hashval, | ||
1538 | __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) | ||
1539 | ) | ||
1540 | |||
1499 | #define DEFINE_ATTR_EVENT(name) \ | 1541 | #define DEFINE_ATTR_EVENT(name) \ |
1500 | DEFINE_EVENT(xfs_da_class, name, \ | 1542 | DEFINE_EVENT(xfs_attr_class, name, \ |
1501 | TP_PROTO(struct xfs_da_args *args), \ | 1543 | TP_PROTO(struct xfs_da_args *args), \ |
1502 | TP_ARGS(args)) | 1544 | TP_ARGS(args)) |
1503 | DEFINE_ATTR_EVENT(xfs_attr_sf_add); | 1545 | DEFINE_ATTR_EVENT(xfs_attr_sf_add); |
@@ -1511,10 +1553,14 @@ DEFINE_ATTR_EVENT(xfs_attr_sf_to_leaf); | |||
1511 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add); | 1553 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add); |
1512 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old); | 1554 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add_old); |
1513 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new); | 1555 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add_new); |
1556 | DEFINE_ATTR_EVENT(xfs_attr_leaf_add_work); | ||
1514 | DEFINE_ATTR_EVENT(xfs_attr_leaf_addname); | 1557 | DEFINE_ATTR_EVENT(xfs_attr_leaf_addname); |
1515 | DEFINE_ATTR_EVENT(xfs_attr_leaf_create); | 1558 | DEFINE_ATTR_EVENT(xfs_attr_leaf_create); |
1559 | DEFINE_ATTR_EVENT(xfs_attr_leaf_compact); | ||
1560 | DEFINE_ATTR_EVENT(xfs_attr_leaf_get); | ||
1516 | DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup); | 1561 | DEFINE_ATTR_EVENT(xfs_attr_leaf_lookup); |
1517 | DEFINE_ATTR_EVENT(xfs_attr_leaf_replace); | 1562 | DEFINE_ATTR_EVENT(xfs_attr_leaf_replace); |
1563 | DEFINE_ATTR_EVENT(xfs_attr_leaf_remove); | ||
1518 | DEFINE_ATTR_EVENT(xfs_attr_leaf_removename); | 1564 | DEFINE_ATTR_EVENT(xfs_attr_leaf_removename); |
1519 | DEFINE_ATTR_EVENT(xfs_attr_leaf_split); | 1565 | DEFINE_ATTR_EVENT(xfs_attr_leaf_split); |
1520 | DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before); | 1566 | DEFINE_ATTR_EVENT(xfs_attr_leaf_split_before); |
@@ -1526,12 +1572,21 @@ DEFINE_ATTR_EVENT(xfs_attr_leaf_to_sf); | |||
1526 | DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node); | 1572 | DEFINE_ATTR_EVENT(xfs_attr_leaf_to_node); |
1527 | DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance); | 1573 | DEFINE_ATTR_EVENT(xfs_attr_leaf_rebalance); |
1528 | DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance); | 1574 | DEFINE_ATTR_EVENT(xfs_attr_leaf_unbalance); |
1575 | DEFINE_ATTR_EVENT(xfs_attr_leaf_toosmall); | ||
1529 | 1576 | ||
1530 | DEFINE_ATTR_EVENT(xfs_attr_node_addname); | 1577 | DEFINE_ATTR_EVENT(xfs_attr_node_addname); |
1578 | DEFINE_ATTR_EVENT(xfs_attr_node_get); | ||
1531 | DEFINE_ATTR_EVENT(xfs_attr_node_lookup); | 1579 | DEFINE_ATTR_EVENT(xfs_attr_node_lookup); |
1532 | DEFINE_ATTR_EVENT(xfs_attr_node_replace); | 1580 | DEFINE_ATTR_EVENT(xfs_attr_node_replace); |
1533 | DEFINE_ATTR_EVENT(xfs_attr_node_removename); | 1581 | DEFINE_ATTR_EVENT(xfs_attr_node_removename); |
1534 | 1582 | ||
1583 | DEFINE_ATTR_EVENT(xfs_attr_fillstate); | ||
1584 | DEFINE_ATTR_EVENT(xfs_attr_refillstate); | ||
1585 | |||
1586 | DEFINE_ATTR_EVENT(xfs_attr_rmtval_get); | ||
1587 | DEFINE_ATTR_EVENT(xfs_attr_rmtval_set); | ||
1588 | DEFINE_ATTR_EVENT(xfs_attr_rmtval_remove); | ||
1589 | |||
1535 | #define DEFINE_DA_EVENT(name) \ | 1590 | #define DEFINE_DA_EVENT(name) \ |
1536 | DEFINE_EVENT(xfs_da_class, name, \ | 1591 | DEFINE_EVENT(xfs_da_class, name, \ |
1537 | TP_PROTO(struct xfs_da_args *args), \ | 1592 | TP_PROTO(struct xfs_da_args *args), \ |
@@ -1550,9 +1605,12 @@ DEFINE_DA_EVENT(xfs_da_node_split); | |||
1550 | DEFINE_DA_EVENT(xfs_da_node_remove); | 1605 | DEFINE_DA_EVENT(xfs_da_node_remove); |
1551 | DEFINE_DA_EVENT(xfs_da_node_rebalance); | 1606 | DEFINE_DA_EVENT(xfs_da_node_rebalance); |
1552 | DEFINE_DA_EVENT(xfs_da_node_unbalance); | 1607 | DEFINE_DA_EVENT(xfs_da_node_unbalance); |
1608 | DEFINE_DA_EVENT(xfs_da_node_toosmall); | ||
1553 | DEFINE_DA_EVENT(xfs_da_swap_lastblock); | 1609 | DEFINE_DA_EVENT(xfs_da_swap_lastblock); |
1554 | DEFINE_DA_EVENT(xfs_da_grow_inode); | 1610 | DEFINE_DA_EVENT(xfs_da_grow_inode); |
1555 | DEFINE_DA_EVENT(xfs_da_shrink_inode); | 1611 | DEFINE_DA_EVENT(xfs_da_shrink_inode); |
1612 | DEFINE_DA_EVENT(xfs_da_fixhashpath); | ||
1613 | DEFINE_DA_EVENT(xfs_da_path_shift); | ||
1556 | 1614 | ||
1557 | DECLARE_EVENT_CLASS(xfs_dir2_space_class, | 1615 | DECLARE_EVENT_CLASS(xfs_dir2_space_class, |
1558 | TP_PROTO(struct xfs_da_args *args, int idx), | 1616 | TP_PROTO(struct xfs_da_args *args, int idx), |
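
The new xfs_attr_class above carries the attribute name as a dynamic array plus a namelen field because xattr names are not NUL-terminated strings; the paired arguments behind "%.*s" in its TP_printk print exactly namelen bytes. A plain-C illustration of that formatting:

#include <stdio.h>

int main(void)
{
	/* attribute name carried as (pointer, length), no terminator */
	const char raw[] = { 'u', 's', 'e', 'r', '.', 'x' };
	int namelen = (int)sizeof(raw);

	/* %.*s consumes the length argument first, then the pointer */
	printf("name %.*s namelen %d\n", namelen, raw, namelen);
	return 0;
}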
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index db056544cbb5..c6c0601abd7a 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -464,10 +464,7 @@ xfs_trans_get_buf( | |||
464 | int numblks, | 464 | int numblks, |
465 | uint flags) | 465 | uint flags) |
466 | { | 466 | { |
467 | struct xfs_buf_map map = { | 467 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); |
468 | .bm_bn = blkno, | ||
469 | .bm_len = numblks, | ||
470 | }; | ||
471 | return xfs_trans_get_buf_map(tp, target, &map, 1, flags); | 468 | return xfs_trans_get_buf_map(tp, target, &map, 1, flags); |
472 | } | 469 | } |
473 | 470 | ||
@@ -476,7 +473,8 @@ int xfs_trans_read_buf_map(struct xfs_mount *mp, | |||
476 | struct xfs_buftarg *target, | 473 | struct xfs_buftarg *target, |
477 | struct xfs_buf_map *map, int nmaps, | 474 | struct xfs_buf_map *map, int nmaps, |
478 | xfs_buf_flags_t flags, | 475 | xfs_buf_flags_t flags, |
479 | struct xfs_buf **bpp); | 476 | struct xfs_buf **bpp, |
477 | const struct xfs_buf_ops *ops); | ||
480 | 478 | ||
481 | static inline int | 479 | static inline int |
482 | xfs_trans_read_buf( | 480 | xfs_trans_read_buf( |
@@ -486,13 +484,12 @@ xfs_trans_read_buf( | |||
486 | xfs_daddr_t blkno, | 484 | xfs_daddr_t blkno, |
487 | int numblks, | 485 | int numblks, |
488 | xfs_buf_flags_t flags, | 486 | xfs_buf_flags_t flags, |
489 | struct xfs_buf **bpp) | 487 | struct xfs_buf **bpp, |
488 | const struct xfs_buf_ops *ops) | ||
490 | { | 489 | { |
491 | struct xfs_buf_map map = { | 490 | DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); |
492 | .bm_bn = blkno, | 491 | return xfs_trans_read_buf_map(mp, tp, target, &map, 1, |
493 | .bm_len = numblks, | 492 | flags, bpp, ops); |
494 | }; | ||
495 | return xfs_trans_read_buf_map(mp, tp, target, &map, 1, flags, bpp); | ||
496 | } | 493 | } |
497 | 494 | ||
498 | struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); | 495 | struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); |
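
DEFINE_SINGLE_BUF_MAP collapses the repeated two-field initializer shown on the left into a single declaration. A sketch of what such a macro looks like, assuming a (block, length) map like the xfs_buf_map in the old code; the names here mirror the kernel's but the definition is illustrative:

#include <stdio.h>

struct buf_map {
	long bm_bn;	/* starting block number */
	int  bm_len;	/* length in blocks */
};

/* one declaration-plus-initialization, mirroring the kernel macro's shape */
#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblks) \
	struct buf_map map = { .bm_bn = (blkno), .bm_len = (numblks) }

int main(void)
{
	DEFINE_SINGLE_BUF_MAP(map, 128, 8);

	printf("bn=%ld len=%d\n", map.bm_bn, map.bm_len);
	return 0;
}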
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 6311b99c267f..4fc17d479d42 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -257,7 +257,8 @@ xfs_trans_read_buf_map( | |||
257 | struct xfs_buf_map *map, | 257 | struct xfs_buf_map *map, |
258 | int nmaps, | 258 | int nmaps, |
259 | xfs_buf_flags_t flags, | 259 | xfs_buf_flags_t flags, |
260 | struct xfs_buf **bpp) | 260 | struct xfs_buf **bpp, |
261 | const struct xfs_buf_ops *ops) | ||
261 | { | 262 | { |
262 | xfs_buf_t *bp; | 263 | xfs_buf_t *bp; |
263 | xfs_buf_log_item_t *bip; | 264 | xfs_buf_log_item_t *bip; |
@@ -265,7 +266,7 @@ xfs_trans_read_buf_map( | |||
265 | 266 | ||
266 | *bpp = NULL; | 267 | *bpp = NULL; |
267 | if (!tp) { | 268 | if (!tp) { |
268 | bp = xfs_buf_read_map(target, map, nmaps, flags); | 269 | bp = xfs_buf_read_map(target, map, nmaps, flags, ops); |
269 | if (!bp) | 270 | if (!bp) |
270 | return (flags & XBF_TRYLOCK) ? | 271 | return (flags & XBF_TRYLOCK) ? |
271 | EAGAIN : XFS_ERROR(ENOMEM); | 272 | EAGAIN : XFS_ERROR(ENOMEM); |
@@ -312,7 +313,9 @@ xfs_trans_read_buf_map( | |||
312 | if (!(XFS_BUF_ISDONE(bp))) { | 313 | if (!(XFS_BUF_ISDONE(bp))) { |
313 | trace_xfs_trans_read_buf_io(bp, _RET_IP_); | 314 | trace_xfs_trans_read_buf_io(bp, _RET_IP_); |
314 | ASSERT(!XFS_BUF_ISASYNC(bp)); | 315 | ASSERT(!XFS_BUF_ISASYNC(bp)); |
316 | ASSERT(bp->b_iodone == NULL); | ||
315 | XFS_BUF_READ(bp); | 317 | XFS_BUF_READ(bp); |
318 | bp->b_ops = ops; | ||
316 | xfsbdstrat(tp->t_mountp, bp); | 319 | xfsbdstrat(tp->t_mountp, bp); |
317 | error = xfs_buf_iowait(bp); | 320 | error = xfs_buf_iowait(bp); |
318 | if (error) { | 321 | if (error) { |
@@ -349,7 +352,7 @@ xfs_trans_read_buf_map( | |||
349 | return 0; | 352 | return 0; |
350 | } | 353 | } |
351 | 354 | ||
352 | bp = xfs_buf_read_map(target, map, nmaps, flags); | 355 | bp = xfs_buf_read_map(target, map, nmaps, flags, ops); |
353 | if (bp == NULL) { | 356 | if (bp == NULL) { |
354 | *bpp = NULL; | 357 | *bpp = NULL; |
355 | return (flags & XBF_TRYLOCK) ? | 358 | return (flags & XBF_TRYLOCK) ? |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 2a5c637344b4..d95f565a390e 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include "xfs_filestream.h" | 47 | #include "xfs_filestream.h" |
48 | #include "xfs_vnodeops.h" | 48 | #include "xfs_vnodeops.h" |
49 | #include "xfs_trace.h" | 49 | #include "xfs_trace.h" |
50 | #include "xfs_icache.h" | ||
50 | 51 | ||
51 | /* | 52 | /* |
52 | * The maximum pathlen is 1024 bytes. Since the minimum file system | 53 | * The maximum pathlen is 1024 bytes. Since the minimum file system |
@@ -79,7 +80,7 @@ xfs_readlink_bmap( | |||
79 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); | 80 | d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); |
80 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); | 81 | byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); |
81 | 82 | ||
82 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); | 83 | bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, NULL); |
83 | if (!bp) | 84 | if (!bp) |
84 | return XFS_ERROR(ENOMEM); | 85 | return XFS_ERROR(ENOMEM); |
85 | error = bp->b_error; | 86 | error = bp->b_error; |
@@ -150,7 +151,7 @@ xfs_readlink( | |||
150 | * when the link count isn't zero and by xfs_dm_punch_hole() when | 151 | * when the link count isn't zero and by xfs_dm_punch_hole() when |
151 | * punching a hole to EOF. | 152 | * punching a hole to EOF. |
152 | */ | 153 | */ |
153 | STATIC int | 154 | int |
154 | xfs_free_eofblocks( | 155 | xfs_free_eofblocks( |
155 | xfs_mount_t *mp, | 156 | xfs_mount_t *mp, |
156 | xfs_inode_t *ip, | 157 | xfs_inode_t *ip, |
@@ -199,7 +200,7 @@ xfs_free_eofblocks( | |||
199 | if (need_iolock) { | 200 | if (need_iolock) { |
200 | if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { | 201 | if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { |
201 | xfs_trans_cancel(tp, 0); | 202 | xfs_trans_cancel(tp, 0); |
202 | return 0; | 203 | return EAGAIN; |
203 | } | 204 | } |
204 | } | 205 | } |
205 | 206 | ||
@@ -237,6 +238,8 @@ xfs_free_eofblocks( | |||
237 | } else { | 238 | } else { |
238 | error = xfs_trans_commit(tp, | 239 | error = xfs_trans_commit(tp, |
239 | XFS_TRANS_RELEASE_LOG_RES); | 240 | XFS_TRANS_RELEASE_LOG_RES); |
241 | if (!error) | ||
242 | xfs_inode_clear_eofblocks_tag(ip); | ||
240 | } | 243 | } |
241 | 244 | ||
242 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 245 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
@@ -425,19 +428,18 @@ xfs_release( | |||
425 | truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); | 428 | truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); |
426 | if (truncated) { | 429 | if (truncated) { |
427 | xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); | 430 | xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); |
428 | if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) | 431 | if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) { |
429 | xfs_flush_pages(ip, 0, -1, XBF_ASYNC, FI_NONE); | 432 | error = -filemap_flush(VFS_I(ip)->i_mapping); |
433 | if (error) | ||
434 | return error; | ||
435 | } | ||
430 | } | 436 | } |
431 | } | 437 | } |
432 | 438 | ||
433 | if (ip->i_d.di_nlink == 0) | 439 | if (ip->i_d.di_nlink == 0) |
434 | return 0; | 440 | return 0; |
435 | 441 | ||
436 | if ((S_ISREG(ip->i_d.di_mode) && | 442 | if (xfs_can_free_eofblocks(ip, false)) { |
437 | (VFS_I(ip)->i_size > 0 || | ||
438 | (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && | ||
439 | (ip->i_df.if_flags & XFS_IFEXTENTS)) && | ||
440 | (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { | ||
441 | 443 | ||
442 | /* | 444 | /* |
443 | * If we can't get the iolock just skip truncating the blocks | 445 | * If we can't get the iolock just skip truncating the blocks |
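xfs_release() now calls the generic filemap_flush() instead of the XFS-private xfs_flush_pages() wrapper. For reference, the generic helper amounts to an asynchronous writeback of the whole mapping (paraphrased from mm/filemap.c of this era); the leading minus in the hunk converts the VFS's negative errno into the positive convention XFS uses internally:

	/* paraphrase: filemap_flush() starts WB_SYNC_NONE writeback on a mapping */
	int filemap_flush(struct address_space *mapping)
	{
		return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
	}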
@@ -464,7 +466,7 @@ xfs_release( | |||
464 | return 0; | 466 | return 0; |
465 | 467 | ||
466 | error = xfs_free_eofblocks(mp, ip, true); | 468 | error = xfs_free_eofblocks(mp, ip, true); |
467 | if (error) | 469 | if (error && error != EAGAIN) |
468 | return error; | 470 | return error; |
469 | 471 | ||
470 | /* delalloc blocks after truncation means it really is dirty */ | 472 | /* delalloc blocks after truncation means it really is dirty */ |
@@ -513,13 +515,12 @@ xfs_inactive( | |||
513 | goto out; | 515 | goto out; |
514 | 516 | ||
515 | if (ip->i_d.di_nlink != 0) { | 517 | if (ip->i_d.di_nlink != 0) { |
516 | if ((S_ISREG(ip->i_d.di_mode) && | 518 | /* |
517 | (VFS_I(ip)->i_size > 0 || | 519 | * force is true because we are evicting an inode from the |
518 | (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && | 520 | * cache. Post-eof blocks must be freed, lest we end up with |
519 | (ip->i_df.if_flags & XFS_IFEXTENTS) && | 521 | * broken free space accounting. |
520 | (!(ip->i_d.di_flags & | 522 | */ |
521 | (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || | 523 | if (xfs_can_free_eofblocks(ip, true)) { |
522 | ip->i_delayed_blks != 0))) { | ||
523 | error = xfs_free_eofblocks(mp, ip, false); | 524 | error = xfs_free_eofblocks(mp, ip, false); |
524 | if (error) | 525 | if (error) |
525 | return VN_INACTIVE_CACHE; | 526 | return VN_INACTIVE_CACHE; |
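Both this hunk and the xfs_release() hunk above collapse a long open-coded predicate into xfs_can_free_eofblocks(). A hedged reconstruction of that helper from the two removed conditions (the real function is added elsewhere in this series and may differ in wording):

	bool
	xfs_can_free_eofblocks(
		struct xfs_inode	*ip,
		bool			force)
	{
		/* only regular files carry speculative preallocation */
		if (!S_ISREG(ip->i_d.di_mode))
			return false;

		/* nothing to trim on an empty file with no cached or delalloc data */
		if (VFS_I(ip)->i_size == 0 &&
		    VN_CACHED(VFS_I(ip)) == 0 &&
		    ip->i_delayed_blks == 0)
			return false;

		/* don't read the extent list in just for this */
		if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
			return false;

		/*
		 * prealloc/append files are only trimmed under force (inode
		 * eviction), and then only if delalloc blocks remain
		 */
		if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
			return force && ip->i_delayed_blks != 0;

		return true;
	}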
@@ -777,7 +778,7 @@ xfs_create( | |||
777 | XFS_TRANS_PERM_LOG_RES, log_count); | 778 | XFS_TRANS_PERM_LOG_RES, log_count); |
778 | if (error == ENOSPC) { | 779 | if (error == ENOSPC) { |
779 | /* flush outstanding delalloc blocks and retry */ | 780 | /* flush outstanding delalloc blocks and retry */ |
780 | xfs_flush_inodes(dp); | 781 | xfs_flush_inodes(mp); |
781 | error = xfs_trans_reserve(tp, resblks, log_res, 0, | 782 | error = xfs_trans_reserve(tp, resblks, log_res, 0, |
782 | XFS_TRANS_PERM_LOG_RES, log_count); | 783 | XFS_TRANS_PERM_LOG_RES, log_count); |
783 | } | 784 | } |
@@ -1957,12 +1958,11 @@ xfs_free_file_space( | |||
1957 | 1958 | ||
1958 | rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); | 1959 | rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); |
1959 | ioffset = offset & ~(rounding - 1); | 1960 | ioffset = offset & ~(rounding - 1); |
1960 | 1961 | error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | |
1961 | if (VN_CACHED(VFS_I(ip)) != 0) { | 1962 | ioffset, -1); |
1962 | error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); | 1963 | if (error) |
1963 | if (error) | 1964 | goto out_unlock_iolock; |
1964 | goto out_unlock_iolock; | 1965 | truncate_pagecache_range(VFS_I(ip), ioffset, -1); |
1965 | } | ||
1966 | 1966 | ||
1967 | /* | 1967 | /* |
1968 | * Need to zero the stuff we're not freeing, on disk. | 1968 | * Need to zero the stuff we're not freeing, on disk. |
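The xfs_free_file_space() hunk above swaps the XFS-private xfs_flushinval_pages() for two generic calls, and the ordering matters: dirty pages over the range are written back before the cached pages are dropped, so no data is lost ahead of the blocks being freed. A sketch of the same sequence as a standalone helper (the helper name is hypothetical; -1 as the end offset means "through end of file"):

	static int
	punch_prepare_pagecache(struct inode *inode, loff_t start)
	{
		int error;

		/* write dirty pages from start to EOF back to disk first ... */
		error = filemap_write_and_wait_range(inode->i_mapping, start, -1);
		if (error)
			return error;
		/* ... then discard the now-clean pages over the punched range */
		truncate_pagecache_range(inode, start, -1);
		return 0;
	}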
@@ -2095,6 +2095,73 @@ xfs_free_file_space( | |||
2095 | return error; | 2095 | return error; |
2096 | } | 2096 | } |
2097 | 2097 | ||
2098 | |||
2099 | STATIC int | ||
2100 | xfs_zero_file_space( | ||
2101 | struct xfs_inode *ip, | ||
2102 | xfs_off_t offset, | ||
2103 | xfs_off_t len, | ||
2104 | int attr_flags) | ||
2105 | { | ||
2106 | struct xfs_mount *mp = ip->i_mount; | ||
2107 | uint granularity; | ||
2108 | xfs_off_t start_boundary; | ||
2109 | xfs_off_t end_boundary; | ||
2110 | int error; | ||
2111 | |||
2112 | granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); | ||
2113 | |||
2114 | /* | ||
2115 | * Round the range of extents we are going to convert inwards. If the | ||
2116 | * offset is aligned, then it doesn't get changed, so we zero from the | ||
2117 | * start of the block that offset points to. | ||
2118 | */ | ||
2119 | start_boundary = round_up(offset, granularity); | ||
2120 | end_boundary = round_down(offset + len, granularity); | ||
2121 | |||
2122 | ASSERT(start_boundary >= offset); | ||
2123 | ASSERT(end_boundary <= offset + len); | ||
2124 | |||
2125 | if (!(attr_flags & XFS_ATTR_NOLOCK)) | ||
2126 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
2127 | |||
2128 | if (start_boundary < end_boundary - 1) { | ||
2129 | /* punch out the page cache over the conversion range */ | ||
2130 | truncate_pagecache_range(VFS_I(ip), start_boundary, | ||
2131 | end_boundary - 1); | ||
2132 | /* convert the blocks */ | ||
2133 | error = xfs_alloc_file_space(ip, start_boundary, | ||
2134 | end_boundary - start_boundary - 1, | ||
2135 | XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT, | ||
2136 | attr_flags); | ||
2137 | if (error) | ||
2138 | goto out_unlock; | ||
2139 | |||
2140 | /* We've handled the interior of the range, now for the edges */ | ||
2141 | if (start_boundary != offset) | ||
2142 | error = xfs_iozero(ip, offset, start_boundary - offset); | ||
2143 | if (error) | ||
2144 | goto out_unlock; | ||
2145 | |||
2146 | if (end_boundary != offset + len) | ||
2147 | error = xfs_iozero(ip, end_boundary, | ||
2148 | offset + len - end_boundary); | ||
2149 | |||
2150 | } else { | ||
2151 | /* | ||
2152 | * It's either a sub-granularity range, or the range spans | ||
2153 | * parts of two adjacent blocks. | ||
2154 | */ | ||
2155 | error = xfs_iozero(ip, offset, len); | ||
2156 | } | ||
2157 | |||
2158 | out_unlock: | ||
2159 | if (!(attr_flags & XFS_ATTR_NOLOCK)) | ||
2160 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
2161 | return error; | ||
2162 | |||
2163 | } | ||
2164 | |||
2098 | /* | 2165 | /* |
2099 | * xfs_change_file_space() | 2166 | * xfs_change_file_space() |
2100 | * This routine allocates or frees disk space for the given file. | 2167 | * This routine allocates or frees disk space for the given file. |
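A worked example of the boundary rounding in xfs_zero_file_space(), using hypothetical values (4096-byte granularity, zeroing bytes 3000..12999). The kernel's round_up()/round_down() operate on power-of-two alignments; the arithmetic versions below are equivalent for such values and keep the example self-contained:

	#include <stdio.h>

	#define round_up(x, y)   ((((x) + (y) - 1) / (y)) * (y))
	#define round_down(x, y) (((x) / (y)) * (y))

	int main(void)
	{
		long long offset = 3000, len = 10000, granularity = 4096;
		long long start_boundary = round_up(offset, granularity);	 /* 4096 */
		long long end_boundary = round_down(offset + len, granularity); /* 12288 */

		/* interior [4096, 12288) is converted to unwritten extents */
		printf("convert: [%lld, %lld)\n", start_boundary, end_boundary);
		/* edges [3000, 4096) and [12288, 13000) are zeroed through the page cache */
		printf("zero:    [%lld, %lld) and [%lld, %lld)\n",
		       offset, start_boundary, end_boundary, offset + len);
		return 0;
	}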
@@ -2120,10 +2187,8 @@ xfs_change_file_space( | |||
2120 | xfs_fsize_t fsize; | 2187 | xfs_fsize_t fsize; |
2121 | int setprealloc; | 2188 | int setprealloc; |
2122 | xfs_off_t startoffset; | 2189 | xfs_off_t startoffset; |
2123 | xfs_off_t llen; | ||
2124 | xfs_trans_t *tp; | 2190 | xfs_trans_t *tp; |
2125 | struct iattr iattr; | 2191 | struct iattr iattr; |
2126 | int prealloc_type; | ||
2127 | 2192 | ||
2128 | if (!S_ISREG(ip->i_d.di_mode)) | 2193 | if (!S_ISREG(ip->i_d.di_mode)) |
2129 | return XFS_ERROR(EINVAL); | 2194 | return XFS_ERROR(EINVAL); |
@@ -2141,12 +2206,30 @@ xfs_change_file_space( | |||
2141 | return XFS_ERROR(EINVAL); | 2206 | return XFS_ERROR(EINVAL); |
2142 | } | 2207 | } |
2143 | 2208 | ||
2144 | llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len; | 2209 | /* |
2210 | * A length of <= 0 for resv/unresv/zero is invalid. The length for | ||
2211 | * alloc/free is ignored completely, and we have no idea what userspace | ||
2212 | * might have set it to, so set it to zero to allow the range | ||
2213 | * checks to pass. | ||
2214 | */ | ||
2215 | switch (cmd) { | ||
2216 | case XFS_IOC_ZERO_RANGE: | ||
2217 | case XFS_IOC_RESVSP: | ||
2218 | case XFS_IOC_RESVSP64: | ||
2219 | case XFS_IOC_UNRESVSP: | ||
2220 | case XFS_IOC_UNRESVSP64: | ||
2221 | if (bf->l_len <= 0) | ||
2222 | return XFS_ERROR(EINVAL); | ||
2223 | break; | ||
2224 | default: | ||
2225 | bf->l_len = 0; | ||
2226 | break; | ||
2227 | } | ||
2145 | 2228 | ||
2146 | if (bf->l_start < 0 || | 2229 | if (bf->l_start < 0 || |
2147 | bf->l_start > mp->m_super->s_maxbytes || | 2230 | bf->l_start > mp->m_super->s_maxbytes || |
2148 | bf->l_start + llen < 0 || | 2231 | bf->l_start + bf->l_len < 0 || |
2149 | bf->l_start + llen > mp->m_super->s_maxbytes) | 2232 | bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) |
2150 | return XFS_ERROR(EINVAL); | 2233 | return XFS_ERROR(EINVAL); |
2151 | 2234 | ||
2152 | bf->l_whence = 0; | 2235 | bf->l_whence = 0; |
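For context on the validation added above: the resv/unresv/zero-range ioctls carry the byte count in l_len of their flock64-style argument, and a zero or negative length now fails with EINVAL instead of being passed through. A hedged userspace sketch (path and size are made up; assumes the xfsprogs headers, whose xfsctl(3) page documents these commands):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <xfs/xfs.h>

	int main(void)
	{
		/* reserve 1 MiB of unwritten space at offset 0 */
		xfs_flock64_t fl = { .l_whence = 0, .l_start = 0, .l_len = 1 << 20 };
		int fd = open("/mnt/xfs/testfile", O_RDWR | O_CREAT, 0644);

		if (fd < 0 || ioctl(fd, XFS_IOC_RESVSP64, &fl) < 0)
			perror("resvsp");	/* l_len <= 0 would now yield EINVAL */
		return 0;
	}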
@@ -2154,29 +2237,20 @@ xfs_change_file_space( | |||
2154 | startoffset = bf->l_start; | 2237 | startoffset = bf->l_start; |
2155 | fsize = XFS_ISIZE(ip); | 2238 | fsize = XFS_ISIZE(ip); |
2156 | 2239 | ||
2157 | /* | ||
2158 | * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve | ||
2159 | * file space. | ||
2160 | * These calls do NOT zero the data space allocated to the file, | ||
2161 | * nor do they change the file size. | ||
2162 | * | ||
2163 | * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file | ||
2164 | * space. | ||
2165 | * These calls cause the new file data to be zeroed and the file | ||
2166 | * size to be changed. | ||
2167 | */ | ||
2168 | setprealloc = clrprealloc = 0; | 2240 | setprealloc = clrprealloc = 0; |
2169 | prealloc_type = XFS_BMAPI_PREALLOC; | ||
2170 | |||
2171 | switch (cmd) { | 2241 | switch (cmd) { |
2172 | case XFS_IOC_ZERO_RANGE: | 2242 | case XFS_IOC_ZERO_RANGE: |
2173 | prealloc_type |= XFS_BMAPI_CONVERT; | 2243 | error = xfs_zero_file_space(ip, startoffset, bf->l_len, |
2174 | xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0); | 2244 | attr_flags); |
2175 | /* FALLTHRU */ | 2245 | if (error) |
2246 | return error; | ||
2247 | setprealloc = 1; | ||
2248 | break; | ||
2249 | |||
2176 | case XFS_IOC_RESVSP: | 2250 | case XFS_IOC_RESVSP: |
2177 | case XFS_IOC_RESVSP64: | 2251 | case XFS_IOC_RESVSP64: |
2178 | error = xfs_alloc_file_space(ip, startoffset, bf->l_len, | 2252 | error = xfs_alloc_file_space(ip, startoffset, bf->l_len, |
2179 | prealloc_type, attr_flags); | 2253 | XFS_BMAPI_PREALLOC, attr_flags); |
2180 | if (error) | 2254 | if (error) |
2181 | return error; | 2255 | return error; |
2182 | setprealloc = 1; | 2256 | setprealloc = 1; |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 447e146b2ba6..5163022d9808 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -48,14 +48,9 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, | |||
48 | int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); | 48 | int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); |
49 | int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, | 49 | int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, |
50 | int flags, struct attrlist_cursor_kern *cursor); | 50 | int flags, struct attrlist_cursor_kern *cursor); |
51 | void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first, | ||
52 | xfs_off_t last, int fiopt); | ||
53 | int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, | ||
54 | xfs_off_t last, int fiopt); | ||
55 | int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first, | ||
56 | xfs_off_t last, uint64_t flags, int fiopt); | ||
57 | int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last); | ||
58 | 51 | ||
52 | int xfs_iozero(struct xfs_inode *, loff_t, size_t); | ||
59 | int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); | 53 | int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); |
54 | int xfs_free_eofblocks(struct xfs_mount *, struct xfs_inode *, bool); | ||
60 | 55 | ||
61 | #endif /* _XFS_VNODEOPS_H */ | 56 | #endif /* _XFS_VNODEOPS_H */ |