228 files changed, 6650 insertions, 3116 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index d449e632e6a0..8e2da1e06e3b 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -61,6 +61,7 @@ ata *);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
+	void (*update_time)(struct inode *, struct timespec *, int);
 
 locking rules:
 	all may block
@@ -87,6 +88,8 @@ getxattr:	no
 listxattr:	no
 removexattr:	yes
 fiemap:		no
+update_time:	no
+
 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
 cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index ef19f91a0f12..efd23f481704 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -363,6 +363,7 @@ struct inode_operations {
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
+	void (*update_time)(struct inode *, struct timespec *, int);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -471,6 +472,9 @@ otherwise noted.
   removexattr: called by the VFS to remove an extended attribute from
 	a file. This method is called by removexattr(2) system call.
 
+  update_time: called by the VFS to update a specific time or the i_version of
+	an inode.  If this is not defined the VFS will update the inode itself
+	and call mark_inode_dirty_sync.
 
   The Address Space Object
   ========================
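
The update_time description above can be made concrete with a small sketch. This is not part of the patch: examplefs_update_time and the operations table are invented names, and the body only mirrors the default behaviour the text describes; the S_ATIME/S_CTIME/S_MTIME/S_VERSION bits are the flags the VFS passes to the hook.

static void examplefs_update_time(struct inode *inode, struct timespec *time,
				  int flags)
{
	/* mirror what the VFS does itself when ->update_time is not defined */
	if (flags & S_ATIME)
		inode->i_atime = *time;
	if (flags & S_CTIME)
		inode->i_ctime = *time;
	if (flags & S_MTIME)
		inode->i_mtime = *time;
	if (flags & S_VERSION)
		inode_inc_iversion(inode);

	/* a real filesystem might instead queue this into its own transaction */
	mark_inode_dirty_sync(inode);
}

static const struct inode_operations examplefs_inode_ops = {
	/* ... other operations ... */
	.update_time	= examplefs_update_time,
};
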
diff --git a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/asm/posix_types.h
index 24779fc95994..5a8a48320efe 100644
--- a/arch/alpha/include/asm/posix_types.h
+++ b/arch/alpha/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned int	__kernel_ino_t;
 #define __kernel_ino_t __kernel_ino_t
 
-typedef unsigned int	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
 
 #include <asm-generic/posix_types.h>
diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
index efdf99045d87..d2de9cbbcd9b 100644
--- a/arch/arm/include/asm/posix_types.h
+++ b/arch/arm/include/asm/posix_types.h
@@ -22,9 +22,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/avr32/include/asm/posix_types.h b/arch/avr32/include/asm/posix_types.h
index 74667bfc88cc..9ba9e749b3f3 100644
--- a/arch/avr32/include/asm/posix_types.h
+++ b/arch/avr32/include/asm/posix_types.h
@@ -17,9 +17,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/blackfin/include/asm/posix_types.h b/arch/blackfin/include/asm/posix_types.h
index 41bc1875c4d7..1bd3436db6a7 100644
--- a/arch/blackfin/include/asm/posix_types.h
+++ b/arch/blackfin/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned int	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/asm/posix_types.h
index 234891c74e2b..ce4e51793151 100644
--- a/arch/cris/include/asm/posix_types.h
+++ b/arch/cris/include/asm/posix_types.h
@@ -15,9 +15,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/frv/include/asm/posix_types.h b/arch/frv/include/asm/posix_types.h
index 3f34cb45fbb3..fe512af74a5a 100644
--- a/arch/frv/include/asm/posix_types.h
+++ b/arch/frv/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/asm/posix_types.h
index bc4c34efb1ad..91e62ba4c7b0 100644
--- a/arch/h8300/include/asm/posix_types.h
+++ b/arch/h8300/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/ia64/include/asm/posix_types.h b/arch/ia64/include/asm/posix_types.h
index 7323ab9467eb..99ee1d6510cf 100644
--- a/arch/ia64/include/asm/posix_types.h
+++ b/arch/ia64/include/asm/posix_types.h
@@ -1,9 +1,6 @@
 #ifndef _ASM_IA64_POSIX_TYPES_H
 #define _ASM_IA64_POSIX_TYPES_H
 
-typedef unsigned int	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
 
 #include <asm-generic/posix_types.h>
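
The repeated two-line deletions in these headers all rely on the override convention visible in the surviving lines: an architecture that needs a non-default type provides both a typedef and a same-named macro, and <asm-generic/posix_types.h> only supplies its default when the macro is absent. A minimal sketch of that pattern, using the invented name __kernel_example_t rather than any real kernel type:

/* architecture header: override the generic default */
typedef unsigned short __kernel_example_t;
#define __kernel_example_t __kernel_example_t

/* asm-generic header: fallback used only when no override was defined */
#ifndef __kernel_example_t
typedef unsigned int __kernel_example_t;
#endif

In this series the __kernel_nlink_t override is simply dropped from every architecture header; whether a generic fallback remains or the type is retired entirely is decided in the generic headers, which are not part of this excerpt.
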
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f00ba025375d..d7f558c1e711 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -604,12 +604,6 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
 	spin_unlock(&(x)->ctx_lock);
 }
 
-static inline unsigned long
-pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
-{
-	return get_unmapped_area(file, addr, len, pgoff, flags);
-}
-
 /* forward declaration */
 static const struct dentry_operations pfmfs_dentry_operations;
 
@@ -2333,8 +2327,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	down_write(&task->mm->mmap_sem);
 
 	/* find some free area in address space, must have mmap sem held */
-	vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
-	if (vma->vm_start == 0UL) {
+	vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
+	if (IS_ERR_VALUE(vma->vm_start)) {
 		DPRINT(("Cannot find unmapped area for size %ld\n", size));
 		up_write(&task->mm->mmap_sem);
 		goto error;
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 609d50056a6c..d9439ef2f661 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -171,22 +171,9 @@ asmlinkage unsigned long
 ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
 	     unsigned long new_addr)
 {
-	extern unsigned long do_mremap (unsigned long addr,
-					unsigned long old_len,
-					unsigned long new_len,
-					unsigned long flags,
-					unsigned long new_addr);
-
-	down_write(&current->mm->mmap_sem);
-	{
-		addr = do_mremap(addr, old_len, new_len, flags, new_addr);
-	}
-	up_write(&current->mm->mmap_sem);
-
-	if (IS_ERR((void *) addr))
-		return addr;
-
-	force_successful_syscall_return();
+	addr = sys_mremap(addr, old_len, new_len, flags, new_addr);
+	if (!IS_ERR((void *) addr))
+		force_successful_syscall_return();
 	return addr;
 }
 
diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/asm/posix_types.h
index 0195850e1f88..236de26a409b 100644
--- a/arch/m32r/include/asm/posix_types.h
+++ b/arch/m32r/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/m68k/include/asm/posix_types.h b/arch/m68k/include/asm/posix_types.h
index 6373093be72b..cf4dbf70fdc7 100644
--- a/arch/m68k/include/asm/posix_types.h
+++ b/arch/m68k/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/mips/include/asm/posix_types.h b/arch/mips/include/asm/posix_types.h
index e0308dcca135..fa03ec3fbf89 100644
--- a/arch/mips/include/asm/posix_types.h
+++ b/arch/mips/include/asm/posix_types.h
@@ -17,11 +17,6 @@
  * assume GCC is being used.
  */
 
-#if (_MIPS_SZLONG == 64)
-typedef unsigned int __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-#endif
-
 typedef long		__kernel_daddr_t;
 #define __kernel_daddr_t __kernel_daddr_t
 
diff --git a/arch/mips/include/asm/stat.h b/arch/mips/include/asm/stat.h
index 6e00f751ab6d..fe9a4c3ec5a1 100644
--- a/arch/mips/include/asm/stat.h
+++ b/arch/mips/include/asm/stat.h
@@ -20,7 +20,7 @@ struct stat {
 	long		st_pad1[3];		/* Reserved for network id */
 	ino_t		st_ino;
 	mode_t		st_mode;
-	nlink_t		st_nlink;
+	__u32		st_nlink;
 	uid_t		st_uid;
 	gid_t		st_gid;
 	unsigned	st_rdev;
@@ -55,7 +55,7 @@ struct stat64 {
 	unsigned long long	st_ino;
 
 	mode_t		st_mode;
-	nlink_t		st_nlink;
+	__u32		st_nlink;
 
 	uid_t		st_uid;
 	gid_t		st_gid;
@@ -96,7 +96,7 @@ struct stat {
 	unsigned long		st_ino;
 
 	mode_t			st_mode;
-	nlink_t			st_nlink;
+	__u32			st_nlink;
 
 	uid_t			st_uid;
 	gid_t			st_gid;
diff --git a/arch/mn10300/include/asm/posix_types.h b/arch/mn10300/include/asm/posix_types.h
index ab506181ec31..d31eeea480cf 100644
--- a/arch/mn10300/include/asm/posix_types.h
+++ b/arch/mn10300/include/asm/posix_types.h
@@ -20,9 +20,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h
index 5212b0357daf..b9344256f76b 100644
--- a/arch/parisc/include/asm/posix_types.h
+++ b/arch/parisc/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/parisc/include/asm/stat.h b/arch/parisc/include/asm/stat.h
index 9d5fbbc5c31f..d76fbda5d62c 100644
--- a/arch/parisc/include/asm/stat.h
+++ b/arch/parisc/include/asm/stat.h
@@ -7,7 +7,7 @@ struct stat {
 	unsigned int	st_dev;		/* dev_t is 32 bits on parisc */
 	ino_t		st_ino;		/* 32 bits */
 	mode_t		st_mode;	/* 16 bits */
-	nlink_t		st_nlink;	/* 16 bits */
+	unsigned short	st_nlink;	/* 16 bits */
 	unsigned short	st_reserved1;	/* old st_uid */
 	unsigned short	st_reserved2;	/* old st_gid */
 	unsigned int	st_rdev;
@@ -42,7 +42,7 @@ struct hpux_stat64 {
 	unsigned int	st_dev;		/* dev_t is 32 bits on parisc */
 	ino_t		st_ino;		/* 32 bits */
 	mode_t		st_mode;	/* 16 bits */
-	nlink_t		st_nlink;	/* 16 bits */
+	unsigned short	st_nlink;	/* 16 bits */
 	unsigned short	st_reserved1;	/* old st_uid */
 	unsigned short	st_reserved2;	/* old st_gid */
 	unsigned int	st_rdev;
diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h
index f1393252bbda..2958c5b97b2d 100644
--- a/arch/powerpc/include/asm/posix_types.h
+++ b/arch/powerpc/include/asm/posix_types.h
@@ -16,9 +16,6 @@ typedef int		__kernel_ssize_t;
 typedef long		__kernel_ptrdiff_t;
 #define __kernel_size_t __kernel_size_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef short		__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 #endif
diff --git a/arch/powerpc/include/asm/stat.h b/arch/powerpc/include/asm/stat.h
index e4edc510b530..10cfb558e0fd 100644
--- a/arch/powerpc/include/asm/stat.h
+++ b/arch/powerpc/include/asm/stat.h
@@ -30,11 +30,11 @@ struct stat {
 	unsigned long	st_dev;
 	ino_t		st_ino;
 #ifdef __powerpc64__
-	nlink_t		st_nlink;
+	unsigned short	st_nlink;
 	mode_t		st_mode;
 #else
 	mode_t		st_mode;
-	nlink_t		st_nlink;
+	unsigned short	st_nlink;
 #endif
 	uid_t		st_uid;
 	gid_t		st_gid;
diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h
index edf8527ff08d..7be104c0f192 100644
--- a/arch/s390/include/asm/posix_types.h
+++ b/arch/s390/include/asm/posix_types.h
@@ -24,7 +24,6 @@ typedef unsigned short __kernel_old_dev_t;
 
 typedef unsigned long	__kernel_ino_t;
 typedef unsigned short	__kernel_mode_t;
-typedef unsigned short	__kernel_nlink_t;
 typedef unsigned short	__kernel_ipc_pid_t;
 typedef unsigned short	__kernel_uid_t;
 typedef unsigned short	__kernel_gid_t;
@@ -35,7 +34,6 @@ typedef int		__kernel_ptrdiff_t;
 
 typedef unsigned int	__kernel_ino_t;
 typedef unsigned int	__kernel_mode_t;
-typedef unsigned int	__kernel_nlink_t;
 typedef int		__kernel_ipc_pid_t;
 typedef unsigned int	__kernel_uid_t;
 typedef unsigned int	__kernel_gid_t;
@@ -47,7 +45,6 @@ typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
 
 #define __kernel_ino_t __kernel_ino_t
 #define __kernel_mode_t __kernel_mode_t
-#define __kernel_nlink_t __kernel_nlink_t
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 #define __kernel_uid_t __kernel_uid_t
 #define __kernel_gid_t __kernel_gid_t
diff --git a/arch/sh/include/asm/posix_types_32.h b/arch/sh/include/asm/posix_types_32.h
index abda58467ece..ba0bdc423b07 100644
--- a/arch/sh/include/asm/posix_types_32.h
+++ b/arch/sh/include/asm/posix_types_32.h
@@ -3,8 +3,6 @@
 
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 typedef unsigned short	__kernel_uid_t;
diff --git a/arch/sh/include/asm/posix_types_64.h b/arch/sh/include/asm/posix_types_64.h
index fcda07b4a616..244f7e950e17 100644
--- a/arch/sh/include/asm/posix_types_64.h
+++ b/arch/sh/include/asm/posix_types_64.h
@@ -3,8 +3,6 @@
 
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 typedef unsigned short	__kernel_uid_t;
diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h
index 3070f25ae90a..156220ed99eb 100644
--- a/arch/sparc/include/asm/posix_types.h
+++ b/arch/sparc/include/asm/posix_types.h
@@ -9,8 +9,6 @@
 
 #if defined(__sparc__) && defined(__arch64__)
 /* sparc 64 bit */
-typedef unsigned int	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
 
 typedef unsigned short	__kernel_old_uid_t;
 typedef unsigned short	__kernel_old_gid_t;
@@ -38,9 +36,6 @@ typedef unsigned short __kernel_gid_t;
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef short		__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef long		__kernel_daddr_t;
 #define __kernel_daddr_t __kernel_daddr_t
 
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 3ee51f189a55..275f74fd6f6a 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -580,16 +580,9 @@ SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len,
 		unsigned long, new_len, unsigned long, flags,
 		unsigned long, new_addr)
 {
-	unsigned long ret = -EINVAL;
-
 	if (test_thread_flag(TIF_32BIT))
-		goto out;
-
-	down_write(&current->mm->mmap_sem);
-	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
-	up_write(&current->mm->mmap_sem);
-out:
-	return ret;
+		return -EINVAL;
+	return sys_mremap(addr, old_len, new_len, flags, new_addr);
 }
 
 /* we come to here via sys_nis_syscall so it can setup the regs argument */
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 69adc08d36a5..6e74450ff0a1 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t;
 typedef __kernel_mode_t compat_mode_t;
 typedef __kernel_dev_t compat_dev_t;
 typedef __kernel_loff_t compat_loff_t;
-typedef __kernel_nlink_t compat_nlink_t;
 typedef __kernel_ipc_pid_t compat_ipc_pid_t;
 typedef __kernel_daddr_t compat_daddr_t;
 typedef __kernel_fsid_t compat_fsid_t;
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h
index 99f262e04b91..8e525059e7d8 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/asm/posix_types_32.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index ba29b2e73d48..72b5e7280d14 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -42,7 +42,7 @@ struct device *soc_device_to_device(struct soc_device *soc_dev)
 	return &soc_dev->dev;
 }
 
-static mode_t soc_attribute_mode(struct kobject *kobj,
+static umode_t soc_attribute_mode(struct kobject *kobj,
 				 struct attribute *attr,
 				 int index)
 {
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index f920fb5e42b6..fa9439159ebd 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -130,11 +130,10 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv)
 		return -EINVAL;
 
 	/* This is all entirely broken */
-	down_write(&current->mm->mmap_sem);
 	old_fops = file_priv->filp->f_op;
 	file_priv->filp->f_op = &i810_buffer_fops;
 	dev_priv->mmap_buffer = buf;
-	buf_priv->virtual = (void *)do_mmap(file_priv->filp, 0, buf->total,
+	buf_priv->virtual = (void *)vm_mmap(file_priv->filp, 0, buf->total,
 					    PROT_READ | PROT_WRITE,
 					    MAP_SHARED, buf->bus_address);
 	dev_priv->mmap_buffer = NULL;
@@ -145,7 +144,6 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv)
 		retcode = PTR_ERR(buf_priv->virtual);
 		buf_priv->virtual = NULL;
 	}
-	up_write(&current->mm->mmap_sem);
 
 	return retcode;
 }
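
The i810 hunks above are the first of several in this series (binfmt_elf, binfmt_flat and others below follow the same pattern) that switch from do_mmap()/do_munmap() under an explicit mmap_sem to the vm_mmap()/vm_munmap() helpers, which take and release current->mm->mmap_sem themselves. A rough sketch of the resulting calling convention; hypothetical_map() is an invented function, not code from the patch:

static unsigned long hypothetical_map(struct file *filp, unsigned long len)
{
	unsigned long addr;

	/* no down_write(&current->mm->mmap_sem) needed around these calls */
	addr = vm_mmap(filp, 0, len, PROT_READ | PROT_WRITE, MAP_SHARED, 0);
	if (IS_ERR_VALUE(addr))
		return addr;	/* negative errno encoded in the value */

	/* ... use the mapping ... */

	vm_munmap(addr, len);
	return 0;
}
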
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a1e6c990cd41..e3dd2a1e2bfc 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -68,24 +68,6 @@ static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
 	return current_fsgid();
 }
 
-/**
- * v9fs_dentry_from_dir_inode - helper function to get the dentry from
- * dir inode.
- *
- */
-
-static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
-{
-	struct dentry *dentry;
-
-	spin_lock(&inode->i_lock);
-	/* Directory should have only one entry. */
-	BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
-	dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
-	spin_unlock(&inode->i_lock);
-	return dentry;
-}
-
 static int v9fs_test_inode_dotl(struct inode *inode, void *data)
 {
 	struct v9fs_inode *v9inode = V9FS_I(inode);
@@ -415,7 +397,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 	if (dir->i_mode & S_ISGID)
 		omode |= S_ISGID;
 
-	dir_dentry = v9fs_dentry_from_dir_inode(dir);
+	dir_dentry = dentry->d_parent;
 	dfid = v9fs_fid_lookup(dir_dentry);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
@@ -793,7 +775,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 		dir->i_ino, old_dentry->d_name.name, dentry->d_name.name);
 
 	v9ses = v9fs_inode2v9ses(dir);
-	dir_dentry = v9fs_dentry_from_dir_inode(dir);
+	dir_dentry = dentry->d_parent;
 	dfid = v9fs_fid_lookup(dir_dentry);
 	if (IS_ERR(dfid))
 		return PTR_ERR(dfid);
@@ -858,7 +840,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 		return -EINVAL;
 
 	v9ses = v9fs_inode2v9ses(dir);
-	dir_dentry = v9fs_dentry_from_dir_inode(dir);
+	dir_dentry = dentry->d_parent;
 	dfid = v9fs_fid_lookup(dir_dentry);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 45a0ce45d7b4..1fceb320d2f2 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -18,14 +18,6 @@
 #define AFFS_GET_HASHENTRY(data,hashkey) be32_to_cpu(((struct dir_front *)data)->hashtable[hashkey])
 #define AFFS_BLOCK(sb, bh, blk)		(AFFS_HEAD(bh)->table[AFFS_SB(sb)->s_hashsize-1-(blk)])
 
-#ifdef __LITTLE_ENDIAN
-#define BO_EXBITS	0x18UL
-#elif defined(__BIG_ENDIAN)
-#define BO_EXBITS	0x00UL
-#else
-#error Endianness must be known for affs to work.
-#endif
-
 #define AFFS_HEAD(bh)		((struct affs_head *)(bh)->b_data)
 #define AFFS_TAIL(sb, bh)	((struct affs_tail *)((bh)->b_data+(sb)->s_blocksize-sizeof(struct affs_tail)))
 #define AFFS_ROOT_HEAD(bh)	((struct affs_root_head *)(bh)->b_data)
diff --git a/fs/aio.c b/fs/aio.c
@@ -134,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 	info->mmap_size = nr_pages * PAGE_SIZE;
 	dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
 	down_write(&ctx->mm->mmap_sem);
-	info->mmap_base = do_mmap(NULL, 0, info->mmap_size,
-				  PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
-				  0);
+	info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
+					PROT_READ|PROT_WRITE,
+					MAP_ANONYMOUS|MAP_PRIVATE, 0);
 	if (IS_ERR((void *)info->mmap_base)) {
 		up_write(&ctx->mm->mmap_sem);
 		info->mmap_size = 0;
diff --git a/fs/attr.c b/fs/attr.c
@@ -176,6 +176,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 			return -EPERM;
 	}
 
+	if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) {
+		if (attr->ia_size != inode->i_size)
+			inode_inc_iversion(inode);
+	}
+
 	if ((ia_valid & ATTR_MODE)) {
 		umode_t amode = attr->ia_mode;
 		/* Flag setting protected by i_mutex */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e658dd134b95..1b52956afe33 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -329,7 +329,6 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 	if (!size)
 		return addr;
 
-	down_write(&current->mm->mmap_sem);
 	/*
 	 * total_size is the size of the ELF (interpreter) image.
 	 * The _first_ mmap needs to know the full size, otherwise
@@ -340,13 +339,12 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 	 */
 	if (total_size) {
 		total_size = ELF_PAGEALIGN(total_size);
-		map_addr = do_mmap(filep, addr, total_size, prot, type, off);
+		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
 		if (!BAD_ADDR(map_addr))
-			do_munmap(current->mm, map_addr+size, total_size-size);
+			vm_munmap(map_addr+size, total_size-size);
 	} else
-		map_addr = do_mmap(filep, addr, size, prot, type, off);
+		map_addr = vm_mmap(filep, addr, size, prot, type, off);
 
-	up_write(&current->mm->mmap_sem);
 	return(map_addr);
 }
 
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 6b2daf99fab8..178cb70acc26 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -562,7 +562,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 		realdatastart = (unsigned long) -ENOMEM;
 		printk("Unable to allocate RAM for process data, errno %d\n",
 				(int)-realdatastart);
-		do_munmap(current->mm, textpos, text_len);
+		vm_munmap(textpos, text_len);
 		ret = realdatastart;
 		goto err;
 	}
@@ -586,8 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 	}
 	if (IS_ERR_VALUE(result)) {
 		printk("Unable to read data+bss, errno %d\n", (int)-result);
-		do_munmap(current->mm, textpos, text_len);
-		do_munmap(current->mm, realdatastart, len);
+		vm_munmap(textpos, text_len);
+		vm_munmap(realdatastart, len);
 		ret = result;
 		goto err;
 	}
@@ -654,7 +654,7 @@ static int load_flat_file(struct linux_binprm * bprm,
 		}
 		if (IS_ERR_VALUE(result)) {
 			printk("Unable to read code+data+bss, errno %d\n",(int)-result);
-			do_munmap(current->mm, textpos, text_len + data_len + extra +
+			vm_munmap(textpos, text_len + data_len + extra +
 				MAX_SHARED_LIBS * sizeof(unsigned long));
 			ret = result;
 			goto err;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 89b156d85d63..761e2cd8fed1 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 		if (ret > 0) {
 			/* we need an acl */
 			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
+		} else {
+			cache_no_acl(inode);
 		}
+	} else {
+		cache_no_acl(inode);
 	}
 failed:
 	posix_acl_release(acl);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index bcec06750232..3f75895c919b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -24,22 +24,135 @@
 #include "delayed-ref.h"
 #include "locking.h"
 
+struct extent_inode_elem {
+	u64 inum;
+	u64 offset;
+	struct extent_inode_elem *next;
+};
+
+static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
+				struct btrfs_file_extent_item *fi,
+				u64 extent_item_pos,
+				struct extent_inode_elem **eie)
+{
+	u64 data_offset;
+	u64 data_len;
+	struct extent_inode_elem *e;
+
+	data_offset = btrfs_file_extent_offset(eb, fi);
+	data_len = btrfs_file_extent_num_bytes(eb, fi);
+
+	if (extent_item_pos < data_offset ||
+	    extent_item_pos >= data_offset + data_len)
+		return 1;
+
+	e = kmalloc(sizeof(*e), GFP_NOFS);
+	if (!e)
+		return -ENOMEM;
+
+	e->next = *eie;
+	e->inum = key->objectid;
+	e->offset = key->offset + (extent_item_pos - data_offset);
+	*eie = e;
+
+	return 0;
+}
+
+static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
+				u64 extent_item_pos,
+				struct extent_inode_elem **eie)
+{
+	u64 disk_byte;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	int slot;
+	int nritems;
+	int extent_type;
+	int ret;
+
+	/*
+	 * from the shared data ref, we only have the leaf but we need
+	 * the key. thus, we must look into all items and see that we
+	 * find one (some) with a reference to our extent item.
+	 */
+	nritems = btrfs_header_nritems(eb);
+	for (slot = 0; slot < nritems; ++slot) {
+		btrfs_item_key_to_cpu(eb, &key, slot);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+		extent_type = btrfs_file_extent_type(eb, fi);
+		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+			continue;
+		/* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
+		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+		if (disk_byte != wanted_disk_byte)
+			continue;
+
+		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /*
  * this structure records all encountered refs on the way up to the root
  */
 struct __prelim_ref {
 	struct list_head list;
 	u64 root_id;
-	struct btrfs_key key;
+	struct btrfs_key key_for_search;
 	int level;
 	int count;
+	struct extent_inode_elem *inode_list;
 	u64 parent;
 	u64 wanted_disk_byte;
 };
 
+/*
+ * the rules for all callers of this function are:
+ * - obtaining the parent is the goal
+ * - if you add a key, you must know that it is a correct key
+ * - if you cannot add the parent or a correct key, then we will look into the
+ *   block later to set a correct key
+ *
+ * delayed refs
+ * ============
+ *        backref type | shared | indirect | shared | indirect
+ * information         |   tree |     tree |   data |     data
+ * --------------------+--------+----------+--------+----------
+ *      parent logical |    y   |     -    |    -   |     -
+ *      key to resolve |    -   |     y    |    y   |     y
+ *  tree block logical |    -   |     -    |    -   |     -
+ *  root for resolving |    y   |     y    |    y   |     y
+ *
+ * - column 1:       we've the parent -> done
+ * - column 2, 3, 4: we use the key to find the parent
+ *
+ * on disk refs (inline or keyed)
+ * ==============================
+ *        backref type | shared | indirect | shared | indirect
+ * information         |   tree |     tree |   data |     data
+ * --------------------+--------+----------+--------+----------
+ *      parent logical |    y   |     -    |    y   |     -
+ *      key to resolve |    -   |     -    |    -   |     y
+ *  tree block logical |    y   |     y    |    y   |     y
+ *  root for resolving |    -   |     y    |    y   |     y
+ *
+ * - column 1, 3: we've the parent -> done
+ * - column 2:    we take the first key from the block to find the parent
+ *                (see __add_missing_keys)
+ * - column 4:    we use the key to find the parent
+ *
+ * additional information that's available but not required to find the parent
+ * block might help in merging entries to gain some speed.
+ */
+
 static int __add_prelim_ref(struct list_head *head, u64 root_id,
-			    struct btrfs_key *key, int level, u64 parent,
-			    u64 wanted_disk_byte, int count)
+			    struct btrfs_key *key, int level,
+			    u64 parent, u64 wanted_disk_byte, int count)
 {
 	struct __prelim_ref *ref;
 
@@ -50,10 +163,11 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 
 	ref->root_id = root_id;
 	if (key)
-		ref->key = *key;
+		ref->key_for_search = *key;
 	else
-		memset(&ref->key, 0, sizeof(ref->key));
+		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
 
+	ref->inode_list = NULL;
 	ref->level = level;
 	ref->count = count;
 	ref->parent = parent;
@@ -64,18 +178,26 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 }
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
-				struct ulist *parents,
-				struct extent_buffer *eb, int level,
-				u64 wanted_objectid, u64 wanted_disk_byte)
+				struct ulist *parents, int level,
+				struct btrfs_key *key, u64 wanted_disk_byte,
+				const u64 *extent_item_pos)
 {
 	int ret;
-	int slot;
+	int slot = path->slots[level];
+	struct extent_buffer *eb = path->nodes[level];
 	struct btrfs_file_extent_item *fi;
-	struct btrfs_key key;
+	struct extent_inode_elem *eie = NULL;
 	u64 disk_byte;
+	u64 wanted_objectid = key->objectid;
 
 add_parent:
-	ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
+	if (level == 0 && extent_item_pos) {
+		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+		ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie);
+		if (ret < 0)
+			return ret;
+	}
+	ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS);
 	if (ret < 0)
 		return ret;
 
@@ -89,6 +211,7 @@ add_parent:
 	 * repeat this until we don't find any additional EXTENT_DATA items.
 	 */
 	while (1) {
+		eie = NULL;
 		ret = btrfs_next_leaf(root, path);
 		if (ret < 0)
 			return ret;
@@ -97,9 +220,9 @@ add_parent:
 
 		eb = path->nodes[0];
 		for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
-			btrfs_item_key_to_cpu(eb, &key, slot);
-			if (key.objectid != wanted_objectid ||
-			    key.type != BTRFS_EXTENT_DATA_KEY)
+			btrfs_item_key_to_cpu(eb, key, slot);
+			if (key->objectid != wanted_objectid ||
+			    key->type != BTRFS_EXTENT_DATA_KEY)
 				return 0;
 			fi = btrfs_item_ptr(eb, slot,
 					    struct btrfs_file_extent_item);
@@ -118,8 +241,10 @@ add_parent:
  */
 static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 				int search_commit_root,
+				u64 time_seq,
 				struct __prelim_ref *ref,
-				struct ulist *parents)
+				struct ulist *parents,
+				const u64 *extent_item_pos)
 {
 	struct btrfs_path *path;
 	struct btrfs_root *root;
@@ -152,12 +277,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 
 	path->lowest_level = level;
-	ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0);
+	ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
 	pr_debug("search slot in root %llu (level %d, ref count %d) returned "
 		 "%d for key (%llu %u %llu)\n",
 		 (unsigned long long)ref->root_id, level, ref->count, ret,
-		 (unsigned long long)ref->key.objectid, ref->key.type,
-		 (unsigned long long)ref->key.offset);
+		 (unsigned long long)ref->key_for_search.objectid,
+		 ref->key_for_search.type,
+		 (unsigned long long)ref->key_for_search.offset);
 	if (ret < 0)
 		goto out;
 
@@ -179,9 +305,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
 	}
 
-	/* the last two parameters will only be used for level == 0 */
-	ret = add_all_parents(root, path, parents, eb, level, key.objectid,
-				ref->wanted_disk_byte);
+	ret = add_all_parents(root, path, parents, level, &key,
+				ref->wanted_disk_byte, extent_item_pos);
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -191,8 +316,9 @@ out:
  * resolve all indirect backrefs from the list
  */
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
-				   int search_commit_root,
-				   struct list_head *head)
+				   int search_commit_root, u64 time_seq,
+				   struct list_head *head,
+				   const u64 *extent_item_pos)
 {
 	int err;
 	int ret = 0;
@@ -201,6 +327,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 	struct __prelim_ref *new_ref;
 	struct ulist *parents;
 	struct ulist_node *node;
+	struct ulist_iterator uiter;
 
 	parents = ulist_alloc(GFP_NOFS);
 	if (!parents)
@@ -217,7 +344,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		if (ref->count == 0)
 			continue;
 		err = __resolve_indirect_ref(fs_info, search_commit_root,
-					     ref, parents);
+					     time_seq, ref, parents,
+					     extent_item_pos);
 		if (err) {
 			if (ret == 0)
 				ret = err;
@@ -225,11 +353,14 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		}
 
 		/* we put the first parent into the ref at hand */
-		node = ulist_next(parents, NULL);
+		ULIST_ITER_INIT(&uiter);
+		node = ulist_next(parents, &uiter);
 		ref->parent = node ? node->val : 0;
+		ref->inode_list =
+			node ? (struct extent_inode_elem *)node->aux : 0;
 
 		/* additional parents require new refs being added here */
-		while ((node = ulist_next(parents, node))) {
+		while ((node = ulist_next(parents, &uiter))) {
 			new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
 			if (!new_ref) {
 				ret = -ENOMEM;
@@ -237,6 +368,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			}
 			memcpy(new_ref, ref, sizeof(*ref));
 			new_ref->parent = node->val;
+			new_ref->inode_list =
+				(struct extent_inode_elem *)node->aux;
 			list_add(&new_ref->list, &ref->list);
 		}
 		ulist_reinit(parents);
@@ -246,10 +379,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+static inline int ref_for_same_block(struct __prelim_ref *ref1,
+				     struct __prelim_ref *ref2)
+{
+	if (ref1->level != ref2->level)
+		return 0;
+	if (ref1->root_id != ref2->root_id)
+		return 0;
+	if (ref1->key_for_search.type != ref2->key_for_search.type)
+		return 0;
+	if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
+		return 0;
+	if (ref1->key_for_search.offset != ref2->key_for_search.offset)
+		return 0;
+	if (ref1->parent != ref2->parent)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * read tree blocks and add keys where required.
+ */
+static int __add_missing_keys(struct btrfs_fs_info *fs_info,
+			      struct list_head *head)
+{
+	struct list_head *pos;
+	struct extent_buffer *eb;
+
+	list_for_each(pos, head) {
+		struct __prelim_ref *ref;
+		ref = list_entry(pos, struct __prelim_ref, list);
+
+		if (ref->parent)
+			continue;
+		if (ref->key_for_search.type)
+			continue;
+		BUG_ON(!ref->wanted_disk_byte);
+		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
+				     fs_info->tree_root->leafsize, 0);
+		BUG_ON(!eb);
+		btrfs_tree_read_lock(eb);
+		if (btrfs_header_level(eb) == 0)
+			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
+		else
+			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return 0;
+}
+
 /*
  * merge two lists of backrefs and adjust counts accordingly
  *
  * mode = 1: merge identical keys, if key is set
+ *           FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
+ *                  additionally, we could even add a key range for the blocks we
+ *                  looked into to merge even more (-> replace unresolved refs by those
+ *                  having a parent).
  * mode = 2: merge identical parents
  */
 static int __merge_refs(struct list_head *head, int mode)
@@ -263,20 +451,21 @@ static int __merge_refs(struct list_head *head, int mode) | |||
263 | 451 | ||
264 | ref1 = list_entry(pos1, struct __prelim_ref, list); | 452 | ref1 = list_entry(pos1, struct __prelim_ref, list); |
265 | 453 | ||
266 | if (mode == 1 && ref1->key.type == 0) | ||
267 | continue; | ||
268 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; | 454 | for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; |
269 | pos2 = n2, n2 = pos2->next) { | 455 | pos2 = n2, n2 = pos2->next) { |
270 | struct __prelim_ref *ref2; | 456 | struct __prelim_ref *ref2; |
457 | struct __prelim_ref *xchg; | ||
271 | 458 | ||
272 | ref2 = list_entry(pos2, struct __prelim_ref, list); | 459 | ref2 = list_entry(pos2, struct __prelim_ref, list); |
273 | 460 | ||
274 | if (mode == 1) { | 461 | if (mode == 1) { |
275 | if (memcmp(&ref1->key, &ref2->key, | 462 | if (!ref_for_same_block(ref1, ref2)) |
276 | sizeof(ref1->key)) || | ||
277 | ref1->level != ref2->level || | ||
278 | ref1->root_id != ref2->root_id) | ||
279 | continue; | 463 | continue; |
464 | if (!ref1->parent && ref2->parent) { | ||
465 | xchg = ref1; | ||
466 | ref1 = ref2; | ||
467 | ref2 = xchg; | ||
468 | } | ||
280 | ref1->count += ref2->count; | 469 | ref1->count += ref2->count; |
281 | } else { | 470 | } else { |
282 | if (ref1->parent != ref2->parent) | 471 | if (ref1->parent != ref2->parent) |
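The two merge modes are used in separate passes. A condensed view of the calling sequence in find_parent_nodes() — the mode 2 call falls in a part of find_parent_nodes() not covered by the hunks below, so this is a paraphrase with error handling omitted:

    __add_missing_keys(fs_info, &prefs);   /* give keyless refs a key_for_search */
    __merge_refs(&prefs, 1);               /* mode 1: collapse refs describing the same block */
    __resolve_indirect_refs(...);          /* walk the roots, turn keys into parent bytenrs */
    __merge_refs(&prefs, 2);               /* mode 2: collapse refs sharing a parent (call not visible here) */

Merging before resolution avoids walking the same root/key combination twice; merging afterwards folds the counts of refs that turned out to point at the same parent block.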
@@ -296,16 +485,17 @@ static int __merge_refs(struct list_head *head, int mode) | |||
296 | * smaller or equal that seq to the list | 485 | * smaller or equal that seq to the list |
297 | */ | 486 | */ |
298 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | 487 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, |
299 | struct btrfs_key *info_key, | ||
300 | struct list_head *prefs) | 488 | struct list_head *prefs) |
301 | { | 489 | { |
302 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | 490 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
303 | struct rb_node *n = &head->node.rb_node; | 491 | struct rb_node *n = &head->node.rb_node; |
492 | struct btrfs_key key; | ||
493 | struct btrfs_key op_key = {0}; | ||
304 | int sgn; | 494 | int sgn; |
305 | int ret = 0; | 495 | int ret = 0; |
306 | 496 | ||
307 | if (extent_op && extent_op->update_key) | 497 | if (extent_op && extent_op->update_key) |
308 | btrfs_disk_key_to_cpu(info_key, &extent_op->key); | 498 | btrfs_disk_key_to_cpu(&op_key, &extent_op->key); |
309 | 499 | ||
310 | while ((n = rb_prev(n))) { | 500 | while ((n = rb_prev(n))) { |
311 | struct btrfs_delayed_ref_node *node; | 501 | struct btrfs_delayed_ref_node *node; |
@@ -337,7 +527,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
337 | struct btrfs_delayed_tree_ref *ref; | 527 | struct btrfs_delayed_tree_ref *ref; |
338 | 528 | ||
339 | ref = btrfs_delayed_node_to_tree_ref(node); | 529 | ref = btrfs_delayed_node_to_tree_ref(node); |
340 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 530 | ret = __add_prelim_ref(prefs, ref->root, &op_key, |
341 | ref->level + 1, 0, node->bytenr, | 531 | ref->level + 1, 0, node->bytenr, |
342 | node->ref_mod * sgn); | 532 | node->ref_mod * sgn); |
343 | break; | 533 | break; |
@@ -346,7 +536,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
346 | struct btrfs_delayed_tree_ref *ref; | 536 | struct btrfs_delayed_tree_ref *ref; |
347 | 537 | ||
348 | ref = btrfs_delayed_node_to_tree_ref(node); | 538 | ref = btrfs_delayed_node_to_tree_ref(node); |
349 | ret = __add_prelim_ref(prefs, ref->root, info_key, | 539 | ret = __add_prelim_ref(prefs, ref->root, NULL, |
350 | ref->level + 1, ref->parent, | 540 | ref->level + 1, ref->parent, |
351 | node->bytenr, | 541 | node->bytenr, |
352 | node->ref_mod * sgn); | 542 | node->ref_mod * sgn); |
@@ -354,8 +544,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
354 | } | 544 | } |
355 | case BTRFS_EXTENT_DATA_REF_KEY: { | 545 | case BTRFS_EXTENT_DATA_REF_KEY: { |
356 | struct btrfs_delayed_data_ref *ref; | 546 | struct btrfs_delayed_data_ref *ref; |
357 | struct btrfs_key key; | ||
358 | |||
359 | ref = btrfs_delayed_node_to_data_ref(node); | 547 | ref = btrfs_delayed_node_to_data_ref(node); |
360 | 548 | ||
361 | key.objectid = ref->objectid; | 549 | key.objectid = ref->objectid; |
@@ -368,7 +556,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
368 | } | 556 | } |
369 | case BTRFS_SHARED_DATA_REF_KEY: { | 557 | case BTRFS_SHARED_DATA_REF_KEY: { |
370 | struct btrfs_delayed_data_ref *ref; | 558 | struct btrfs_delayed_data_ref *ref; |
371 | struct btrfs_key key; | ||
372 | 559 | ||
373 | ref = btrfs_delayed_node_to_data_ref(node); | 560 | ref = btrfs_delayed_node_to_data_ref(node); |
374 | 561 | ||
@@ -394,8 +581,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
394 | */ | 581 | */ |
395 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | 582 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, |
396 | struct btrfs_path *path, u64 bytenr, | 583 | struct btrfs_path *path, u64 bytenr, |
397 | struct btrfs_key *info_key, int *info_level, | 584 | int *info_level, struct list_head *prefs) |
398 | struct list_head *prefs) | ||
399 | { | 585 | { |
400 | int ret = 0; | 586 | int ret = 0; |
401 | int slot; | 587 | int slot; |
@@ -411,7 +597,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
411 | * enumerate all inline refs | 597 | * enumerate all inline refs |
412 | */ | 598 | */ |
413 | leaf = path->nodes[0]; | 599 | leaf = path->nodes[0]; |
414 | slot = path->slots[0] - 1; | 600 | slot = path->slots[0]; |
415 | 601 | ||
416 | item_size = btrfs_item_size_nr(leaf, slot); | 602 | item_size = btrfs_item_size_nr(leaf, slot); |
417 | BUG_ON(item_size < sizeof(*ei)); | 603 | BUG_ON(item_size < sizeof(*ei)); |
@@ -424,12 +610,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
424 | 610 | ||
425 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 611 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
426 | struct btrfs_tree_block_info *info; | 612 | struct btrfs_tree_block_info *info; |
427 | struct btrfs_disk_key disk_key; | ||
428 | 613 | ||
429 | info = (struct btrfs_tree_block_info *)ptr; | 614 | info = (struct btrfs_tree_block_info *)ptr; |
430 | *info_level = btrfs_tree_block_level(leaf, info); | 615 | *info_level = btrfs_tree_block_level(leaf, info); |
431 | btrfs_tree_block_key(leaf, info, &disk_key); | ||
432 | btrfs_disk_key_to_cpu(info_key, &disk_key); | ||
433 | ptr += sizeof(struct btrfs_tree_block_info); | 616 | ptr += sizeof(struct btrfs_tree_block_info); |
434 | BUG_ON(ptr > end); | 617 | BUG_ON(ptr > end); |
435 | } else { | 618 | } else { |
@@ -447,7 +630,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
447 | 630 | ||
448 | switch (type) { | 631 | switch (type) { |
449 | case BTRFS_SHARED_BLOCK_REF_KEY: | 632 | case BTRFS_SHARED_BLOCK_REF_KEY: |
450 | ret = __add_prelim_ref(prefs, 0, info_key, | 633 | ret = __add_prelim_ref(prefs, 0, NULL, |
451 | *info_level + 1, offset, | 634 | *info_level + 1, offset, |
452 | bytenr, 1); | 635 | bytenr, 1); |
453 | break; | 636 | break; |
@@ -462,8 +645,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
462 | break; | 645 | break; |
463 | } | 646 | } |
464 | case BTRFS_TREE_BLOCK_REF_KEY: | 647 | case BTRFS_TREE_BLOCK_REF_KEY: |
465 | ret = __add_prelim_ref(prefs, offset, info_key, | 648 | ret = __add_prelim_ref(prefs, offset, NULL, |
466 | *info_level + 1, 0, bytenr, 1); | 649 | *info_level + 1, 0, |
650 | bytenr, 1); | ||
467 | break; | 651 | break; |
468 | case BTRFS_EXTENT_DATA_REF_KEY: { | 652 | case BTRFS_EXTENT_DATA_REF_KEY: { |
469 | struct btrfs_extent_data_ref *dref; | 653 | struct btrfs_extent_data_ref *dref; |
@@ -477,8 +661,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
477 | key.type = BTRFS_EXTENT_DATA_KEY; | 661 | key.type = BTRFS_EXTENT_DATA_KEY; |
478 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 662 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
479 | root = btrfs_extent_data_ref_root(leaf, dref); | 663 | root = btrfs_extent_data_ref_root(leaf, dref); |
480 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr, | 664 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
481 | count); | 665 | bytenr, count); |
482 | break; | 666 | break; |
483 | } | 667 | } |
484 | default: | 668 | default: |
@@ -496,8 +680,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
496 | */ | 680 | */ |
497 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | 681 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, |
498 | struct btrfs_path *path, u64 bytenr, | 682 | struct btrfs_path *path, u64 bytenr, |
499 | struct btrfs_key *info_key, int info_level, | 683 | int info_level, struct list_head *prefs) |
500 | struct list_head *prefs) | ||
501 | { | 684 | { |
502 | struct btrfs_root *extent_root = fs_info->extent_root; | 685 | struct btrfs_root *extent_root = fs_info->extent_root; |
503 | int ret; | 686 | int ret; |
@@ -527,7 +710,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
527 | 710 | ||
528 | switch (key.type) { | 711 | switch (key.type) { |
529 | case BTRFS_SHARED_BLOCK_REF_KEY: | 712 | case BTRFS_SHARED_BLOCK_REF_KEY: |
530 | ret = __add_prelim_ref(prefs, 0, info_key, | 713 | ret = __add_prelim_ref(prefs, 0, NULL, |
531 | info_level + 1, key.offset, | 714 | info_level + 1, key.offset, |
532 | bytenr, 1); | 715 | bytenr, 1); |
533 | break; | 716 | break; |
@@ -543,8 +726,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
543 | break; | 726 | break; |
544 | } | 727 | } |
545 | case BTRFS_TREE_BLOCK_REF_KEY: | 728 | case BTRFS_TREE_BLOCK_REF_KEY: |
546 | ret = __add_prelim_ref(prefs, key.offset, info_key, | 729 | ret = __add_prelim_ref(prefs, key.offset, NULL, |
547 | info_level + 1, 0, bytenr, 1); | 730 | info_level + 1, 0, |
731 | bytenr, 1); | ||
548 | break; | 732 | break; |
549 | case BTRFS_EXTENT_DATA_REF_KEY: { | 733 | case BTRFS_EXTENT_DATA_REF_KEY: { |
550 | struct btrfs_extent_data_ref *dref; | 734 | struct btrfs_extent_data_ref *dref; |
@@ -560,7 +744,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
560 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 744 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
561 | root = btrfs_extent_data_ref_root(leaf, dref); | 745 | root = btrfs_extent_data_ref_root(leaf, dref); |
562 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, | 746 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
563 | bytenr, count); | 747 | bytenr, count); |
564 | break; | 748 | break; |
565 | } | 749 | } |
566 | default: | 750 | default: |
@@ -582,11 +766,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
582 | */ | 766 | */ |
583 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | 767 | static int find_parent_nodes(struct btrfs_trans_handle *trans, |
584 | struct btrfs_fs_info *fs_info, u64 bytenr, | 768 | struct btrfs_fs_info *fs_info, u64 bytenr, |
585 | u64 seq, struct ulist *refs, struct ulist *roots) | 769 | u64 delayed_ref_seq, u64 time_seq, |
770 | struct ulist *refs, struct ulist *roots, | ||
771 | const u64 *extent_item_pos) | ||
586 | { | 772 | { |
587 | struct btrfs_key key; | 773 | struct btrfs_key key; |
588 | struct btrfs_path *path; | 774 | struct btrfs_path *path; |
589 | struct btrfs_key info_key = { 0 }; | ||
590 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 775 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
591 | struct btrfs_delayed_ref_head *head; | 776 | struct btrfs_delayed_ref_head *head; |
592 | int info_level = 0; | 777 | int info_level = 0; |
@@ -645,7 +830,7 @@ again: | |||
645 | btrfs_put_delayed_ref(&head->node); | 830 | btrfs_put_delayed_ref(&head->node); |
646 | goto again; | 831 | goto again; |
647 | } | 832 | } |
648 | ret = __add_delayed_refs(head, seq, &info_key, | 833 | ret = __add_delayed_refs(head, delayed_ref_seq, |
649 | &prefs_delayed); | 834 | &prefs_delayed); |
650 | if (ret) { | 835 | if (ret) { |
651 | spin_unlock(&delayed_refs->lock); | 836 | spin_unlock(&delayed_refs->lock); |
@@ -659,16 +844,17 @@ again: | |||
659 | struct extent_buffer *leaf; | 844 | struct extent_buffer *leaf; |
660 | int slot; | 845 | int slot; |
661 | 846 | ||
847 | path->slots[0]--; | ||
662 | leaf = path->nodes[0]; | 848 | leaf = path->nodes[0]; |
663 | slot = path->slots[0] - 1; | 849 | slot = path->slots[0]; |
664 | btrfs_item_key_to_cpu(leaf, &key, slot); | 850 | btrfs_item_key_to_cpu(leaf, &key, slot); |
665 | if (key.objectid == bytenr && | 851 | if (key.objectid == bytenr && |
666 | key.type == BTRFS_EXTENT_ITEM_KEY) { | 852 | key.type == BTRFS_EXTENT_ITEM_KEY) { |
667 | ret = __add_inline_refs(fs_info, path, bytenr, | 853 | ret = __add_inline_refs(fs_info, path, bytenr, |
668 | &info_key, &info_level, &prefs); | 854 | &info_level, &prefs); |
669 | if (ret) | 855 | if (ret) |
670 | goto out; | 856 | goto out; |
671 | ret = __add_keyed_refs(fs_info, path, bytenr, &info_key, | 857 | ret = __add_keyed_refs(fs_info, path, bytenr, |
672 | info_level, &prefs); | 858 | info_level, &prefs); |
673 | if (ret) | 859 | if (ret) |
674 | goto out; | 860 | goto out; |
@@ -676,21 +862,18 @@ again: | |||
676 | } | 862 | } |
677 | btrfs_release_path(path); | 863 | btrfs_release_path(path); |
678 | 864 | ||
679 | /* | ||
680 | * when adding the delayed refs above, the info_key might not have | ||
681 | * been known yet. Go over the list and replace the missing keys | ||
682 | */ | ||
683 | list_for_each_entry(ref, &prefs_delayed, list) { | ||
684 | if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0) | ||
685 | memcpy(&ref->key, &info_key, sizeof(ref->key)); | ||
686 | } | ||
687 | list_splice_init(&prefs_delayed, &prefs); | 865 | list_splice_init(&prefs_delayed, &prefs); |
688 | 866 | ||
867 | ret = __add_missing_keys(fs_info, &prefs); | ||
868 | if (ret) | ||
869 | goto out; | ||
870 | |||
689 | ret = __merge_refs(&prefs, 1); | 871 | ret = __merge_refs(&prefs, 1); |
690 | if (ret) | 872 | if (ret) |
691 | goto out; | 873 | goto out; |
692 | 874 | ||
693 | ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs); | 875 | ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, |
876 | &prefs, extent_item_pos); | ||
694 | if (ret) | 877 | if (ret) |
695 | goto out; | 878 | goto out; |
696 | 879 | ||
@@ -709,7 +892,33 @@ again: | |||
709 | BUG_ON(ret < 0); | 892 | BUG_ON(ret < 0); |
710 | } | 893 | } |
711 | if (ref->count && ref->parent) { | 894 | if (ref->count && ref->parent) { |
712 | ret = ulist_add(refs, ref->parent, 0, GFP_NOFS); | 895 | struct extent_inode_elem *eie = NULL; |
896 | if (extent_item_pos && !ref->inode_list) { | ||
897 | u32 bsz; | ||
898 | struct extent_buffer *eb; | ||
899 | bsz = btrfs_level_size(fs_info->extent_root, | ||
900 | info_level); | ||
901 | eb = read_tree_block(fs_info->extent_root, | ||
902 | ref->parent, bsz, 0); | ||
903 | BUG_ON(!eb); | ||
904 | ret = find_extent_in_eb(eb, bytenr, | ||
905 | *extent_item_pos, &eie); | ||
906 | ref->inode_list = eie; | ||
907 | free_extent_buffer(eb); | ||
908 | } | ||
909 | ret = ulist_add_merge(refs, ref->parent, | ||
910 | (unsigned long)ref->inode_list, | ||
911 | (unsigned long *)&eie, GFP_NOFS); | ||
912 | if (!ret && extent_item_pos) { | ||
913 | /* | ||
914 | * we've recorded that parent, so we must extend | ||
915 | * its inode list here | ||
916 | */ | ||
917 | BUG_ON(!eie); | ||
918 | while (eie->next) | ||
919 | eie = eie->next; | ||
920 | eie->next = ref->inode_list; | ||
921 | } | ||
713 | BUG_ON(ret < 0); | 922 | BUG_ON(ret < 0); |
714 | } | 923 | } |
715 | kfree(ref); | 924 | kfree(ref); |
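The extend-the-inode-list logic just above relies on the return convention of ulist_add_merge() in fs/btrfs/ulist.c, which is not part of this diff; as I read that helper, the contract is roughly:

    /*
     * ret = ulist_add_merge(ulist, val, aux, &old_aux, gfp_mask);
     *   ret == 1: val was not in the ulist; it is inserted and 'aux' is stored with it
     *   ret == 0: val was already present; nothing is stored and 'old_aux' returns
     *             the aux value recorded earlier
     *   ret  < 0: allocation failure
     */

So when ret is 0 the parent block was already added on behalf of an earlier ref; 'eie' then points at the inode list recorded back then, and the current ref's list is chained onto its tail so that no inum/offset hit is lost.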
@@ -734,6 +943,28 @@ out: | |||
734 | return ret; | 943 | return ret; |
735 | } | 944 | } |
736 | 945 | ||
946 | static void free_leaf_list(struct ulist *blocks) | ||
947 | { | ||
948 | struct ulist_node *node = NULL; | ||
949 | struct extent_inode_elem *eie; | ||
950 | struct extent_inode_elem *eie_next; | ||
951 | struct ulist_iterator uiter; | ||
952 | |||
953 | ULIST_ITER_INIT(&uiter); | ||
954 | while ((node = ulist_next(blocks, &uiter))) { | ||
955 | if (!node->aux) | ||
956 | continue; | ||
957 | eie = (struct extent_inode_elem *)node->aux; | ||
958 | for (; eie; eie = eie_next) { | ||
959 | eie_next = eie->next; | ||
960 | kfree(eie); | ||
961 | } | ||
962 | node->aux = 0; | ||
963 | } | ||
964 | |||
965 | ulist_free(blocks); | ||
966 | } | ||
967 | |||
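free_leaf_list() already uses the new ulist iterator API that the rest of this patch converts every loop to. For reference, the idiom is (names here are placeholders):

    struct ulist_iterator uiter;
    struct ulist_node *node;

    ULIST_ITER_INIT(&uiter);
    while ((node = ulist_next(some_ulist, &uiter))) {
        /* node->val is the stored value, node->aux the caller-defined payload */
    }

Previously the caller handed the last returned node back into ulist_next(); keeping the cursor in a caller-owned iterator lets the nested refs/roots loops further down run independently of each other.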
737 | /* | 968 | /* |
738 | * Finds all leafs with a reference to the specified combination of bytenr and | 969 | * Finds all leafs with a reference to the specified combination of bytenr and |
739 | * offset. key_list_head will point to a list of corresponding keys (caller must | 970 | * offset. key_list_head will point to a list of corresponding keys (caller must |
@@ -744,7 +975,9 @@ out: | |||
744 | */ | 975 | */ |
745 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | 976 | static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, |
746 | struct btrfs_fs_info *fs_info, u64 bytenr, | 977 | struct btrfs_fs_info *fs_info, u64 bytenr, |
747 | u64 num_bytes, u64 seq, struct ulist **leafs) | 978 | u64 delayed_ref_seq, u64 time_seq, |
979 | struct ulist **leafs, | ||
980 | const u64 *extent_item_pos) | ||
748 | { | 981 | { |
749 | struct ulist *tmp; | 982 | struct ulist *tmp; |
750 | int ret; | 983 | int ret; |
@@ -758,11 +991,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
758 | return -ENOMEM; | 991 | return -ENOMEM; |
759 | } | 992 | } |
760 | 993 | ||
761 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp); | 994 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
995 | time_seq, *leafs, tmp, extent_item_pos); | ||
762 | ulist_free(tmp); | 996 | ulist_free(tmp); |
763 | 997 | ||
764 | if (ret < 0 && ret != -ENOENT) { | 998 | if (ret < 0 && ret != -ENOENT) { |
765 | ulist_free(*leafs); | 999 | free_leaf_list(*leafs); |
766 | return ret; | 1000 | return ret; |
767 | } | 1001 | } |
768 | 1002 | ||
@@ -784,10 +1018,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
784 | */ | 1018 | */ |
785 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 1019 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
786 | struct btrfs_fs_info *fs_info, u64 bytenr, | 1020 | struct btrfs_fs_info *fs_info, u64 bytenr, |
787 | u64 num_bytes, u64 seq, struct ulist **roots) | 1021 | u64 delayed_ref_seq, u64 time_seq, |
1022 | struct ulist **roots) | ||
788 | { | 1023 | { |
789 | struct ulist *tmp; | 1024 | struct ulist *tmp; |
790 | struct ulist_node *node = NULL; | 1025 | struct ulist_node *node = NULL; |
1026 | struct ulist_iterator uiter; | ||
791 | int ret; | 1027 | int ret; |
792 | 1028 | ||
793 | tmp = ulist_alloc(GFP_NOFS); | 1029 | tmp = ulist_alloc(GFP_NOFS); |
@@ -799,15 +1035,16 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
799 | return -ENOMEM; | 1035 | return -ENOMEM; |
800 | } | 1036 | } |
801 | 1037 | ||
1038 | ULIST_ITER_INIT(&uiter); | ||
802 | while (1) { | 1039 | while (1) { |
803 | ret = find_parent_nodes(trans, fs_info, bytenr, seq, | 1040 | ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq, |
804 | tmp, *roots); | 1041 | time_seq, tmp, *roots, NULL); |
805 | if (ret < 0 && ret != -ENOENT) { | 1042 | if (ret < 0 && ret != -ENOENT) { |
806 | ulist_free(tmp); | 1043 | ulist_free(tmp); |
807 | ulist_free(*roots); | 1044 | ulist_free(*roots); |
808 | return ret; | 1045 | return ret; |
809 | } | 1046 | } |
810 | node = ulist_next(tmp, node); | 1047 | node = ulist_next(tmp, &uiter); |
811 | if (!node) | 1048 | if (!node) |
812 | break; | 1049 | break; |
813 | bytenr = node->val; | 1050 | bytenr = node->val; |
@@ -1093,67 +1330,25 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | |||
1093 | return 0; | 1330 | return 0; |
1094 | } | 1331 | } |
1095 | 1332 | ||
1096 | static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical, | 1333 | static int iterate_leaf_refs(struct extent_inode_elem *inode_list, |
1097 | u64 orig_extent_item_objectid, | 1334 | u64 root, u64 extent_item_objectid, |
1098 | u64 extent_item_pos, u64 root, | ||
1099 | iterate_extent_inodes_t *iterate, void *ctx) | 1335 | iterate_extent_inodes_t *iterate, void *ctx) |
1100 | { | 1336 | { |
1101 | u64 disk_byte; | 1337 | struct extent_inode_elem *eie; |
1102 | struct btrfs_key key; | ||
1103 | struct btrfs_file_extent_item *fi; | ||
1104 | struct extent_buffer *eb; | ||
1105 | int slot; | ||
1106 | int nritems; | ||
1107 | int ret = 0; | 1338 | int ret = 0; |
1108 | int extent_type; | ||
1109 | u64 data_offset; | ||
1110 | u64 data_len; | ||
1111 | |||
1112 | eb = read_tree_block(fs_info->tree_root, logical, | ||
1113 | fs_info->tree_root->leafsize, 0); | ||
1114 | if (!eb) | ||
1115 | return -EIO; | ||
1116 | |||
1117 | /* | ||
1118 | * from the shared data ref, we only have the leaf but we need | ||
1119 | * the key. thus, we must look into all items and see that we | ||
1120 | * find one (some) with a reference to our extent item. | ||
1121 | */ | ||
1122 | nritems = btrfs_header_nritems(eb); | ||
1123 | for (slot = 0; slot < nritems; ++slot) { | ||
1124 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
1125 | if (key.type != BTRFS_EXTENT_DATA_KEY) | ||
1126 | continue; | ||
1127 | fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); | ||
1128 | extent_type = btrfs_file_extent_type(eb, fi); | ||
1129 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
1130 | continue; | ||
1131 | /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ | ||
1132 | disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); | ||
1133 | if (disk_byte != orig_extent_item_objectid) | ||
1134 | continue; | ||
1135 | |||
1136 | data_offset = btrfs_file_extent_offset(eb, fi); | ||
1137 | data_len = btrfs_file_extent_num_bytes(eb, fi); | ||
1138 | |||
1139 | if (extent_item_pos < data_offset || | ||
1140 | extent_item_pos >= data_offset + data_len) | ||
1141 | continue; | ||
1142 | 1339 | ||
1340 | for (eie = inode_list; eie; eie = eie->next) { | ||
1143 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " | 1341 | pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), " |
1144 | "root %llu\n", orig_extent_item_objectid, | 1342 | "root %llu\n", extent_item_objectid, |
1145 | key.objectid, key.offset, root); | 1343 | eie->inum, eie->offset, root); |
1146 | ret = iterate(key.objectid, | 1344 | ret = iterate(eie->inum, eie->offset, root, ctx); |
1147 | key.offset + (extent_item_pos - data_offset), | ||
1148 | root, ctx); | ||
1149 | if (ret) { | 1345 | if (ret) { |
1150 | pr_debug("stopping iteration because ret=%d\n", ret); | 1346 | pr_debug("stopping iteration for %llu due to ret=%d\n", |
1347 | extent_item_objectid, ret); | ||
1151 | break; | 1348 | break; |
1152 | } | 1349 | } |
1153 | } | 1350 | } |
1154 | 1351 | ||
1155 | free_extent_buffer(eb); | ||
1156 | |||
1157 | return ret; | 1352 | return ret; |
1158 | } | 1353 | } |
1159 | 1354 | ||
@@ -1175,7 +1370,10 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1175 | struct ulist *roots = NULL; | 1370 | struct ulist *roots = NULL; |
1176 | struct ulist_node *ref_node = NULL; | 1371 | struct ulist_node *ref_node = NULL; |
1177 | struct ulist_node *root_node = NULL; | 1372 | struct ulist_node *root_node = NULL; |
1178 | struct seq_list seq_elem; | 1373 | struct seq_list seq_elem = {}; |
1374 | struct seq_list tree_mod_seq_elem = {}; | ||
1375 | struct ulist_iterator ref_uiter; | ||
1376 | struct ulist_iterator root_uiter; | ||
1179 | struct btrfs_delayed_ref_root *delayed_refs = NULL; | 1377 | struct btrfs_delayed_ref_root *delayed_refs = NULL; |
1180 | 1378 | ||
1181 | pr_debug("resolving all inodes for extent %llu\n", | 1379 | pr_debug("resolving all inodes for extent %llu\n", |
@@ -1192,34 +1390,41 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |||
1192 | spin_lock(&delayed_refs->lock); | 1390 | spin_lock(&delayed_refs->lock); |
1193 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); | 1391 | btrfs_get_delayed_seq(delayed_refs, &seq_elem); |
1194 | spin_unlock(&delayed_refs->lock); | 1392 | spin_unlock(&delayed_refs->lock); |
1393 | btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1195 | } | 1394 | } |
1196 | 1395 | ||
1197 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, | 1396 | ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, |
1198 | extent_item_pos, seq_elem.seq, | 1397 | seq_elem.seq, tree_mod_seq_elem.seq, &refs, |
1199 | &refs); | 1398 | &extent_item_pos); |
1200 | |||
1201 | if (ret) | 1399 | if (ret) |
1202 | goto out; | 1400 | goto out; |
1203 | 1401 | ||
1204 | while (!ret && (ref_node = ulist_next(refs, ref_node))) { | 1402 | ULIST_ITER_INIT(&ref_uiter); |
1205 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1, | 1403 | while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { |
1206 | seq_elem.seq, &roots); | 1404 | ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, |
1405 | seq_elem.seq, | ||
1406 | tree_mod_seq_elem.seq, &roots); | ||
1207 | if (ret) | 1407 | if (ret) |
1208 | break; | 1408 | break; |
1209 | while (!ret && (root_node = ulist_next(roots, root_node))) { | 1409 | ULIST_ITER_INIT(&root_uiter); |
1210 | pr_debug("root %llu references leaf %llu\n", | 1410 | while (!ret && (root_node = ulist_next(roots, &root_uiter))) { |
1211 | root_node->val, ref_node->val); | 1411 | pr_debug("root %llu references leaf %llu, data list " |
1212 | ret = iterate_leaf_refs(fs_info, ref_node->val, | 1412 | "%#lx\n", root_node->val, ref_node->val, |
1213 | extent_item_objectid, | 1413 | ref_node->aux); |
1214 | extent_item_pos, root_node->val, | 1414 | ret = iterate_leaf_refs( |
1215 | iterate, ctx); | 1415 | (struct extent_inode_elem *)ref_node->aux, |
1416 | root_node->val, extent_item_objectid, | ||
1417 | iterate, ctx); | ||
1216 | } | 1418 | } |
1419 | ulist_free(roots); | ||
1420 | roots = NULL; | ||
1217 | } | 1421 | } |
1218 | 1422 | ||
1219 | ulist_free(refs); | 1423 | free_leaf_list(refs); |
1220 | ulist_free(roots); | 1424 | ulist_free(roots); |
1221 | out: | 1425 | out: |
1222 | if (!search_commit_root) { | 1426 | if (!search_commit_root) { |
1427 | btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); | ||
1223 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); | 1428 | btrfs_put_delayed_seq(delayed_refs, &seq_elem); |
1224 | btrfs_end_transaction(trans, fs_info->extent_root); | 1429 | btrfs_end_transaction(trans, fs_info->extent_root); |
1225 | } | 1430 | } |
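Taken together, the backref.c changes move inode resolution out of iterate_leaf_refs() and into find_parent_nodes(). A condensed view of the new flow (simplified, not literal code):

    /*
     * iterate_extent_inodes(extent_item_objectid, extent_item_pos)
     *   -> btrfs_find_all_leafs(..., &extent_item_pos)
     *        -> find_parent_nodes(): for every leaf referencing the extent,
     *           build an extent_inode_elem list (inum/offset pairs) and
     *           attach it to that leaf's ulist entry via node->aux
     *   -> for each leaf:  btrfs_find_all_roots()
     *   -> for each root:  iterate_leaf_refs() now only walks the prebuilt
     *      inode list instead of re-reading and re-scanning the leaf
     */

The old iterate_leaf_refs() had to read and scan every leaf once per root that references it; with the list attached to the ulist node, the leaf is resolved once and the result reused for every root.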
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 57ea2e959e4d..c18d8ac7b795 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -58,7 +58,8 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | |||
58 | 58 | ||
59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | 59 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
60 | struct btrfs_fs_info *fs_info, u64 bytenr, | 60 | struct btrfs_fs_info *fs_info, u64 bytenr, |
61 | u64 num_bytes, u64 seq, struct ulist **roots); | 61 | u64 delayed_ref_seq, u64 time_seq, |
62 | struct ulist **roots); | ||
62 | 63 | ||
63 | struct btrfs_data_container *init_data_container(u32 total_bytes); | 64 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
64 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | 65 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9b9b15fd5204..e616f8872e69 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -24,6 +24,20 @@ | |||
24 | #include "ordered-data.h" | 24 | #include "ordered-data.h" |
25 | #include "delayed-inode.h" | 25 | #include "delayed-inode.h" |
26 | 26 | ||
27 | /* | ||
28 | * ordered_data_close is set by truncate when a file that used | ||
29 | * to have good data has been truncated to zero. When it is set | ||
30 | * the btrfs file release call will add this inode to the | ||
31 | * ordered operations list so that we make sure to flush out any | ||
32 | * new data the application may have written before commit. | ||
33 | */ | ||
34 | #define BTRFS_INODE_ORDERED_DATA_CLOSE 0 | ||
35 | #define BTRFS_INODE_ORPHAN_META_RESERVED 1 | ||
36 | #define BTRFS_INODE_DUMMY 2 | ||
37 | #define BTRFS_INODE_IN_DEFRAG 3 | ||
38 | #define BTRFS_INODE_DELALLOC_META_RESERVED 4 | ||
39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 | ||
40 | |||
27 | /* in memory btrfs inode */ | 41 | /* in memory btrfs inode */ |
28 | struct btrfs_inode { | 42 | struct btrfs_inode { |
29 | /* which subvolume this inode belongs to */ | 43 | /* which subvolume this inode belongs to */ |
@@ -57,9 +71,6 @@ struct btrfs_inode { | |||
57 | /* used to order data wrt metadata */ | 71 | /* used to order data wrt metadata */ |
58 | struct btrfs_ordered_inode_tree ordered_tree; | 72 | struct btrfs_ordered_inode_tree ordered_tree; |
59 | 73 | ||
60 | /* for keeping track of orphaned inodes */ | ||
61 | struct list_head i_orphan; | ||
62 | |||
63 | /* list of all the delalloc inodes in the FS. There are times we need | 74 | /* list of all the delalloc inodes in the FS. There are times we need |
64 | * to write all the delalloc pages to disk, and this list is used | 75 | * to write all the delalloc pages to disk, and this list is used |
65 | * to walk them all. | 76 | * to walk them all. |
@@ -78,14 +89,13 @@ struct btrfs_inode { | |||
78 | /* the space_info for where this inode's data allocations are done */ | 89 | /* the space_info for where this inode's data allocations are done */ |
79 | struct btrfs_space_info *space_info; | 90 | struct btrfs_space_info *space_info; |
80 | 91 | ||
92 | unsigned long runtime_flags; | ||
93 | |||
81 | /* full 64 bit generation number, struct vfs_inode doesn't have a big | 94 | /* full 64 bit generation number, struct vfs_inode doesn't have a big |
82 | * enough field for this. | 95 | * enough field for this. |
83 | */ | 96 | */ |
84 | u64 generation; | 97 | u64 generation; |
85 | 98 | ||
86 | /* sequence number for NFS changes */ | ||
87 | u64 sequence; | ||
88 | |||
89 | /* | 99 | /* |
90 | * transid of the trans_handle that last modified this inode | 100 | * transid of the trans_handle that last modified this inode |
91 | */ | 101 | */ |
@@ -145,22 +155,9 @@ struct btrfs_inode { | |||
145 | unsigned reserved_extents; | 155 | unsigned reserved_extents; |
146 | 156 | ||
147 | /* | 157 | /* |
148 | * ordered_data_close is set by truncate when a file that used | ||
149 | * to have good data has been truncated to zero. When it is set | ||
150 | * the btrfs file release call will add this inode to the | ||
151 | * ordered operations list so that we make sure to flush out any | ||
152 | * new data the application may have written before commit. | ||
153 | */ | ||
154 | unsigned ordered_data_close:1; | ||
155 | unsigned orphan_meta_reserved:1; | ||
156 | unsigned dummy_inode:1; | ||
157 | unsigned in_defrag:1; | ||
158 | unsigned delalloc_meta_reserved:1; | ||
159 | |||
160 | /* | ||
161 | * always compress this one file | 158 | * always compress this one file |
162 | */ | 159 | */ |
163 | unsigned force_compress:4; | 160 | unsigned force_compress; |
164 | 161 | ||
165 | struct btrfs_delayed_node *delayed_node; | 162 | struct btrfs_delayed_node *delayed_node; |
166 | 163 | ||
@@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, | |||
202 | return false; | 199 | return false; |
203 | } | 200 | } |
204 | 201 | ||
202 | static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | ||
203 | { | ||
204 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
205 | int ret = 0; | ||
206 | |||
207 | mutex_lock(&root->log_mutex); | ||
208 | if (BTRFS_I(inode)->logged_trans == generation && | ||
209 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
210 | ret = 1; | ||
211 | mutex_unlock(&root->log_mutex); | ||
212 | return ret; | ||
213 | } | ||
214 | |||
205 | #endif | 215 | #endif |
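The flag conversion above replaces five single-bit bitfields with bit numbers applied to one unsigned long, runtime_flags. The call-site conversions live in other files of this series and are not shown here; a hypothetical user would look like this:

    struct btrfs_inode *bi = BTRFS_I(inode);    /* illustration only, not from this hunk */

    set_bit(BTRFS_INODE_IN_DEFRAG, &bi->runtime_flags);
    if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
                           &bi->runtime_flags)) {
        /* the file was truncated to zero earlier; flush any new data
         * before commit, per the comment above the flag definitions */
    }

Unlike adjacent C bitfields, which cannot be updated atomically with respect to each other, individual bits of an unsigned long can be set, cleared and tested with the atomic bitops without holding a lock, which is presumably the point of the change (force_compress needs more than one bit and therefore stays a plain integer).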
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index c053e90f2006..9cebb1fd6a3c 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -103,8 +103,6 @@ | |||
103 | #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 | 103 | #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 |
104 | #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, | 104 | #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, |
105 | * excluding " [...]" */ | 105 | * excluding " [...]" */ |
106 | #define BTRFSIC_BLOCK_SIZE PAGE_SIZE | ||
107 | |||
108 | #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) | 106 | #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) |
109 | 107 | ||
110 | /* | 108 | /* |
@@ -210,8 +208,9 @@ struct btrfsic_block_data_ctx { | |||
210 | u64 dev_bytenr; /* physical bytenr on device */ | 208 | u64 dev_bytenr; /* physical bytenr on device */ |
211 | u32 len; | 209 | u32 len; |
212 | struct btrfsic_dev_state *dev; | 210 | struct btrfsic_dev_state *dev; |
213 | char *data; | 211 | char **datav; |
214 | struct buffer_head *bh; /* do not use if set to NULL */ | 212 | struct page **pagev; |
213 | void *mem_to_free; | ||
215 | }; | 214 | }; |
216 | 215 | ||
217 | /* This structure is used to implement recursion without occupying | 216 | /* This structure is used to implement recursion without occupying |
@@ -243,6 +242,8 @@ struct btrfsic_state { | |||
243 | struct btrfs_root *root; | 242 | struct btrfs_root *root; |
244 | u64 max_superblock_generation; | 243 | u64 max_superblock_generation; |
245 | struct btrfsic_block *latest_superblock; | 244 | struct btrfsic_block *latest_superblock; |
245 | u32 metablock_size; | ||
246 | u32 datablock_size; | ||
246 | }; | 247 | }; |
247 | 248 | ||
248 | static void btrfsic_block_init(struct btrfsic_block *b); | 249 | static void btrfsic_block_init(struct btrfsic_block *b); |
@@ -290,8 +291,10 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
290 | static int btrfsic_process_metablock(struct btrfsic_state *state, | 291 | static int btrfsic_process_metablock(struct btrfsic_state *state, |
291 | struct btrfsic_block *block, | 292 | struct btrfsic_block *block, |
292 | struct btrfsic_block_data_ctx *block_ctx, | 293 | struct btrfsic_block_data_ctx *block_ctx, |
293 | struct btrfs_header *hdr, | ||
294 | int limit_nesting, int force_iodone_flag); | 294 | int limit_nesting, int force_iodone_flag); |
295 | static void btrfsic_read_from_block_data( | ||
296 | struct btrfsic_block_data_ctx *block_ctx, | ||
297 | void *dst, u32 offset, size_t len); | ||
295 | static int btrfsic_create_link_to_next_block( | 298 | static int btrfsic_create_link_to_next_block( |
296 | struct btrfsic_state *state, | 299 | struct btrfsic_state *state, |
297 | struct btrfsic_block *block, | 300 | struct btrfsic_block *block, |
@@ -318,12 +321,13 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); | |||
318 | static int btrfsic_read_block(struct btrfsic_state *state, | 321 | static int btrfsic_read_block(struct btrfsic_state *state, |
319 | struct btrfsic_block_data_ctx *block_ctx); | 322 | struct btrfsic_block_data_ctx *block_ctx); |
320 | static void btrfsic_dump_database(struct btrfsic_state *state); | 323 | static void btrfsic_dump_database(struct btrfsic_state *state); |
324 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err); | ||
321 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | 325 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, |
322 | const u8 *data, unsigned int size); | 326 | char **datav, unsigned int num_pages); |
323 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | 327 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, |
324 | u64 dev_bytenr, u8 *mapped_data, | 328 | u64 dev_bytenr, char **mapped_datav, |
325 | unsigned int len, struct bio *bio, | 329 | unsigned int num_pages, |
326 | int *bio_is_patched, | 330 | struct bio *bio, int *bio_is_patched, |
327 | struct buffer_head *bh, | 331 | struct buffer_head *bh, |
328 | int submit_bio_bh_rw); | 332 | int submit_bio_bh_rw); |
329 | static int btrfsic_process_written_superblock( | 333 | static int btrfsic_process_written_superblock( |
@@ -375,7 +379,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup( | |||
375 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | 379 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, |
376 | u64 bytenr, | 380 | u64 bytenr, |
377 | struct btrfsic_dev_state *dev_state, | 381 | struct btrfsic_dev_state *dev_state, |
378 | u64 dev_bytenr, char *data); | 382 | u64 dev_bytenr); |
379 | 383 | ||
380 | static struct mutex btrfsic_mutex; | 384 | static struct mutex btrfsic_mutex; |
381 | static int btrfsic_is_initialized; | 385 | static int btrfsic_is_initialized; |
@@ -651,7 +655,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
651 | int pass; | 655 | int pass; |
652 | 656 | ||
653 | BUG_ON(NULL == state); | 657 | BUG_ON(NULL == state); |
654 | selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS); | 658 | selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); |
655 | if (NULL == selected_super) { | 659 | if (NULL == selected_super) { |
656 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | 660 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); |
657 | return -1; | 661 | return -1; |
@@ -718,7 +722,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
718 | 722 | ||
719 | num_copies = | 723 | num_copies = |
720 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 724 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
721 | next_bytenr, PAGE_SIZE); | 725 | next_bytenr, state->metablock_size); |
722 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 726 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
723 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 727 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
724 | (unsigned long long)next_bytenr, num_copies); | 728 | (unsigned long long)next_bytenr, num_copies); |
@@ -727,9 +731,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
727 | struct btrfsic_block *next_block; | 731 | struct btrfsic_block *next_block; |
728 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | 732 | struct btrfsic_block_data_ctx tmp_next_block_ctx; |
729 | struct btrfsic_block_link *l; | 733 | struct btrfsic_block_link *l; |
730 | struct btrfs_header *hdr; | ||
731 | 734 | ||
732 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 735 | ret = btrfsic_map_block(state, next_bytenr, |
736 | state->metablock_size, | ||
733 | &tmp_next_block_ctx, | 737 | &tmp_next_block_ctx, |
734 | mirror_num); | 738 | mirror_num); |
735 | if (ret) { | 739 | if (ret) { |
@@ -758,7 +762,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
758 | BUG_ON(NULL == l); | 762 | BUG_ON(NULL == l); |
759 | 763 | ||
760 | ret = btrfsic_read_block(state, &tmp_next_block_ctx); | 764 | ret = btrfsic_read_block(state, &tmp_next_block_ctx); |
761 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | 765 | if (ret < (int)PAGE_CACHE_SIZE) { |
762 | printk(KERN_INFO | 766 | printk(KERN_INFO |
763 | "btrfsic: read @logical %llu failed!\n", | 767 | "btrfsic: read @logical %llu failed!\n", |
764 | (unsigned long long) | 768 | (unsigned long long) |
@@ -768,11 +772,9 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
768 | return -1; | 772 | return -1; |
769 | } | 773 | } |
770 | 774 | ||
771 | hdr = (struct btrfs_header *)tmp_next_block_ctx.data; | ||
772 | ret = btrfsic_process_metablock(state, | 775 | ret = btrfsic_process_metablock(state, |
773 | next_block, | 776 | next_block, |
774 | &tmp_next_block_ctx, | 777 | &tmp_next_block_ctx, |
775 | hdr, | ||
776 | BTRFS_MAX_LEVEL + 3, 1); | 778 | BTRFS_MAX_LEVEL + 3, 1); |
777 | btrfsic_release_block_ctx(&tmp_next_block_ctx); | 779 | btrfsic_release_block_ctx(&tmp_next_block_ctx); |
778 | } | 780 | } |
@@ -799,7 +801,10 @@ static int btrfsic_process_superblock_dev_mirror( | |||
799 | 801 | ||
800 | /* super block bytenr is always the unmapped device bytenr */ | 802 | /* super block bytenr is always the unmapped device bytenr */ |
801 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); | 803 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); |
802 | bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096); | 804 | if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) |
805 | return -1; | ||
806 | bh = __bread(superblock_bdev, dev_bytenr / 4096, | ||
807 | BTRFS_SUPER_INFO_SIZE); | ||
803 | if (NULL == bh) | 808 | if (NULL == bh) |
804 | return -1; | 809 | return -1; |
805 | super_tmp = (struct btrfs_super_block *) | 810 | super_tmp = (struct btrfs_super_block *) |
@@ -808,7 +813,10 @@ static int btrfsic_process_superblock_dev_mirror( | |||
808 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || | 813 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || |
809 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, | 814 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, |
810 | sizeof(super_tmp->magic)) || | 815 | sizeof(super_tmp->magic)) || |
811 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) { | 816 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || |
817 | btrfs_super_nodesize(super_tmp) != state->metablock_size || | ||
818 | btrfs_super_leafsize(super_tmp) != state->metablock_size || | ||
819 | btrfs_super_sectorsize(super_tmp) != state->datablock_size) { | ||
812 | brelse(bh); | 820 | brelse(bh); |
813 | return 0; | 821 | return 0; |
814 | } | 822 | } |
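state->metablock_size and state->datablock_size are new fields whose initialization is outside the hunks shown; presumably they are filled from the mounted filesystem in btrfsic_mount(), along the lines of:

    /* presumed initialization, not visible in this diff */
    state->metablock_size = root->nodesize;     /* nodesize and leafsize are expected to match */
    state->datablock_size = root->sectorsize;

The added superblock checks then simply reject a device whose nodesize/leafsize/sectorsize disagree with the sizes the checker was configured for, instead of assuming everything is PAGE_SIZE.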
@@ -893,7 +901,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
893 | 901 | ||
894 | num_copies = | 902 | num_copies = |
895 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 903 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
896 | next_bytenr, PAGE_SIZE); | 904 | next_bytenr, state->metablock_size); |
897 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 905 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
898 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 906 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
899 | (unsigned long long)next_bytenr, num_copies); | 907 | (unsigned long long)next_bytenr, num_copies); |
@@ -902,7 +910,8 @@ static int btrfsic_process_superblock_dev_mirror( | |||
902 | struct btrfsic_block_data_ctx tmp_next_block_ctx; | 910 | struct btrfsic_block_data_ctx tmp_next_block_ctx; |
903 | struct btrfsic_block_link *l; | 911 | struct btrfsic_block_link *l; |
904 | 912 | ||
905 | if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 913 | if (btrfsic_map_block(state, next_bytenr, |
914 | state->metablock_size, | ||
906 | &tmp_next_block_ctx, | 915 | &tmp_next_block_ctx, |
907 | mirror_num)) { | 916 | mirror_num)) { |
908 | printk(KERN_INFO "btrfsic: btrfsic_map_block(" | 917 | printk(KERN_INFO "btrfsic: btrfsic_map_block(" |
@@ -966,13 +975,15 @@ static int btrfsic_process_metablock( | |||
966 | struct btrfsic_state *state, | 975 | struct btrfsic_state *state, |
967 | struct btrfsic_block *const first_block, | 976 | struct btrfsic_block *const first_block, |
968 | struct btrfsic_block_data_ctx *const first_block_ctx, | 977 | struct btrfsic_block_data_ctx *const first_block_ctx, |
969 | struct btrfs_header *const first_hdr, | ||
970 | int first_limit_nesting, int force_iodone_flag) | 978 | int first_limit_nesting, int force_iodone_flag) |
971 | { | 979 | { |
972 | struct btrfsic_stack_frame initial_stack_frame = { 0 }; | 980 | struct btrfsic_stack_frame initial_stack_frame = { 0 }; |
973 | struct btrfsic_stack_frame *sf; | 981 | struct btrfsic_stack_frame *sf; |
974 | struct btrfsic_stack_frame *next_stack; | 982 | struct btrfsic_stack_frame *next_stack; |
983 | struct btrfs_header *const first_hdr = | ||
984 | (struct btrfs_header *)first_block_ctx->datav[0]; | ||
975 | 985 | ||
986 | BUG_ON(!first_hdr); | ||
976 | sf = &initial_stack_frame; | 987 | sf = &initial_stack_frame; |
977 | sf->error = 0; | 988 | sf->error = 0; |
978 | sf->i = -1; | 989 | sf->i = -1; |
@@ -1012,21 +1023,47 @@ continue_with_current_leaf_stack_frame: | |||
1012 | } | 1023 | } |
1013 | 1024 | ||
1014 | if (sf->i < sf->nr) { | 1025 | if (sf->i < sf->nr) { |
1015 | struct btrfs_item *disk_item = leafhdr->items + sf->i; | 1026 | struct btrfs_item disk_item; |
1016 | struct btrfs_disk_key *disk_key = &disk_item->key; | 1027 | u32 disk_item_offset = |
1028 | (uintptr_t)(leafhdr->items + sf->i) - | ||
1029 | (uintptr_t)leafhdr; | ||
1030 | struct btrfs_disk_key *disk_key; | ||
1017 | u8 type; | 1031 | u8 type; |
1018 | const u32 item_offset = le32_to_cpu(disk_item->offset); | 1032 | u32 item_offset; |
1019 | 1033 | ||
1034 | if (disk_item_offset + sizeof(struct btrfs_item) > | ||
1035 | sf->block_ctx->len) { | ||
1036 | leaf_item_out_of_bounce_error: | ||
1037 | printk(KERN_INFO | ||
1038 | "btrfsic: leaf item out of bounce at logical %llu, dev %s\n", | ||
1039 | sf->block_ctx->start, | ||
1040 | sf->block_ctx->dev->name); | ||
1041 | goto one_stack_frame_backwards; | ||
1042 | } | ||
1043 | btrfsic_read_from_block_data(sf->block_ctx, | ||
1044 | &disk_item, | ||
1045 | disk_item_offset, | ||
1046 | sizeof(struct btrfs_item)); | ||
1047 | item_offset = le32_to_cpu(disk_item.offset); | ||
1048 | disk_key = &disk_item.key; | ||
1020 | type = disk_key->type; | 1049 | type = disk_key->type; |
1021 | 1050 | ||
1022 | if (BTRFS_ROOT_ITEM_KEY == type) { | 1051 | if (BTRFS_ROOT_ITEM_KEY == type) { |
1023 | const struct btrfs_root_item *const root_item = | 1052 | struct btrfs_root_item root_item; |
1024 | (struct btrfs_root_item *) | 1053 | u32 root_item_offset; |
1025 | (sf->block_ctx->data + | 1054 | u64 next_bytenr; |
1026 | offsetof(struct btrfs_leaf, items) + | 1055 | |
1027 | item_offset); | 1056 | root_item_offset = item_offset + |
1028 | const u64 next_bytenr = | 1057 | offsetof(struct btrfs_leaf, items); |
1029 | le64_to_cpu(root_item->bytenr); | 1058 | if (root_item_offset + |
1059 | sizeof(struct btrfs_root_item) > | ||
1060 | sf->block_ctx->len) | ||
1061 | goto leaf_item_out_of_bounce_error; | ||
1062 | btrfsic_read_from_block_data( | ||
1063 | sf->block_ctx, &root_item, | ||
1064 | root_item_offset, | ||
1065 | sizeof(struct btrfs_root_item)); | ||
1066 | next_bytenr = le64_to_cpu(root_item.bytenr); | ||
1030 | 1067 | ||
1031 | sf->error = | 1068 | sf->error = |
1032 | btrfsic_create_link_to_next_block( | 1069 | btrfsic_create_link_to_next_block( |
@@ -1041,7 +1078,7 @@ continue_with_current_leaf_stack_frame: | |||
1041 | &sf->num_copies, | 1078 | &sf->num_copies, |
1042 | &sf->mirror_num, | 1079 | &sf->mirror_num, |
1043 | disk_key, | 1080 | disk_key, |
1044 | le64_to_cpu(root_item-> | 1081 | le64_to_cpu(root_item. |
1045 | generation)); | 1082 | generation)); |
1046 | if (sf->error) | 1083 | if (sf->error) |
1047 | goto one_stack_frame_backwards; | 1084 | goto one_stack_frame_backwards; |
@@ -1049,7 +1086,7 @@ continue_with_current_leaf_stack_frame: | |||
1049 | if (NULL != sf->next_block) { | 1086 | if (NULL != sf->next_block) { |
1050 | struct btrfs_header *const next_hdr = | 1087 | struct btrfs_header *const next_hdr = |
1051 | (struct btrfs_header *) | 1088 | (struct btrfs_header *) |
1052 | sf->next_block_ctx.data; | 1089 | sf->next_block_ctx.datav[0]; |
1053 | 1090 | ||
1054 | next_stack = | 1091 | next_stack = |
1055 | btrfsic_stack_frame_alloc(); | 1092 | btrfsic_stack_frame_alloc(); |
@@ -1111,10 +1148,24 @@ continue_with_current_node_stack_frame: | |||
1111 | } | 1148 | } |
1112 | 1149 | ||
1113 | if (sf->i < sf->nr) { | 1150 | if (sf->i < sf->nr) { |
1114 | struct btrfs_key_ptr *disk_key_ptr = | 1151 | struct btrfs_key_ptr key_ptr; |
1115 | nodehdr->ptrs + sf->i; | 1152 | u32 key_ptr_offset; |
1116 | const u64 next_bytenr = | 1153 | u64 next_bytenr; |
1117 | le64_to_cpu(disk_key_ptr->blockptr); | 1154 | |
1155 | key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - | ||
1156 | (uintptr_t)nodehdr; | ||
1157 | if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > | ||
1158 | sf->block_ctx->len) { | ||
1159 | printk(KERN_INFO | ||
1160 | "btrfsic: node item out of bounce at logical %llu, dev %s\n", | ||
1161 | sf->block_ctx->start, | ||
1162 | sf->block_ctx->dev->name); | ||
1163 | goto one_stack_frame_backwards; | ||
1164 | } | ||
1165 | btrfsic_read_from_block_data( | ||
1166 | sf->block_ctx, &key_ptr, key_ptr_offset, | ||
1167 | sizeof(struct btrfs_key_ptr)); | ||
1168 | next_bytenr = le64_to_cpu(key_ptr.blockptr); | ||
1118 | 1169 | ||
1119 | sf->error = btrfsic_create_link_to_next_block( | 1170 | sf->error = btrfsic_create_link_to_next_block( |
1120 | state, | 1171 | state, |
@@ -1127,15 +1178,15 @@ continue_with_current_node_stack_frame: | |||
1127 | force_iodone_flag, | 1178 | force_iodone_flag, |
1128 | &sf->num_copies, | 1179 | &sf->num_copies, |
1129 | &sf->mirror_num, | 1180 | &sf->mirror_num, |
1130 | &disk_key_ptr->key, | 1181 | &key_ptr.key, |
1131 | le64_to_cpu(disk_key_ptr->generation)); | 1182 | le64_to_cpu(key_ptr.generation)); |
1132 | if (sf->error) | 1183 | if (sf->error) |
1133 | goto one_stack_frame_backwards; | 1184 | goto one_stack_frame_backwards; |
1134 | 1185 | ||
1135 | if (NULL != sf->next_block) { | 1186 | if (NULL != sf->next_block) { |
1136 | struct btrfs_header *const next_hdr = | 1187 | struct btrfs_header *const next_hdr = |
1137 | (struct btrfs_header *) | 1188 | (struct btrfs_header *) |
1138 | sf->next_block_ctx.data; | 1189 | sf->next_block_ctx.datav[0]; |
1139 | 1190 | ||
1140 | next_stack = btrfsic_stack_frame_alloc(); | 1191 | next_stack = btrfsic_stack_frame_alloc(); |
1141 | if (NULL == next_stack) | 1192 | if (NULL == next_stack) |
@@ -1181,6 +1232,35 @@ one_stack_frame_backwards: | |||
1181 | return sf->error; | 1232 | return sf->error; |
1182 | } | 1233 | } |
1183 | 1234 | ||
1235 | static void btrfsic_read_from_block_data( | ||
1236 | struct btrfsic_block_data_ctx *block_ctx, | ||
1237 | void *dstv, u32 offset, size_t len) | ||
1238 | { | ||
1239 | size_t cur; | ||
1240 | size_t offset_in_page; | ||
1241 | char *kaddr; | ||
1242 | char *dst = (char *)dstv; | ||
1243 | size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1); | ||
1244 | unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; | ||
1245 | |||
1246 | WARN_ON(offset + len > block_ctx->len); | ||
1247 | offset_in_page = (start_offset + offset) & | ||
1248 | ((unsigned long)PAGE_CACHE_SIZE - 1); | ||
1249 | |||
1250 | while (len > 0) { | ||
1251 | cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); | ||
1252 | BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >> | ||
1253 | PAGE_CACHE_SHIFT); | ||
1254 | kaddr = block_ctx->datav[i]; | ||
1255 | memcpy(dst, kaddr + offset_in_page, cur); | ||
1256 | |||
1257 | dst += cur; | ||
1258 | len -= cur; | ||
1259 | offset_in_page = 0; | ||
1260 | i++; | ||
1261 | } | ||
1262 | } | ||
1263 | |||
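A worked example of the index arithmetic in btrfsic_read_from_block_data(), assuming PAGE_CACHE_SIZE is 4096 and a block whose logical start is page aligned (start_offset == 0):

    /*
     * copy len = 40 bytes at offset = 4090 out of a 16 KiB block:
     *   i              = (0 + 4090) >> PAGE_CACHE_SHIFT = 0
     *   offset_in_page = 4090
     *   1st iteration: cur = min(40, 4096 - 4090) = 6   -> copy from datav[0] + 4090
     *   2nd iteration: cur = min(34, 4096 - 0)    = 34  -> copy from datav[1] + 0
     */

The copy transparently crosses the page boundary, which is why the single 'data' buffer of the block context can be replaced by the datav[] page array for blocks larger than one page.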
1184 | static int btrfsic_create_link_to_next_block( | 1264 | static int btrfsic_create_link_to_next_block( |
1185 | struct btrfsic_state *state, | 1265 | struct btrfsic_state *state, |
1186 | struct btrfsic_block *block, | 1266 | struct btrfsic_block *block, |
@@ -1204,7 +1284,7 @@ static int btrfsic_create_link_to_next_block( | |||
1204 | if (0 == *num_copiesp) { | 1284 | if (0 == *num_copiesp) { |
1205 | *num_copiesp = | 1285 | *num_copiesp = |
1206 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1286 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
1207 | next_bytenr, PAGE_SIZE); | 1287 | next_bytenr, state->metablock_size); |
1208 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1288 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
1209 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1289 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
1210 | (unsigned long long)next_bytenr, *num_copiesp); | 1290 | (unsigned long long)next_bytenr, *num_copiesp); |
@@ -1219,7 +1299,7 @@ static int btrfsic_create_link_to_next_block( | |||
1219 | "btrfsic_create_link_to_next_block(mirror_num=%d)\n", | 1299 | "btrfsic_create_link_to_next_block(mirror_num=%d)\n", |
1220 | *mirror_nump); | 1300 | *mirror_nump); |
1221 | ret = btrfsic_map_block(state, next_bytenr, | 1301 | ret = btrfsic_map_block(state, next_bytenr, |
1222 | BTRFSIC_BLOCK_SIZE, | 1302 | state->metablock_size, |
1223 | next_block_ctx, *mirror_nump); | 1303 | next_block_ctx, *mirror_nump); |
1224 | if (ret) { | 1304 | if (ret) { |
1225 | printk(KERN_INFO | 1305 | printk(KERN_INFO |
@@ -1314,7 +1394,7 @@ static int btrfsic_create_link_to_next_block( | |||
1314 | 1394 | ||
1315 | if (limit_nesting > 0 && did_alloc_block_link) { | 1395 | if (limit_nesting > 0 && did_alloc_block_link) { |
1316 | ret = btrfsic_read_block(state, next_block_ctx); | 1396 | ret = btrfsic_read_block(state, next_block_ctx); |
1317 | if (ret < (int)BTRFSIC_BLOCK_SIZE) { | 1397 | if (ret < (int)next_block_ctx->len) { |
1318 | printk(KERN_INFO | 1398 | printk(KERN_INFO |
1319 | "btrfsic: read block @logical %llu failed!\n", | 1399 | "btrfsic: read block @logical %llu failed!\n", |
1320 | (unsigned long long)next_bytenr); | 1400 | (unsigned long long)next_bytenr); |
@@ -1339,43 +1419,74 @@ static int btrfsic_handle_extent_data( | |||
1339 | u32 item_offset, int force_iodone_flag) | 1419 | u32 item_offset, int force_iodone_flag) |
1340 | { | 1420 | { |
1341 | int ret; | 1421 | int ret; |
1342 | struct btrfs_file_extent_item *file_extent_item = | 1422 | struct btrfs_file_extent_item file_extent_item; |
1343 | (struct btrfs_file_extent_item *)(block_ctx->data + | 1423 | u64 file_extent_item_offset; |
1344 | offsetof(struct btrfs_leaf, | 1424 | u64 next_bytenr; |
1345 | items) + item_offset); | 1425 | u64 num_bytes; |
1346 | u64 next_bytenr = | 1426 | u64 generation; |
1347 | le64_to_cpu(file_extent_item->disk_bytenr) + | ||
1348 | le64_to_cpu(file_extent_item->offset); | ||
1349 | u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes); | ||
1350 | u64 generation = le64_to_cpu(file_extent_item->generation); | ||
1351 | struct btrfsic_block_link *l; | 1427 | struct btrfsic_block_link *l; |
1352 | 1428 | ||
1429 | file_extent_item_offset = offsetof(struct btrfs_leaf, items) + | ||
1430 | item_offset; | ||
1431 | if (file_extent_item_offset + | ||
1432 | offsetof(struct btrfs_file_extent_item, disk_num_bytes) > | ||
1433 | block_ctx->len) { | ||
1434 | printk(KERN_INFO | ||
1435 | "btrfsic: file item out of bounce at logical %llu, dev %s\n", | ||
1436 | block_ctx->start, block_ctx->dev->name); | ||
1437 | return -1; | ||
1438 | } | ||
1439 | |||
1440 | btrfsic_read_from_block_data(block_ctx, &file_extent_item, | ||
1441 | file_extent_item_offset, | ||
1442 | offsetof(struct btrfs_file_extent_item, disk_num_bytes)); | ||
1443 | if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || | ||
1444 | ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { | ||
1445 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | ||
1446 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", | ||
1447 | file_extent_item.type, | ||
1448 | (unsigned long long) | ||
1449 | le64_to_cpu(file_extent_item.disk_bytenr)); | ||
1450 | return 0; | ||
1451 | } | ||
1452 | |||
1453 | if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > | ||
1454 | block_ctx->len) { | ||
1455 | printk(KERN_INFO | ||
1456 | "btrfsic: file item out of bounce at logical %llu, dev %s\n", | ||
1457 | block_ctx->start, block_ctx->dev->name); | ||
1458 | return -1; | ||
1459 | } | ||
1460 | btrfsic_read_from_block_data(block_ctx, &file_extent_item, | ||
1461 | file_extent_item_offset, | ||
1462 | sizeof(struct btrfs_file_extent_item)); | ||
1463 | next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + | ||
1464 | le64_to_cpu(file_extent_item.offset); | ||
1465 | generation = le64_to_cpu(file_extent_item.generation); | ||
1466 | num_bytes = le64_to_cpu(file_extent_item.num_bytes); | ||
1467 | generation = le64_to_cpu(file_extent_item.generation); | ||
1468 | |||
1353 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) | 1469 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) |
1354 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," | 1470 | printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," |
1355 | " offset = %llu, num_bytes = %llu\n", | 1471 | " offset = %llu, num_bytes = %llu\n", |
1356 | file_extent_item->type, | 1472 | file_extent_item.type, |
1357 | (unsigned long long) | 1473 | (unsigned long long) |
1358 | le64_to_cpu(file_extent_item->disk_bytenr), | 1474 | le64_to_cpu(file_extent_item.disk_bytenr), |
1359 | (unsigned long long) | 1475 | (unsigned long long)le64_to_cpu(file_extent_item.offset), |
1360 | le64_to_cpu(file_extent_item->offset), | 1476 | (unsigned long long)num_bytes); |
1361 | (unsigned long long) | ||
1362 | le64_to_cpu(file_extent_item->num_bytes)); | ||
1363 | if (BTRFS_FILE_EXTENT_REG != file_extent_item->type || | ||
1364 | ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr)) | ||
1365 | return 0; | ||
1366 | while (num_bytes > 0) { | 1477 | while (num_bytes > 0) { |
1367 | u32 chunk_len; | 1478 | u32 chunk_len; |
1368 | int num_copies; | 1479 | int num_copies; |
1369 | int mirror_num; | 1480 | int mirror_num; |
1370 | 1481 | ||
1371 | if (num_bytes > BTRFSIC_BLOCK_SIZE) | 1482 | if (num_bytes > state->datablock_size) |
1372 | chunk_len = BTRFSIC_BLOCK_SIZE; | 1483 | chunk_len = state->datablock_size; |
1373 | else | 1484 | else |
1374 | chunk_len = num_bytes; | 1485 | chunk_len = num_bytes; |
1375 | 1486 | ||
1376 | num_copies = | 1487 | num_copies = |
1377 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1488 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
1378 | next_bytenr, PAGE_SIZE); | 1489 | next_bytenr, state->datablock_size); |
1379 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1490 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
1380 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1491 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
1381 | (unsigned long long)next_bytenr, num_copies); | 1492 | (unsigned long long)next_bytenr, num_copies); |
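
The rewritten btrfsic_handle_extent_data() above no longer assumes a fixed 4 KiB block: it copies the file extent item out of the paged block data with bounds checks and then walks the referenced extent in state->datablock_size steps. Below is a minimal userspace sketch of just that chunking loop; walk_extent() and its printf stand in for the kernel's per-chunk map/verify work and are not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Walk [start, start + len) in steps of at most block_size bytes,
 * mirroring the while (num_bytes > 0) loop in the hunk above. */
static void walk_extent(uint64_t start, uint64_t len, uint32_t block_size)
{
    uint64_t next_bytenr = start;
    uint64_t num_bytes = len;

    while (num_bytes > 0) {
        uint32_t chunk_len;

        if (num_bytes > block_size)
            chunk_len = block_size;
        else
            chunk_len = (uint32_t)num_bytes;

        /* in the kernel code this is where the chunk is mapped,
         * read and linked into the integrity-checker state */
        printf("chunk @%llu, len %u\n",
               (unsigned long long)next_bytenr, chunk_len);

        next_bytenr += chunk_len;
        num_bytes -= chunk_len;
    }
}

int main(void)
{
    walk_extent(1048576, 10000, 4096);  /* e.g. sectorsize = 4096 */
    return 0;
}
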
@@ -1475,8 +1586,9 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | |||
1475 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; | 1586 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; |
1476 | block_ctx_out->start = bytenr; | 1587 | block_ctx_out->start = bytenr; |
1477 | block_ctx_out->len = len; | 1588 | block_ctx_out->len = len; |
1478 | block_ctx_out->data = NULL; | 1589 | block_ctx_out->datav = NULL; |
1479 | block_ctx_out->bh = NULL; | 1590 | block_ctx_out->pagev = NULL; |
1591 | block_ctx_out->mem_to_free = NULL; | ||
1480 | 1592 | ||
1481 | if (0 == ret) | 1593 | if (0 == ret) |
1482 | kfree(multi); | 1594 | kfree(multi); |
@@ -1496,8 +1608,9 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | |||
1496 | block_ctx_out->dev_bytenr = bytenr; | 1608 | block_ctx_out->dev_bytenr = bytenr; |
1497 | block_ctx_out->start = bytenr; | 1609 | block_ctx_out->start = bytenr; |
1498 | block_ctx_out->len = len; | 1610 | block_ctx_out->len = len; |
1499 | block_ctx_out->data = NULL; | 1611 | block_ctx_out->datav = NULL; |
1500 | block_ctx_out->bh = NULL; | 1612 | block_ctx_out->pagev = NULL; |
1613 | block_ctx_out->mem_to_free = NULL; | ||
1501 | if (NULL != block_ctx_out->dev) { | 1614 | if (NULL != block_ctx_out->dev) { |
1502 | return 0; | 1615 | return 0; |
1503 | } else { | 1616 | } else { |
@@ -1508,38 +1621,127 @@ static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, | |||
1508 | 1621 | ||
1509 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) | 1622 | static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) |
1510 | { | 1623 | { |
1511 | if (NULL != block_ctx->bh) { | 1624 | if (block_ctx->mem_to_free) { |
1512 | brelse(block_ctx->bh); | 1625 | unsigned int num_pages; |
1513 | block_ctx->bh = NULL; | 1626 | |
1627 | BUG_ON(!block_ctx->datav); | ||
1628 | BUG_ON(!block_ctx->pagev); | ||
1629 | num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> | ||
1630 | PAGE_CACHE_SHIFT; | ||
1631 | while (num_pages > 0) { | ||
1632 | num_pages--; | ||
1633 | if (block_ctx->datav[num_pages]) { | ||
1634 | kunmap(block_ctx->pagev[num_pages]); | ||
1635 | block_ctx->datav[num_pages] = NULL; | ||
1636 | } | ||
1637 | if (block_ctx->pagev[num_pages]) { | ||
1638 | __free_page(block_ctx->pagev[num_pages]); | ||
1639 | block_ctx->pagev[num_pages] = NULL; | ||
1640 | } | ||
1641 | } | ||
1642 | |||
1643 | kfree(block_ctx->mem_to_free); | ||
1644 | block_ctx->mem_to_free = NULL; | ||
1645 | block_ctx->pagev = NULL; | ||
1646 | block_ctx->datav = NULL; | ||
1514 | } | 1647 | } |
1515 | } | 1648 | } |
1516 | 1649 | ||
1517 | static int btrfsic_read_block(struct btrfsic_state *state, | 1650 | static int btrfsic_read_block(struct btrfsic_state *state, |
1518 | struct btrfsic_block_data_ctx *block_ctx) | 1651 | struct btrfsic_block_data_ctx *block_ctx) |
1519 | { | 1652 | { |
1520 | block_ctx->bh = NULL; | 1653 | unsigned int num_pages; |
1521 | if (block_ctx->dev_bytenr & 4095) { | 1654 | unsigned int i; |
1655 | u64 dev_bytenr; | ||
1656 | int ret; | ||
1657 | |||
1658 | BUG_ON(block_ctx->datav); | ||
1659 | BUG_ON(block_ctx->pagev); | ||
1660 | BUG_ON(block_ctx->mem_to_free); | ||
1661 | if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
1522 | printk(KERN_INFO | 1662 | printk(KERN_INFO |
1523 | "btrfsic: read_block() with unaligned bytenr %llu\n", | 1663 | "btrfsic: read_block() with unaligned bytenr %llu\n", |
1524 | (unsigned long long)block_ctx->dev_bytenr); | 1664 | (unsigned long long)block_ctx->dev_bytenr); |
1525 | return -1; | 1665 | return -1; |
1526 | } | 1666 | } |
1527 | if (block_ctx->len > 4096) { | 1667 | |
1528 | printk(KERN_INFO | 1668 | num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> |
1529 | "btrfsic: read_block() with too huge size %d\n", | 1669 | PAGE_CACHE_SHIFT; |
1530 | block_ctx->len); | 1670 | block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + |
1671 | sizeof(*block_ctx->pagev)) * | ||
1672 | num_pages, GFP_NOFS); | ||
1673 | if (!block_ctx->mem_to_free) | ||
1531 | return -1; | 1674 | return -1; |
1675 | block_ctx->datav = block_ctx->mem_to_free; | ||
1676 | block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); | ||
1677 | for (i = 0; i < num_pages; i++) { | ||
1678 | block_ctx->pagev[i] = alloc_page(GFP_NOFS); | ||
1679 | if (!block_ctx->pagev[i]) | ||
1680 | return -1; | ||
1532 | } | 1681 | } |
1533 | 1682 | ||
1534 | block_ctx->bh = __bread(block_ctx->dev->bdev, | 1683 | dev_bytenr = block_ctx->dev_bytenr; |
1535 | block_ctx->dev_bytenr >> 12, 4096); | 1684 | for (i = 0; i < num_pages;) { |
1536 | if (NULL == block_ctx->bh) | 1685 | struct bio *bio; |
1537 | return -1; | 1686 | unsigned int j; |
1538 | block_ctx->data = block_ctx->bh->b_data; | 1687 | DECLARE_COMPLETION_ONSTACK(complete); |
1688 | |||
1689 | bio = bio_alloc(GFP_NOFS, num_pages - i); | ||
1690 | if (!bio) { | ||
1691 | printk(KERN_INFO | ||
1692 | "btrfsic: bio_alloc() for %u pages failed!\n", | ||
1693 | num_pages - i); | ||
1694 | return -1; | ||
1695 | } | ||
1696 | bio->bi_bdev = block_ctx->dev->bdev; | ||
1697 | bio->bi_sector = dev_bytenr >> 9; | ||
1698 | bio->bi_end_io = btrfsic_complete_bio_end_io; | ||
1699 | bio->bi_private = &complete; | ||
1700 | |||
1701 | for (j = i; j < num_pages; j++) { | ||
1702 | ret = bio_add_page(bio, block_ctx->pagev[j], | ||
1703 | PAGE_CACHE_SIZE, 0); | ||
1704 | if (PAGE_CACHE_SIZE != ret) | ||
1705 | break; | ||
1706 | } | ||
1707 | if (j == i) { | ||
1708 | printk(KERN_INFO | ||
1709 | "btrfsic: error, failed to add a single page!\n"); | ||
1710 | return -1; | ||
1711 | } | ||
1712 | submit_bio(READ, bio); | ||
1713 | |||
1714 | /* this will also unplug the queue */ | ||
1715 | wait_for_completion(&complete); | ||
1716 | |||
1717 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | ||
1718 | printk(KERN_INFO | ||
1719 | "btrfsic: read error at logical %llu dev %s!\n", | ||
1720 | block_ctx->start, block_ctx->dev->name); | ||
1721 | bio_put(bio); | ||
1722 | return -1; | ||
1723 | } | ||
1724 | bio_put(bio); | ||
1725 | dev_bytenr += (j - i) * PAGE_CACHE_SIZE; | ||
1726 | i = j; | ||
1727 | } | ||
1728 | for (i = 0; i < num_pages; i++) { | ||
1729 | block_ctx->datav[i] = kmap(block_ctx->pagev[i]); | ||
1730 | if (!block_ctx->datav[i]) { | ||
1731 | printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", | ||
1732 | block_ctx->dev->name); | ||
1733 | return -1; | ||
1734 | } | ||
1735 | } | ||
1539 | 1736 | ||
1540 | return block_ctx->len; | 1737 | return block_ctx->len; |
1541 | } | 1738 | } |
1542 | 1739 | ||
1740 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err) | ||
1741 | { | ||
1742 | complete((struct completion *)bio->bi_private); | ||
1743 | } | ||
1744 | |||
1543 | static void btrfsic_dump_database(struct btrfsic_state *state) | 1745 | static void btrfsic_dump_database(struct btrfsic_state *state) |
1544 | { | 1746 | { |
1545 | struct list_head *elem_all; | 1747 | struct list_head *elem_all; |
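
btrfsic_read_block() above sizes one allocation for both pointer arrays: num_pages slots for the kmapped data pointers (datav) followed by num_pages slots for the page handles (pagev). A small userspace sketch of that layout, assuming a 4 KiB page; the struct and helper names here are illustrative, not the kernel's.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE  4096ULL
#define PAGE_SHIFT 12

struct block_data_ctx {
    uint64_t len;
    char **datav;       /* one mapped pointer per page */
    void **pagev;       /* one page handle per page */
    void *mem_to_free;  /* single allocation backing both arrays */
};

static int ctx_alloc_arrays(struct block_data_ctx *ctx)
{
    /* round the block length up to whole pages */
    unsigned int num_pages = (ctx->len + PAGE_SIZE - 1) >> PAGE_SHIFT;

    /* one zeroed allocation: first the datav slots, then the pagev slots */
    ctx->mem_to_free = calloc(num_pages,
                              sizeof(*ctx->datav) + sizeof(*ctx->pagev));
    if (!ctx->mem_to_free)
        return -1;
    ctx->datav = ctx->mem_to_free;
    ctx->pagev = (void **)(ctx->datav + num_pages);
    return 0;
}

int main(void)
{
    struct block_data_ctx ctx = { .len = 16384 };   /* e.g. 16 KiB node */

    if (ctx_alloc_arrays(&ctx))
        return 1;
    printf("pages: %llu\n",
           (unsigned long long)((ctx.len + PAGE_SIZE - 1) >> PAGE_SHIFT));
    free(ctx.mem_to_free);
    return 0;
}

Keeping both arrays inside one allocation is what lets btrfsic_release_block_ctx() simply unmap and free each page and then drop mem_to_free in a single kfree().
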
@@ -1617,32 +1819,39 @@ static void btrfsic_dump_database(struct btrfsic_state *state) | |||
1617 | * (note that this test fails for the super block) | 1819 | * (note that this test fails for the super block) |
1618 | */ | 1820 | */ |
1619 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | 1821 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, |
1620 | const u8 *data, unsigned int size) | 1822 | char **datav, unsigned int num_pages) |
1621 | { | 1823 | { |
1622 | struct btrfs_header *h; | 1824 | struct btrfs_header *h; |
1623 | u8 csum[BTRFS_CSUM_SIZE]; | 1825 | u8 csum[BTRFS_CSUM_SIZE]; |
1624 | u32 crc = ~(u32)0; | 1826 | u32 crc = ~(u32)0; |
1625 | int fail = 0; | 1827 | unsigned int i; |
1626 | int crc_fail = 0; | ||
1627 | 1828 | ||
1628 | h = (struct btrfs_header *)data; | 1829 | if (num_pages * PAGE_CACHE_SIZE < state->metablock_size) |
1830 | return 1; /* not metadata */ | ||
1831 | num_pages = state->metablock_size >> PAGE_CACHE_SHIFT; | ||
1832 | h = (struct btrfs_header *)datav[0]; | ||
1629 | 1833 | ||
1630 | if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) | 1834 | if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) |
1631 | fail++; | 1835 | return 1; |
1836 | |||
1837 | for (i = 0; i < num_pages; i++) { | ||
1838 | u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); | ||
1839 | size_t sublen = i ? PAGE_CACHE_SIZE : | ||
1840 | (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); | ||
1632 | 1841 | ||
1633 | crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE); | 1842 | crc = crc32c(crc, data, sublen); |
1843 | } | ||
1634 | btrfs_csum_final(crc, csum); | 1844 | btrfs_csum_final(crc, csum); |
1635 | if (memcmp(csum, h->csum, state->csum_size)) | 1845 | if (memcmp(csum, h->csum, state->csum_size)) |
1636 | crc_fail++; | 1846 | return 1; |
1637 | 1847 | ||
1638 | return fail || crc_fail; | 1848 | return 0; /* is metadata */ |
1639 | } | 1849 | } |
1640 | 1850 | ||
1641 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | 1851 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, |
1642 | u64 dev_bytenr, | 1852 | u64 dev_bytenr, char **mapped_datav, |
1643 | u8 *mapped_data, unsigned int len, | 1853 | unsigned int num_pages, |
1644 | struct bio *bio, | 1854 | struct bio *bio, int *bio_is_patched, |
1645 | int *bio_is_patched, | ||
1646 | struct buffer_head *bh, | 1855 | struct buffer_head *bh, |
1647 | int submit_bio_bh_rw) | 1856 | int submit_bio_bh_rw) |
1648 | { | 1857 | { |
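
btrfsic_test_for_metadata() above now feeds the checksum one mapped page at a time, skipping the BTRFS_CSUM_SIZE bytes that store the on-disk checksum at the start of the first page. The sketch below shows only that offset/length bookkeeping; csum_update() is a trivial stand-in for crc32c(), and the sizes are assumptions for the example.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u
#define CSUM_SIZE 32u   /* BTRFS_CSUM_SIZE */

/* stand-in for crc32c(); only the offset/length handling matters here */
static uint32_t csum_update(uint32_t crc, const uint8_t *data, size_t len)
{
    while (len--)
        crc = (crc << 1) ^ *data++;
    return crc;
}

/* checksum a metadata block of block_size bytes spread over mapped pages,
 * excluding the checksum field itself at the start of page 0 */
static uint32_t csum_metablock(uint8_t **datav, unsigned int block_size)
{
    uint32_t crc = ~(uint32_t)0;
    unsigned int num_pages = block_size / PAGE_SIZE;
    unsigned int i;

    for (i = 0; i < num_pages; i++) {
        const uint8_t *data = i ? datav[i] : datav[i] + CSUM_SIZE;
        size_t sublen = i ? PAGE_SIZE : PAGE_SIZE - CSUM_SIZE;

        crc = csum_update(crc, data, sublen);
    }
    return crc;
}

int main(void)
{
    static uint8_t page0[PAGE_SIZE], page1[PAGE_SIZE];
    uint8_t *datav[2] = { page0, page1 };

    printf("crc=0x%08x\n", csum_metablock(datav, 2 * PAGE_SIZE));
    return 0;
}
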
@@ -1652,12 +1861,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1652 | int ret; | 1861 | int ret; |
1653 | struct btrfsic_state *state = dev_state->state; | 1862 | struct btrfsic_state *state = dev_state->state; |
1654 | struct block_device *bdev = dev_state->bdev; | 1863 | struct block_device *bdev = dev_state->bdev; |
1864 | unsigned int processed_len; | ||
1655 | 1865 | ||
1656 | WARN_ON(len > PAGE_SIZE); | ||
1657 | is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len)); | ||
1658 | if (NULL != bio_is_patched) | 1866 | if (NULL != bio_is_patched) |
1659 | *bio_is_patched = 0; | 1867 | *bio_is_patched = 0; |
1660 | 1868 | ||
1869 | again: | ||
1870 | if (num_pages == 0) | ||
1871 | return; | ||
1872 | |||
1873 | processed_len = 0; | ||
1874 | is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, | ||
1875 | num_pages)); | ||
1876 | |||
1661 | block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, | 1877 | block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, |
1662 | &state->block_hashtable); | 1878 | &state->block_hashtable); |
1663 | if (NULL != block) { | 1879 | if (NULL != block) { |
@@ -1667,8 +1883,16 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1667 | 1883 | ||
1668 | if (block->is_superblock) { | 1884 | if (block->is_superblock) { |
1669 | bytenr = le64_to_cpu(((struct btrfs_super_block *) | 1885 | bytenr = le64_to_cpu(((struct btrfs_super_block *) |
1670 | mapped_data)->bytenr); | 1886 | mapped_datav[0])->bytenr); |
1887 | if (num_pages * PAGE_CACHE_SIZE < | ||
1888 | BTRFS_SUPER_INFO_SIZE) { | ||
1889 | printk(KERN_INFO | ||
1890 | "btrfsic: cannot work with too short bios!\n"); | ||
1891 | return; | ||
1892 | } | ||
1671 | is_metadata = 1; | 1893 | is_metadata = 1; |
1894 | BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); | ||
1895 | processed_len = BTRFS_SUPER_INFO_SIZE; | ||
1672 | if (state->print_mask & | 1896 | if (state->print_mask & |
1673 | BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { | 1897 | BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { |
1674 | printk(KERN_INFO | 1898 | printk(KERN_INFO |
@@ -1678,12 +1902,18 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1678 | } | 1902 | } |
1679 | if (is_metadata) { | 1903 | if (is_metadata) { |
1680 | if (!block->is_superblock) { | 1904 | if (!block->is_superblock) { |
1905 | if (num_pages * PAGE_CACHE_SIZE < | ||
1906 | state->metablock_size) { | ||
1907 | printk(KERN_INFO | ||
1908 | "btrfsic: cannot work with too short bios!\n"); | ||
1909 | return; | ||
1910 | } | ||
1911 | processed_len = state->metablock_size; | ||
1681 | bytenr = le64_to_cpu(((struct btrfs_header *) | 1912 | bytenr = le64_to_cpu(((struct btrfs_header *) |
1682 | mapped_data)->bytenr); | 1913 | mapped_datav[0])->bytenr); |
1683 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, | 1914 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, |
1684 | dev_state, | 1915 | dev_state, |
1685 | dev_bytenr, | 1916 | dev_bytenr); |
1686 | mapped_data); | ||
1687 | } | 1917 | } |
1688 | if (block->logical_bytenr != bytenr) { | 1918 | if (block->logical_bytenr != bytenr) { |
1689 | printk(KERN_INFO | 1919 | printk(KERN_INFO |
@@ -1710,6 +1940,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1710 | block->mirror_num, | 1940 | block->mirror_num, |
1711 | btrfsic_get_block_type(state, block)); | 1941 | btrfsic_get_block_type(state, block)); |
1712 | } else { | 1942 | } else { |
1943 | if (num_pages * PAGE_CACHE_SIZE < | ||
1944 | state->datablock_size) { | ||
1945 | printk(KERN_INFO | ||
1946 | "btrfsic: cannot work with too short bios!\n"); | ||
1947 | return; | ||
1948 | } | ||
1949 | processed_len = state->datablock_size; | ||
1713 | bytenr = block->logical_bytenr; | 1950 | bytenr = block->logical_bytenr; |
1714 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 1951 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
1715 | printk(KERN_INFO | 1952 | printk(KERN_INFO |
@@ -1747,7 +1984,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1747 | le64_to_cpu(block->disk_key.offset), | 1984 | le64_to_cpu(block->disk_key.offset), |
1748 | (unsigned long long) | 1985 | (unsigned long long) |
1749 | le64_to_cpu(((struct btrfs_header *) | 1986 | le64_to_cpu(((struct btrfs_header *) |
1750 | mapped_data)->generation), | 1987 | mapped_datav[0])->generation), |
1751 | (unsigned long long) | 1988 | (unsigned long long) |
1752 | state->max_superblock_generation); | 1989 | state->max_superblock_generation); |
1753 | btrfsic_dump_tree(state); | 1990 | btrfsic_dump_tree(state); |
@@ -1765,10 +2002,10 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1765 | (unsigned long long)block->generation, | 2002 | (unsigned long long)block->generation, |
1766 | (unsigned long long) | 2003 | (unsigned long long) |
1767 | le64_to_cpu(((struct btrfs_header *) | 2004 | le64_to_cpu(((struct btrfs_header *) |
1768 | mapped_data)->generation)); | 2005 | mapped_datav[0])->generation)); |
1769 | /* it would not be safe to go on */ | 2006 | /* it would not be safe to go on */ |
1770 | btrfsic_dump_tree(state); | 2007 | btrfsic_dump_tree(state); |
1771 | return; | 2008 | goto continue_loop; |
1772 | } | 2009 | } |
1773 | 2010 | ||
1774 | /* | 2011 | /* |
@@ -1796,18 +2033,19 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1796 | } | 2033 | } |
1797 | 2034 | ||
1798 | if (block->is_superblock) | 2035 | if (block->is_superblock) |
1799 | ret = btrfsic_map_superblock(state, bytenr, len, | 2036 | ret = btrfsic_map_superblock(state, bytenr, |
2037 | processed_len, | ||
1800 | bdev, &block_ctx); | 2038 | bdev, &block_ctx); |
1801 | else | 2039 | else |
1802 | ret = btrfsic_map_block(state, bytenr, len, | 2040 | ret = btrfsic_map_block(state, bytenr, processed_len, |
1803 | &block_ctx, 0); | 2041 | &block_ctx, 0); |
1804 | if (ret) { | 2042 | if (ret) { |
1805 | printk(KERN_INFO | 2043 | printk(KERN_INFO |
1806 | "btrfsic: btrfsic_map_block(root @%llu)" | 2044 | "btrfsic: btrfsic_map_block(root @%llu)" |
1807 | " failed!\n", (unsigned long long)bytenr); | 2045 | " failed!\n", (unsigned long long)bytenr); |
1808 | return; | 2046 | goto continue_loop; |
1809 | } | 2047 | } |
1810 | block_ctx.data = mapped_data; | 2048 | block_ctx.datav = mapped_datav; |
1811 | /* the following is required in case of writes to mirrors, | 2049 | /* the following is required in case of writes to mirrors, |
1812 | * use the same that was used for the lookup */ | 2050 | * use the same that was used for the lookup */ |
1813 | block_ctx.dev = dev_state; | 2051 | block_ctx.dev = dev_state; |
@@ -1863,11 +2101,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1863 | block->logical_bytenr = bytenr; | 2101 | block->logical_bytenr = bytenr; |
1864 | block->is_metadata = 1; | 2102 | block->is_metadata = 1; |
1865 | if (block->is_superblock) { | 2103 | if (block->is_superblock) { |
2104 | BUG_ON(PAGE_CACHE_SIZE != | ||
2105 | BTRFS_SUPER_INFO_SIZE); | ||
1866 | ret = btrfsic_process_written_superblock( | 2106 | ret = btrfsic_process_written_superblock( |
1867 | state, | 2107 | state, |
1868 | block, | 2108 | block, |
1869 | (struct btrfs_super_block *) | 2109 | (struct btrfs_super_block *) |
1870 | mapped_data); | 2110 | mapped_datav[0]); |
1871 | if (state->print_mask & | 2111 | if (state->print_mask & |
1872 | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { | 2112 | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { |
1873 | printk(KERN_INFO | 2113 | printk(KERN_INFO |
@@ -1880,8 +2120,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1880 | state, | 2120 | state, |
1881 | block, | 2121 | block, |
1882 | &block_ctx, | 2122 | &block_ctx, |
1883 | (struct btrfs_header *) | ||
1884 | block_ctx.data, | ||
1885 | 0, 0); | 2123 | 0, 0); |
1886 | } | 2124 | } |
1887 | if (ret) | 2125 | if (ret) |
@@ -1912,26 +2150,30 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1912 | u64 bytenr; | 2150 | u64 bytenr; |
1913 | 2151 | ||
1914 | if (!is_metadata) { | 2152 | if (!is_metadata) { |
2153 | processed_len = state->datablock_size; | ||
1915 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 2154 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
1916 | printk(KERN_INFO "Written block (%s/%llu/?)" | 2155 | printk(KERN_INFO "Written block (%s/%llu/?)" |
1917 | " !found in hash table, D.\n", | 2156 | " !found in hash table, D.\n", |
1918 | dev_state->name, | 2157 | dev_state->name, |
1919 | (unsigned long long)dev_bytenr); | 2158 | (unsigned long long)dev_bytenr); |
1920 | if (!state->include_extent_data) | 2159 | if (!state->include_extent_data) { |
1921 | return; /* ignore that written D block */ | 2160 | /* ignore that written D block */ |
2161 | goto continue_loop; | ||
2162 | } | ||
1922 | 2163 | ||
1923 | /* this is getting ugly for the | 2164 | /* this is getting ugly for the |
1924 | * include_extent_data case... */ | 2165 | * include_extent_data case... */ |
1925 | bytenr = 0; /* unknown */ | 2166 | bytenr = 0; /* unknown */ |
1926 | block_ctx.start = bytenr; | 2167 | block_ctx.start = bytenr; |
1927 | block_ctx.len = len; | 2168 | block_ctx.len = processed_len; |
1928 | block_ctx.bh = NULL; | 2169 | block_ctx.mem_to_free = NULL; |
2170 | block_ctx.pagev = NULL; | ||
1929 | } else { | 2171 | } else { |
2172 | processed_len = state->metablock_size; | ||
1930 | bytenr = le64_to_cpu(((struct btrfs_header *) | 2173 | bytenr = le64_to_cpu(((struct btrfs_header *) |
1931 | mapped_data)->bytenr); | 2174 | mapped_datav[0])->bytenr); |
1932 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, | 2175 | btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, |
1933 | dev_bytenr, | 2176 | dev_bytenr); |
1934 | mapped_data); | ||
1935 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) | 2177 | if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) |
1936 | printk(KERN_INFO | 2178 | printk(KERN_INFO |
1937 | "Written block @%llu (%s/%llu/?)" | 2179 | "Written block @%llu (%s/%llu/?)" |
@@ -1940,17 +2182,17 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1940 | dev_state->name, | 2182 | dev_state->name, |
1941 | (unsigned long long)dev_bytenr); | 2183 | (unsigned long long)dev_bytenr); |
1942 | 2184 | ||
1943 | ret = btrfsic_map_block(state, bytenr, len, &block_ctx, | 2185 | ret = btrfsic_map_block(state, bytenr, processed_len, |
1944 | 0); | 2186 | &block_ctx, 0); |
1945 | if (ret) { | 2187 | if (ret) { |
1946 | printk(KERN_INFO | 2188 | printk(KERN_INFO |
1947 | "btrfsic: btrfsic_map_block(root @%llu)" | 2189 | "btrfsic: btrfsic_map_block(root @%llu)" |
1948 | " failed!\n", | 2190 | " failed!\n", |
1949 | (unsigned long long)dev_bytenr); | 2191 | (unsigned long long)dev_bytenr); |
1950 | return; | 2192 | goto continue_loop; |
1951 | } | 2193 | } |
1952 | } | 2194 | } |
1953 | block_ctx.data = mapped_data; | 2195 | block_ctx.datav = mapped_datav; |
1954 | /* the following is required in case of writes to mirrors, | 2196 | /* the following is required in case of writes to mirrors, |
1955 | * use the same that was used for the lookup */ | 2197 | * use the same that was used for the lookup */ |
1956 | block_ctx.dev = dev_state; | 2198 | block_ctx.dev = dev_state; |
@@ -1960,7 +2202,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
1960 | if (NULL == block) { | 2202 | if (NULL == block) { |
1961 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); | 2203 | printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); |
1962 | btrfsic_release_block_ctx(&block_ctx); | 2204 | btrfsic_release_block_ctx(&block_ctx); |
1963 | return; | 2205 | goto continue_loop; |
1964 | } | 2206 | } |
1965 | block->dev_state = dev_state; | 2207 | block->dev_state = dev_state; |
1966 | block->dev_bytenr = dev_bytenr; | 2208 | block->dev_bytenr = dev_bytenr; |
@@ -2020,9 +2262,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
2020 | 2262 | ||
2021 | if (is_metadata) { | 2263 | if (is_metadata) { |
2022 | ret = btrfsic_process_metablock(state, block, | 2264 | ret = btrfsic_process_metablock(state, block, |
2023 | &block_ctx, | 2265 | &block_ctx, 0, 0); |
2024 | (struct btrfs_header *) | ||
2025 | block_ctx.data, 0, 0); | ||
2026 | if (ret) | 2266 | if (ret) |
2027 | printk(KERN_INFO | 2267 | printk(KERN_INFO |
2028 | "btrfsic: process_metablock(root @%llu)" | 2268 | "btrfsic: process_metablock(root @%llu)" |
@@ -2031,6 +2271,13 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | |||
2031 | } | 2271 | } |
2032 | btrfsic_release_block_ctx(&block_ctx); | 2272 | btrfsic_release_block_ctx(&block_ctx); |
2033 | } | 2273 | } |
2274 | |||
2275 | continue_loop: | ||
2276 | BUG_ON(!processed_len); | ||
2277 | dev_bytenr += processed_len; | ||
2278 | mapped_datav += processed_len >> PAGE_CACHE_SHIFT; | ||
2279 | num_pages -= processed_len >> PAGE_CACHE_SHIFT; | ||
2280 | goto again; | ||
2034 | } | 2281 | } |
2035 | 2282 | ||
2036 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) | 2283 | static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) |
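
With the again/continue_loop structure above, btrfsic_process_written_block() consumes a whole mapped bio in one call: each pass classifies the chunk at the front, sets processed_len to the superblock, metablock or data block size, verifies it, then advances dev_bytenr and the page array. A compact userspace sketch of that loop; chunk_is_metadata() and the size constants are invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE  4096u
#define META_SIZE 16384u    /* e.g. nodesize */
#define DATA_SIZE  4096u    /* e.g. sectorsize */

/* pretend classifier: chunks at even page offsets count as metadata */
static int chunk_is_metadata(uint64_t dev_bytenr)
{
    return ((dev_bytenr / PAGE_SIZE) & 1) == 0;
}

static void process_written_pages(uint64_t dev_bytenr, char **mapped_datav,
                                  unsigned int num_pages)
{
    while (num_pages > 0) {
        unsigned int processed_len =
            chunk_is_metadata(dev_bytenr) ? META_SIZE : DATA_SIZE;

        if ((uint64_t)num_pages * PAGE_SIZE < processed_len) {
            fprintf(stderr, "bio too short for this chunk\n");
            return;
        }

        /* ... the chunk starting at mapped_datav[0] is verified here ... */
        printf("chunk @%llu, %u bytes\n",
               (unsigned long long)dev_bytenr, processed_len);

        dev_bytenr += processed_len;
        mapped_datav += processed_len / PAGE_SIZE;
        num_pages -= processed_len / PAGE_SIZE;
    }
}

int main(void)
{
    char *pages[8] = { 0 };

    process_written_pages(0, pages, 8);
    return 0;
}
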
@@ -2213,7 +2460,7 @@ static int btrfsic_process_written_superblock( | |||
2213 | 2460 | ||
2214 | num_copies = | 2461 | num_copies = |
2215 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2462 | btrfs_num_copies(&state->root->fs_info->mapping_tree, |
2216 | next_bytenr, PAGE_SIZE); | 2463 | next_bytenr, BTRFS_SUPER_INFO_SIZE); |
2217 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 2464 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
2218 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 2465 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
2219 | (unsigned long long)next_bytenr, num_copies); | 2466 | (unsigned long long)next_bytenr, num_copies); |
@@ -2224,7 +2471,8 @@ static int btrfsic_process_written_superblock( | |||
2224 | printk(KERN_INFO | 2471 | printk(KERN_INFO |
2225 | "btrfsic_process_written_superblock(" | 2472 | "btrfsic_process_written_superblock(" |
2226 | "mirror_num=%d)\n", mirror_num); | 2473 | "mirror_num=%d)\n", mirror_num); |
2227 | ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE, | 2474 | ret = btrfsic_map_block(state, next_bytenr, |
2475 | BTRFS_SUPER_INFO_SIZE, | ||
2228 | &tmp_next_block_ctx, | 2476 | &tmp_next_block_ctx, |
2229 | mirror_num); | 2477 | mirror_num); |
2230 | if (ret) { | 2478 | if (ret) { |
@@ -2689,7 +2937,7 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add( | |||
2689 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | 2937 | static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, |
2690 | u64 bytenr, | 2938 | u64 bytenr, |
2691 | struct btrfsic_dev_state *dev_state, | 2939 | struct btrfsic_dev_state *dev_state, |
2692 | u64 dev_bytenr, char *data) | 2940 | u64 dev_bytenr) |
2693 | { | 2941 | { |
2694 | int num_copies; | 2942 | int num_copies; |
2695 | int mirror_num; | 2943 | int mirror_num; |
@@ -2698,10 +2946,10 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
2698 | int match = 0; | 2946 | int match = 0; |
2699 | 2947 | ||
2700 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2948 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, |
2701 | bytenr, PAGE_SIZE); | 2949 | bytenr, state->metablock_size); |
2702 | 2950 | ||
2703 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2951 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
2704 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | 2952 | ret = btrfsic_map_block(state, bytenr, state->metablock_size, |
2705 | &block_ctx, mirror_num); | 2953 | &block_ctx, mirror_num); |
2706 | if (ret) { | 2954 | if (ret) { |
2707 | printk(KERN_INFO "btrfsic:" | 2955 | printk(KERN_INFO "btrfsic:" |
@@ -2727,7 +2975,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
2727 | (unsigned long long)bytenr, dev_state->name, | 2975 | (unsigned long long)bytenr, dev_state->name, |
2728 | (unsigned long long)dev_bytenr); | 2976 | (unsigned long long)dev_bytenr); |
2729 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2977 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
2730 | ret = btrfsic_map_block(state, bytenr, PAGE_SIZE, | 2978 | ret = btrfsic_map_block(state, bytenr, |
2979 | state->metablock_size, | ||
2731 | &block_ctx, mirror_num); | 2980 | &block_ctx, mirror_num); |
2732 | if (ret) | 2981 | if (ret) |
2733 | continue; | 2982 | continue; |
@@ -2781,13 +3030,13 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) | |||
2781 | (unsigned long)bh->b_size, bh->b_data, | 3030 | (unsigned long)bh->b_size, bh->b_data, |
2782 | bh->b_bdev); | 3031 | bh->b_bdev); |
2783 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3032 | btrfsic_process_written_block(dev_state, dev_bytenr, |
2784 | bh->b_data, bh->b_size, NULL, | 3033 | &bh->b_data, 1, NULL, |
2785 | NULL, bh, rw); | 3034 | NULL, bh, rw); |
2786 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | 3035 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { |
2787 | if (dev_state->state->print_mask & | 3036 | if (dev_state->state->print_mask & |
2788 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | 3037 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) |
2789 | printk(KERN_INFO | 3038 | printk(KERN_INFO |
2790 | "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n", | 3039 | "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", |
2791 | rw, bh->b_bdev); | 3040 | rw, bh->b_bdev); |
2792 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | 3041 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { |
2793 | if ((dev_state->state->print_mask & | 3042 | if ((dev_state->state->print_mask & |
@@ -2836,6 +3085,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
2836 | unsigned int i; | 3085 | unsigned int i; |
2837 | u64 dev_bytenr; | 3086 | u64 dev_bytenr; |
2838 | int bio_is_patched; | 3087 | int bio_is_patched; |
3088 | char **mapped_datav; | ||
2839 | 3089 | ||
2840 | dev_bytenr = 512 * bio->bi_sector; | 3090 | dev_bytenr = 512 * bio->bi_sector; |
2841 | bio_is_patched = 0; | 3091 | bio_is_patched = 0; |
@@ -2848,35 +3098,46 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
2848 | (unsigned long long)dev_bytenr, | 3098 | (unsigned long long)dev_bytenr, |
2849 | bio->bi_bdev); | 3099 | bio->bi_bdev); |
2850 | 3100 | ||
3101 | mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, | ||
3102 | GFP_NOFS); | ||
3103 | if (!mapped_datav) | ||
3104 | goto leave; | ||
2851 | for (i = 0; i < bio->bi_vcnt; i++) { | 3105 | for (i = 0; i < bio->bi_vcnt; i++) { |
2852 | u8 *mapped_data; | 3106 | BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); |
2853 | 3107 | mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); | |
2854 | mapped_data = kmap(bio->bi_io_vec[i].bv_page); | 3108 | if (!mapped_datav[i]) { |
3109 | while (i > 0) { | ||
3110 | i--; | ||
3111 | kunmap(bio->bi_io_vec[i].bv_page); | ||
3112 | } | ||
3113 | kfree(mapped_datav); | ||
3114 | goto leave; | ||
3115 | } | ||
2855 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3116 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | |
2856 | BTRFSIC_PRINT_MASK_VERBOSE) == | 3117 | BTRFSIC_PRINT_MASK_VERBOSE) == |
2857 | (dev_state->state->print_mask & | 3118 | (dev_state->state->print_mask & |
2858 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3119 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | |
2859 | BTRFSIC_PRINT_MASK_VERBOSE))) | 3120 | BTRFSIC_PRINT_MASK_VERBOSE))) |
2860 | printk(KERN_INFO | 3121 | printk(KERN_INFO |
2861 | "#%u: page=%p, mapped=%p, len=%u," | 3122 | "#%u: page=%p, len=%u, offset=%u\n", |
2862 | " offset=%u\n", | ||
2863 | i, bio->bi_io_vec[i].bv_page, | 3123 | i, bio->bi_io_vec[i].bv_page, |
2864 | mapped_data, | ||
2865 | bio->bi_io_vec[i].bv_len, | 3124 | bio->bi_io_vec[i].bv_len, |
2866 | bio->bi_io_vec[i].bv_offset); | 3125 | bio->bi_io_vec[i].bv_offset); |
2867 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3126 | } |
2868 | mapped_data, | 3127 | btrfsic_process_written_block(dev_state, dev_bytenr, |
2869 | bio->bi_io_vec[i].bv_len, | 3128 | mapped_datav, bio->bi_vcnt, |
2870 | bio, &bio_is_patched, | 3129 | bio, &bio_is_patched, |
2871 | NULL, rw); | 3130 | NULL, rw); |
3131 | while (i > 0) { | ||
3132 | i--; | ||
2872 | kunmap(bio->bi_io_vec[i].bv_page); | 3133 | kunmap(bio->bi_io_vec[i].bv_page); |
2873 | dev_bytenr += bio->bi_io_vec[i].bv_len; | ||
2874 | } | 3134 | } |
3135 | kfree(mapped_datav); | ||
2875 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { | 3136 | } else if (NULL != dev_state && (rw & REQ_FLUSH)) { |
2876 | if (dev_state->state->print_mask & | 3137 | if (dev_state->state->print_mask & |
2877 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) | 3138 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) |
2878 | printk(KERN_INFO | 3139 | printk(KERN_INFO |
2879 | "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n", | 3140 | "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", |
2880 | rw, bio->bi_bdev); | 3141 | rw, bio->bi_bdev); |
2881 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { | 3142 | if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { |
2882 | if ((dev_state->state->print_mask & | 3143 | if ((dev_state->state->print_mask & |
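
btrfsic_submit_bio() above now kmaps every page of the bio into mapped_datav before calling the checker once, and unmaps in reverse order both on partial failure and after the call. A userspace sketch of that map-all-then-unwind pattern, with map_page()/unmap_page() standing in for kmap()/kunmap() (they cannot fail in this sketch).

#include <stdio.h>
#include <stdlib.h>

/* stand-ins for kmap()/kunmap(); the real calls can fail or sleep */
static char *map_page(void *page)   { return (char *)page; }
static void  unmap_page(void *page) { (void)page; }

static int process_bio_pages(void **pages, unsigned int nr)
{
    char **mapped = malloc(sizeof(*mapped) * nr);
    unsigned int i;

    if (!mapped)
        return -1;

    for (i = 0; i < nr; i++) {
        mapped[i] = map_page(pages[i]);
        if (!mapped[i]) {
            /* unwind the mappings taken so far, newest first */
            while (i > 0) {
                i--;
                unmap_page(pages[i]);
            }
            free(mapped);
            return -1;
        }
    }

    /* ... hand the whole mapped array to the verifier here ... */
    printf("verified %u mapped pages\n", nr);

    while (i > 0) {
        i--;
        unmap_page(pages[i]);
    }
    free(mapped);
    return 0;
}

int main(void)
{
    static char p0[4096], p1[4096];
    void *pages[2] = { p0, p1 };

    return process_bio_pages(pages, 2) ? 1 : 0;
}
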
@@ -2903,6 +3164,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
2903 | bio->bi_end_io = btrfsic_bio_end_io; | 3164 | bio->bi_end_io = btrfsic_bio_end_io; |
2904 | } | 3165 | } |
2905 | } | 3166 | } |
3167 | leave: | ||
2906 | mutex_unlock(&btrfsic_mutex); | 3168 | mutex_unlock(&btrfsic_mutex); |
2907 | 3169 | ||
2908 | submit_bio(rw, bio); | 3170 | submit_bio(rw, bio); |
@@ -2917,6 +3179,30 @@ int btrfsic_mount(struct btrfs_root *root, | |||
2917 | struct list_head *dev_head = &fs_devices->devices; | 3179 | struct list_head *dev_head = &fs_devices->devices; |
2918 | struct btrfs_device *device; | 3180 | struct btrfs_device *device; |
2919 | 3181 | ||
3182 | if (root->nodesize != root->leafsize) { | ||
3183 | printk(KERN_INFO | ||
3184 | "btrfsic: cannot handle nodesize %d != leafsize %d!\n", | ||
3185 | root->nodesize, root->leafsize); | ||
3186 | return -1; | ||
3187 | } | ||
3188 | if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
3189 | printk(KERN_INFO | ||
3190 | "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
3191 | root->nodesize, (unsigned long)PAGE_CACHE_SIZE); | ||
3192 | return -1; | ||
3193 | } | ||
3194 | if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
3195 | printk(KERN_INFO | ||
3196 | "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
3197 | root->leafsize, (unsigned long)PAGE_CACHE_SIZE); | ||
3198 | return -1; | ||
3199 | } | ||
3200 | if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
3201 | printk(KERN_INFO | ||
3202 | "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
3203 | root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); | ||
3204 | return -1; | ||
3205 | } | ||
2920 | state = kzalloc(sizeof(*state), GFP_NOFS); | 3206 | state = kzalloc(sizeof(*state), GFP_NOFS); |
2921 | if (NULL == state) { | 3207 | if (NULL == state) { |
2922 | printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); | 3208 | printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); |
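
The new mount-time checks reject geometries the per-page checker cannot handle. Each alignment test relies on the usual power-of-two identity that x & (size - 1) is zero exactly when x is a multiple of size; a short illustration, assuming the page size is a power of two:

#include <assert.h>
#include <stdint.h>

/* true if size is a whole multiple of page_size (page_size a power of two) */
static int is_page_multiple(uint32_t size, uint64_t page_size)
{
    return (size & (page_size - 1)) == 0;
}

int main(void)
{
    assert(is_page_multiple(16384, 4096));   /* typical nodesize passes */
    assert(!is_page_multiple(2048, 4096));   /* sub-page sectorsize rejected */
    return 0;
}
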
@@ -2933,6 +3219,8 @@ int btrfsic_mount(struct btrfs_root *root, | |||
2933 | state->print_mask = print_mask; | 3219 | state->print_mask = print_mask; |
2934 | state->include_extent_data = including_extent_data; | 3220 | state->include_extent_data = including_extent_data; |
2935 | state->csum_size = 0; | 3221 | state->csum_size = 0; |
3222 | state->metablock_size = root->nodesize; | ||
3223 | state->datablock_size = root->sectorsize; | ||
2936 | INIT_LIST_HEAD(&state->all_blocks_list); | 3224 | INIT_LIST_HEAD(&state->all_blocks_list); |
2937 | btrfsic_block_hashtable_init(&state->block_hashtable); | 3225 | btrfsic_block_hashtable_init(&state->block_hashtable); |
2938 | btrfsic_block_link_hashtable_init(&state->block_link_hashtable); | 3226 | btrfsic_block_link_hashtable_init(&state->block_link_hashtable); |
@@ -3049,7 +3337,7 @@ void btrfsic_unmount(struct btrfs_root *root, | |||
3049 | btrfsic_block_link_free(l); | 3337 | btrfsic_block_link_free(l); |
3050 | } | 3338 | } |
3051 | 3339 | ||
3052 | if (b_all->is_iodone) | 3340 | if (b_all->is_iodone || b_all->never_written) |
3053 | btrfsic_block_free(b_all); | 3341 | btrfsic_block_free(b_all); |
3054 | else | 3342 | else |
3055 | printk(KERN_INFO "btrfs: attempt to free %c-block" | 3343 | printk(KERN_INFO "btrfs: attempt to free %c-block" |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4106264fbc65..d7a96cfdc50a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/rbtree.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
23 | #include "transaction.h" | 24 | #include "transaction.h" |
@@ -37,7 +38,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
37 | struct extent_buffer *dst_buf, | 38 | struct extent_buffer *dst_buf, |
38 | struct extent_buffer *src_buf); | 39 | struct extent_buffer *src_buf); |
39 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
40 | struct btrfs_path *path, int level, int slot); | 41 | struct btrfs_path *path, int level, int slot, |
42 | int tree_mod_log); | ||
43 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
44 | struct extent_buffer *eb); | ||
45 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, | ||
46 | u32 blocksize, u64 parent_transid, | ||
47 | u64 time_seq); | ||
48 | struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, | ||
49 | u64 bytenr, u32 blocksize, | ||
50 | u64 time_seq); | ||
41 | 51 | ||
42 | struct btrfs_path *btrfs_alloc_path(void) | 52 | struct btrfs_path *btrfs_alloc_path(void) |
43 | { | 53 | { |
@@ -255,7 +265,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
255 | 265 | ||
256 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 266 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, |
257 | new_root_objectid, &disk_key, level, | 267 | new_root_objectid, &disk_key, level, |
258 | buf->start, 0, 1); | 268 | buf->start, 0); |
259 | if (IS_ERR(cow)) | 269 | if (IS_ERR(cow)) |
260 | return PTR_ERR(cow); | 270 | return PTR_ERR(cow); |
261 | 271 | ||
@@ -288,6 +298,434 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
288 | return 0; | 298 | return 0; |
289 | } | 299 | } |
290 | 300 | ||
301 | enum mod_log_op { | ||
302 | MOD_LOG_KEY_REPLACE, | ||
303 | MOD_LOG_KEY_ADD, | ||
304 | MOD_LOG_KEY_REMOVE, | ||
305 | MOD_LOG_KEY_REMOVE_WHILE_FREEING, | ||
306 | MOD_LOG_KEY_REMOVE_WHILE_MOVING, | ||
307 | MOD_LOG_MOVE_KEYS, | ||
308 | MOD_LOG_ROOT_REPLACE, | ||
309 | }; | ||
310 | |||
311 | struct tree_mod_move { | ||
312 | int dst_slot; | ||
313 | int nr_items; | ||
314 | }; | ||
315 | |||
316 | struct tree_mod_root { | ||
317 | u64 logical; | ||
318 | u8 level; | ||
319 | }; | ||
320 | |||
321 | struct tree_mod_elem { | ||
322 | struct rb_node node; | ||
323 | u64 index; /* shifted logical */ | ||
324 | struct seq_list elem; | ||
325 | enum mod_log_op op; | ||
326 | |||
327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | ||
328 | int slot; | ||
329 | |||
330 | /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ | ||
331 | u64 generation; | ||
332 | |||
333 | /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ | ||
334 | struct btrfs_disk_key key; | ||
335 | u64 blockptr; | ||
336 | |||
337 | /* this is used for op == MOD_LOG_MOVE_KEYS */ | ||
338 | struct tree_mod_move move; | ||
339 | |||
340 | /* this is used for op == MOD_LOG_ROOT_REPLACE */ | ||
341 | struct tree_mod_root old_root; | ||
342 | }; | ||
343 | |||
344 | static inline void | ||
345 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
346 | { | ||
347 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | ||
348 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
349 | } | ||
350 | |||
351 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
352 | struct seq_list *elem) | ||
353 | { | ||
354 | elem->flags = 1; | ||
355 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
356 | __get_tree_mod_seq(fs_info, elem); | ||
357 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
358 | } | ||
359 | |||
360 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
361 | struct seq_list *elem) | ||
362 | { | ||
363 | struct rb_root *tm_root; | ||
364 | struct rb_node *node; | ||
365 | struct rb_node *next; | ||
366 | struct seq_list *cur_elem; | ||
367 | struct tree_mod_elem *tm; | ||
368 | u64 min_seq = (u64)-1; | ||
369 | u64 seq_putting = elem->seq; | ||
370 | |||
371 | if (!seq_putting) | ||
372 | return; | ||
373 | |||
374 | BUG_ON(!(elem->flags & 1)); | ||
375 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
376 | list_del(&elem->list); | ||
377 | |||
378 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | ||
379 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | ||
380 | if (seq_putting > cur_elem->seq) { | ||
381 | /* | ||
382 | * blocker with lower sequence number exists, we | ||
383 | * cannot remove anything from the log | ||
384 | */ | ||
385 | goto out; | ||
386 | } | ||
387 | min_seq = cur_elem->seq; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * anything that's lower than the lowest existing (read: blocked) | ||
393 | * sequence number can be removed from the tree. | ||
394 | */ | ||
395 | write_lock(&fs_info->tree_mod_log_lock); | ||
396 | tm_root = &fs_info->tree_mod_log; | ||
397 | for (node = rb_first(tm_root); node; node = next) { | ||
398 | next = rb_next(node); | ||
399 | tm = container_of(node, struct tree_mod_elem, node); | ||
400 | if (tm->elem.seq > min_seq) | ||
401 | continue; | ||
402 | rb_erase(node, tm_root); | ||
403 | list_del(&tm->elem.list); | ||
404 | kfree(tm); | ||
405 | } | ||
406 | write_unlock(&fs_info->tree_mod_log_lock); | ||
407 | out: | ||
408 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * key order of the log: | ||
413 | * index -> sequence | ||
414 | * | ||
415 | * the index is the shifted logical of the *new* root node for root replace | ||
416 | * operations, or the shifted logical of the affected block for all other | ||
417 | * operations. | ||
418 | */ | ||
419 | static noinline int | ||
420 | __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | ||
421 | { | ||
422 | struct rb_root *tm_root; | ||
423 | struct rb_node **new; | ||
424 | struct rb_node *parent = NULL; | ||
425 | struct tree_mod_elem *cur; | ||
426 | int ret = 0; | ||
427 | |||
428 | BUG_ON(!tm || !tm->elem.seq); | ||
429 | |||
430 | write_lock(&fs_info->tree_mod_log_lock); | ||
431 | tm_root = &fs_info->tree_mod_log; | ||
432 | new = &tm_root->rb_node; | ||
433 | while (*new) { | ||
434 | cur = container_of(*new, struct tree_mod_elem, node); | ||
435 | parent = *new; | ||
436 | if (cur->index < tm->index) | ||
437 | new = &((*new)->rb_left); | ||
438 | else if (cur->index > tm->index) | ||
439 | new = &((*new)->rb_right); | ||
440 | else if (cur->elem.seq < tm->elem.seq) | ||
441 | new = &((*new)->rb_left); | ||
442 | else if (cur->elem.seq > tm->elem.seq) | ||
443 | new = &((*new)->rb_right); | ||
444 | else { | ||
445 | kfree(tm); | ||
446 | ret = -EEXIST; | ||
447 | goto unlock; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | rb_link_node(&tm->node, parent, new); | ||
452 | rb_insert_color(&tm->node, tm_root); | ||
453 | unlock: | ||
454 | write_unlock(&fs_info->tree_mod_log_lock); | ||
455 | return ret; | ||
456 | } | ||
457 | |||
458 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | ||
459 | struct extent_buffer *eb) { | ||
460 | smp_mb(); | ||
461 | if (list_empty(&(fs_info)->tree_mod_seq_list)) | ||
462 | return 1; | ||
463 | if (!eb) | ||
464 | return 0; | ||
465 | if (btrfs_header_level(eb) == 0) | ||
466 | return 1; | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | ||
471 | struct tree_mod_elem **tm_ret) | ||
472 | { | ||
473 | struct tree_mod_elem *tm; | ||
474 | int seq; | ||
475 | |||
476 | if (tree_mod_dont_log(fs_info, NULL)) | ||
477 | return 0; | ||
478 | |||
479 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | ||
480 | if (!tm) | ||
481 | return -ENOMEM; | ||
482 | |||
483 | tm->elem.flags = 0; | ||
484 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
485 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
486 | /* | ||
487 | * someone emptied the list while we were waiting for the lock. | ||
488 | * we must not add to the list, because no blocker exists. items | ||
489 | * are removed from the list only when the existing blocker is | ||
490 | * removed from the list. | ||
491 | */ | ||
492 | kfree(tm); | ||
493 | seq = 0; | ||
494 | } else { | ||
495 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
496 | seq = tm->elem.seq; | ||
497 | } | ||
498 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
499 | |||
500 | return seq; | ||
501 | } | ||
502 | |||
503 | static noinline int | ||
504 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
505 | struct extent_buffer *eb, int slot, | ||
506 | enum mod_log_op op, gfp_t flags) | ||
507 | { | ||
508 | struct tree_mod_elem *tm; | ||
509 | int ret; | ||
510 | |||
511 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
512 | if (ret <= 0) | ||
513 | return ret; | ||
514 | |||
515 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
516 | if (op != MOD_LOG_KEY_ADD) { | ||
517 | btrfs_node_key(eb, &tm->key, slot); | ||
518 | tm->blockptr = btrfs_node_blockptr(eb, slot); | ||
519 | } | ||
520 | tm->op = op; | ||
521 | tm->slot = slot; | ||
522 | tm->generation = btrfs_node_ptr_generation(eb, slot); | ||
523 | |||
524 | return __tree_mod_log_insert(fs_info, tm); | ||
525 | } | ||
526 | |||
527 | static noinline int | ||
528 | tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
529 | int slot, enum mod_log_op op) | ||
530 | { | ||
531 | return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); | ||
532 | } | ||
533 | |||
534 | static noinline int | ||
535 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | ||
536 | struct extent_buffer *eb, int dst_slot, int src_slot, | ||
537 | int nr_items, gfp_t flags) | ||
538 | { | ||
539 | struct tree_mod_elem *tm; | ||
540 | int ret; | ||
541 | int i; | ||
542 | |||
543 | if (tree_mod_dont_log(fs_info, eb)) | ||
544 | return 0; | ||
545 | |||
546 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | ||
547 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | ||
548 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | ||
549 | BUG_ON(ret < 0); | ||
550 | } | ||
551 | |||
552 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
553 | if (ret <= 0) | ||
554 | return ret; | ||
555 | |||
556 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
557 | tm->slot = src_slot; | ||
558 | tm->move.dst_slot = dst_slot; | ||
559 | tm->move.nr_items = nr_items; | ||
560 | tm->op = MOD_LOG_MOVE_KEYS; | ||
561 | |||
562 | return __tree_mod_log_insert(fs_info, tm); | ||
563 | } | ||
564 | |||
565 | static noinline int | ||
566 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | ||
567 | struct extent_buffer *old_root, | ||
568 | struct extent_buffer *new_root, gfp_t flags) | ||
569 | { | ||
570 | struct tree_mod_elem *tm; | ||
571 | int ret; | ||
572 | |||
573 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
574 | if (ret <= 0) | ||
575 | return ret; | ||
576 | |||
577 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | ||
578 | tm->old_root.logical = old_root->start; | ||
579 | tm->old_root.level = btrfs_header_level(old_root); | ||
580 | tm->generation = btrfs_header_generation(old_root); | ||
581 | tm->op = MOD_LOG_ROOT_REPLACE; | ||
582 | |||
583 | return __tree_mod_log_insert(fs_info, tm); | ||
584 | } | ||
585 | |||
586 | static struct tree_mod_elem * | ||
587 | __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | ||
588 | int smallest) | ||
589 | { | ||
590 | struct rb_root *tm_root; | ||
591 | struct rb_node *node; | ||
592 | struct tree_mod_elem *cur = NULL; | ||
593 | struct tree_mod_elem *found = NULL; | ||
594 | u64 index = start >> PAGE_CACHE_SHIFT; | ||
595 | |||
596 | read_lock(&fs_info->tree_mod_log_lock); | ||
597 | tm_root = &fs_info->tree_mod_log; | ||
598 | node = tm_root->rb_node; | ||
599 | while (node) { | ||
600 | cur = container_of(node, struct tree_mod_elem, node); | ||
601 | if (cur->index < index) { | ||
602 | node = node->rb_left; | ||
603 | } else if (cur->index > index) { | ||
604 | node = node->rb_right; | ||
605 | } else if (cur->elem.seq < min_seq) { | ||
606 | node = node->rb_left; | ||
607 | } else if (!smallest) { | ||
608 | /* we want the node with the highest seq */ | ||
609 | if (found) | ||
610 | BUG_ON(found->elem.seq > cur->elem.seq); | ||
611 | found = cur; | ||
612 | node = node->rb_left; | ||
613 | } else if (cur->elem.seq > min_seq) { | ||
614 | /* we want the node with the smallest seq */ | ||
615 | if (found) | ||
616 | BUG_ON(found->elem.seq < cur->elem.seq); | ||
617 | found = cur; | ||
618 | node = node->rb_right; | ||
619 | } else { | ||
620 | found = cur; | ||
621 | break; | ||
622 | } | ||
623 | } | ||
624 | read_unlock(&fs_info->tree_mod_log_lock); | ||
625 | |||
626 | return found; | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * this returns the element from the log with the smallest time sequence | ||
631 | * value that's in the log (the oldest log item). any element with a time | ||
632 | * sequence lower than min_seq will be ignored. | ||
633 | */ | ||
634 | static struct tree_mod_elem * | ||
635 | tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, | ||
636 | u64 min_seq) | ||
637 | { | ||
638 | return __tree_mod_log_search(fs_info, start, min_seq, 1); | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | * this returns the element from the log with the largest time sequence | ||
643 | * value that's in the log (the most recent log item). any element with | ||
644 | * a time sequence lower than min_seq will be ignored. | ||
645 | */ | ||
646 | static struct tree_mod_elem * | ||
647 | tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | ||
648 | { | ||
649 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | ||
650 | } | ||
651 | |||
652 | static inline void | ||
653 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
654 | struct extent_buffer *src, unsigned long dst_offset, | ||
655 | unsigned long src_offset, int nr_items) | ||
656 | { | ||
657 | int ret; | ||
658 | int i; | ||
659 | |||
660 | if (tree_mod_dont_log(fs_info, NULL)) | ||
661 | return; | ||
662 | |||
663 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | ||
664 | return; | ||
665 | |||
666 | /* speed this up by single seq for all operations? */ | ||
667 | for (i = 0; i < nr_items; i++) { | ||
668 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | ||
669 | MOD_LOG_KEY_REMOVE); | ||
670 | BUG_ON(ret < 0); | ||
671 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | ||
672 | MOD_LOG_KEY_ADD); | ||
673 | BUG_ON(ret < 0); | ||
674 | } | ||
675 | } | ||
676 | |||
677 | static inline void | ||
678 | tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
679 | int dst_offset, int src_offset, int nr_items) | ||
680 | { | ||
681 | int ret; | ||
682 | ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, | ||
683 | nr_items, GFP_NOFS); | ||
684 | BUG_ON(ret < 0); | ||
685 | } | ||
686 | |||
687 | static inline void | ||
688 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | ||
689 | struct extent_buffer *eb, | ||
690 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
691 | { | ||
692 | int ret; | ||
693 | |||
694 | ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, | ||
695 | MOD_LOG_KEY_REPLACE, | ||
696 | atomic ? GFP_ATOMIC : GFP_NOFS); | ||
697 | BUG_ON(ret < 0); | ||
698 | } | ||
699 | |||
700 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
701 | struct extent_buffer *eb) | ||
702 | { | ||
703 | int i; | ||
704 | int ret; | ||
705 | u32 nritems; | ||
706 | |||
707 | if (tree_mod_dont_log(fs_info, eb)) | ||
708 | return; | ||
709 | |||
710 | nritems = btrfs_header_nritems(eb); | ||
711 | for (i = nritems - 1; i >= 0; i--) { | ||
712 | ret = tree_mod_log_insert_key(fs_info, eb, i, | ||
713 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
714 | BUG_ON(ret < 0); | ||
715 | } | ||
716 | } | ||
717 | |||
718 | static inline void | ||
719 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | ||
720 | struct extent_buffer *new_root_node) | ||
721 | { | ||
722 | int ret; | ||
723 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
724 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | ||
725 | new_root_node, GFP_NOFS); | ||
726 | BUG_ON(ret < 0); | ||
727 | } | ||
728 | |||
291 | /* | 729 | /* |
292 | * check if the tree block can be shared by multiple trees | 730 | * check if the tree block can be shared by multiple trees |
293 | */ | 731 | */ |
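
The tree modification log introduced above keys every element on (index, seq): index is the block's logical address shifted down by the page shift, seq is the global modification sequence, and lookups ask for the oldest or newest logged element at or above a minimum sequence for one index. A small userspace sketch of that ordering and of the "newest element >= min_seq" lookup; the rb-tree is replaced by a flat array purely for illustration.

#include <stdint.h>
#include <stdio.h>

struct mod_elem {
    uint64_t index; /* block logical address >> page shift */
    uint64_t seq;   /* global modification sequence number */
};

/* the two-level ordering the rb-tree insert keys on: index, then seq */
static int mod_elem_cmp(const struct mod_elem *a, const struct mod_elem *b)
{
    if (a->index != b->index)
        return a->index < b->index ? -1 : 1;
    if (a->seq != b->seq)
        return a->seq < b->seq ? -1 : 1;
    return 0;
}

/* for one index, find the element with the largest seq that is still
 * >= min_seq; NULL means nothing relevant was logged for that block */
static const struct mod_elem *
search_newest(const struct mod_elem *log, unsigned int n,
              uint64_t index, uint64_t min_seq)
{
    const struct mod_elem *found = NULL;
    unsigned int i;

    for (i = 0; i < n; i++) {
        if (log[i].index != index || log[i].seq < min_seq)
            continue;
        if (!found || log[i].seq > found->seq)
            found = &log[i];
    }
    return found;
}

int main(void)
{
    /* a log kept in (index, seq) order, as the rb-tree would keep it */
    struct mod_elem log[] = {
        { 10, 3 }, { 10, 7 }, { 10, 9 }, { 12, 4 },
    };
    const struct mod_elem *e;
    unsigned int i;

    for (i = 1; i < 4; i++)     /* sanity-check the ordering */
        if (mod_elem_cmp(&log[i - 1], &log[i]) >= 0)
            return 1;

    e = search_newest(log, 4, 10, 5);
    if (e)
        printf("index 10: newest logged seq >= 5 is %llu\n",
               (unsigned long long)e->seq);
    return 0;
}

In the kernel, the same (index, seq) ordering is what lets btrfs_put_tree_mod_seq() drop every element older than the lowest still-blocked sequence in a single sweep of the rb-tree.
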
@@ -409,6 +847,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
409 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); | 847 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
410 | BUG_ON(ret); /* -ENOMEM */ | 848 | BUG_ON(ret); /* -ENOMEM */ |
411 | } | 849 | } |
850 | /* | ||
851 | * don't log freeing in case we're freeing the root node, this | ||
852 | * is done by tree_mod_log_set_root_pointer later | ||
853 | */ | ||
854 | if (buf != root->node && btrfs_header_level(buf) != 0) | ||
855 | tree_mod_log_free_eb(root->fs_info, buf); | ||
412 | clean_tree_block(trans, root, buf); | 856 | clean_tree_block(trans, root, buf); |
413 | *last_ref = 1; | 857 | *last_ref = 1; |
414 | } | 858 | } |
@@ -467,7 +911,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
467 | 911 | ||
468 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 912 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, |
469 | root->root_key.objectid, &disk_key, | 913 | root->root_key.objectid, &disk_key, |
470 | level, search_start, empty_size, 1); | 914 | level, search_start, empty_size); |
471 | if (IS_ERR(cow)) | 915 | if (IS_ERR(cow)) |
472 | return PTR_ERR(cow); | 916 | return PTR_ERR(cow); |
473 | 917 | ||
@@ -506,10 +950,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
506 | parent_start = 0; | 950 | parent_start = 0; |
507 | 951 | ||
508 | extent_buffer_get(cow); | 952 | extent_buffer_get(cow); |
953 | tree_mod_log_set_root_pointer(root, cow); | ||
509 | rcu_assign_pointer(root->node, cow); | 954 | rcu_assign_pointer(root->node, cow); |
510 | 955 | ||
511 | btrfs_free_tree_block(trans, root, buf, parent_start, | 956 | btrfs_free_tree_block(trans, root, buf, parent_start, |
512 | last_ref, 1); | 957 | last_ref); |
513 | free_extent_buffer(buf); | 958 | free_extent_buffer(buf); |
514 | add_root_to_dirty_list(root); | 959 | add_root_to_dirty_list(root); |
515 | } else { | 960 | } else { |
@@ -519,13 +964,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
519 | parent_start = 0; | 964 | parent_start = 0; |
520 | 965 | ||
521 | WARN_ON(trans->transid != btrfs_header_generation(parent)); | 966 | WARN_ON(trans->transid != btrfs_header_generation(parent)); |
967 | tree_mod_log_insert_key(root->fs_info, parent, parent_slot, | ||
968 | MOD_LOG_KEY_REPLACE); | ||
522 | btrfs_set_node_blockptr(parent, parent_slot, | 969 | btrfs_set_node_blockptr(parent, parent_slot, |
523 | cow->start); | 970 | cow->start); |
524 | btrfs_set_node_ptr_generation(parent, parent_slot, | 971 | btrfs_set_node_ptr_generation(parent, parent_slot, |
525 | trans->transid); | 972 | trans->transid); |
526 | btrfs_mark_buffer_dirty(parent); | 973 | btrfs_mark_buffer_dirty(parent); |
527 | btrfs_free_tree_block(trans, root, buf, parent_start, | 974 | btrfs_free_tree_block(trans, root, buf, parent_start, |
528 | last_ref, 1); | 975 | last_ref); |
529 | } | 976 | } |
530 | if (unlock_orig) | 977 | if (unlock_orig) |
531 | btrfs_tree_unlock(buf); | 978 | btrfs_tree_unlock(buf); |
@@ -535,6 +982,210 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
535 | return 0; | 982 | return 0; |
536 | } | 983 | } |
537 | 984 | ||
985 | /* | ||
986 | * returns the logical address of the oldest predecessor of the given root. | ||
987 | * entries older than time_seq are ignored. | ||
988 | */ | ||
989 | static struct tree_mod_elem * | ||
990 | __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | ||
991 | struct btrfs_root *root, u64 time_seq) | ||
992 | { | ||
993 | struct tree_mod_elem *tm; | ||
994 | struct tree_mod_elem *found = NULL; | ||
995 | u64 root_logical = root->node->start; | ||
996 | int looped = 0; | ||
997 | |||
998 | if (!time_seq) | ||
999 | return 0; | ||
1000 | |||
1001 | /* | ||
1002 | * the very last operation that's logged for a root is the replacement | ||
1003 | * operation (if it is replaced at all). this has the index of the *new* | ||
1004 | * root, making it the very first operation that's logged for this root. | ||
1005 | */ | ||
1006 | while (1) { | ||
1007 | tm = tree_mod_log_search_oldest(fs_info, root_logical, | ||
1008 | time_seq); | ||
1009 | if (!looped && !tm) | ||
1010 | return 0; | ||
1011 | /* | ||
1012 | * we must have key remove operations in the log before the | ||
1013 | * replace operation. | ||
1014 | */ | ||
1015 | BUG_ON(!tm); | ||
1016 | |||
1017 | if (tm->op != MOD_LOG_ROOT_REPLACE) | ||
1018 | break; | ||
1019 | |||
1020 | found = tm; | ||
1021 | root_logical = tm->old_root.logical; | ||
1022 | BUG_ON(root_logical == root->node->start); | ||
1023 | looped = 1; | ||
1024 | } | ||
1025 | |||
1026 | return found; | ||
1027 | } | ||
1028 | |||
1029 | /* | ||
1030 | * tm is a pointer to the first operation to rewind within eb. then, all | ||
1031 | * previous operations will be rewound (until we reach something older than | ||
1032 | * time_seq). | ||
1033 | */ | ||
1034 | static void | ||
1035 | __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | ||
1036 | struct tree_mod_elem *first_tm) | ||
1037 | { | ||
1038 | u32 n; | ||
1039 | struct rb_node *next; | ||
1040 | struct tree_mod_elem *tm = first_tm; | ||
1041 | unsigned long o_dst; | ||
1042 | unsigned long o_src; | ||
1043 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | ||
1044 | |||
1045 | n = btrfs_header_nritems(eb); | ||
1046 | while (tm && tm->elem.seq >= time_seq) { | ||
1047 | /* | ||
1048 | * all the operations are recorded with the operator used for | ||
1049 | * the modification. as we're going backwards, we do the | ||
1050 | * opposite of each operation here. | ||
1051 | */ | ||
1052 | switch (tm->op) { | ||
1053 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: | ||
1054 | BUG_ON(tm->slot < n); | ||
1055 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | ||
1056 | case MOD_LOG_KEY_REMOVE: | ||
1057 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1058 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1059 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1060 | tm->generation); | ||
1061 | n++; | ||
1062 | break; | ||
1063 | case MOD_LOG_KEY_REPLACE: | ||
1064 | BUG_ON(tm->slot >= n); | ||
1065 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1066 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1067 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1068 | tm->generation); | ||
1069 | break; | ||
1070 | case MOD_LOG_KEY_ADD: | ||
1071 | if (tm->slot != n - 1) { | ||
1072 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1073 | o_src = btrfs_node_key_ptr_offset(tm->slot + 1); | ||
1074 | memmove_extent_buffer(eb, o_dst, o_src, p_size); | ||
1075 | } | ||
1076 | n--; | ||
1077 | break; | ||
1078 | case MOD_LOG_MOVE_KEYS: | ||
1079 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1080 | o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); | ||
1081 | memmove_extent_buffer(eb, o_dst, o_src, | ||
1082 | tm->move.nr_items * p_size); | ||
1083 | break; | ||
1084 | case MOD_LOG_ROOT_REPLACE: | ||
1085 | /* | ||
1086 | * this operation is special. for roots, this must be | ||
1087 | * handled explicitly before rewinding. | ||
1088 | * for non-roots, this operation may exist if the node | ||
1089 | * was a root: root A -> child B; then A gets empty and | ||
1090 | * B is promoted to the new root. in the mod log, we'll | ||
1091 | * have a root-replace operation for B, a tree block | ||
1092 | * that is not a root. we simply ignore that operation. | ||
1093 | */ | ||
1094 | break; | ||
1095 | } | ||
1096 | next = rb_next(&tm->node); | ||
1097 | if (!next) | ||
1098 | break; | ||
1099 | tm = container_of(next, struct tree_mod_elem, node); | ||
1100 | if (tm->index != first_tm->index) | ||
1101 | break; | ||
1102 | } | ||
1103 | btrfs_set_header_nritems(eb, n); | ||
1104 | } | ||
1105 | |||
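The switch above applies the inverse of each logged operation while walking the log backwards. As a compact reference, the sketch below summarizes the effect per operation; it is illustrative only — the enum tag (mod_log_op) and the helper name are assumptions, only the MOD_LOG_* values come from the switch above.

/*
 * Illustrative summary, not part of the patch: what rewinding undoes for each
 * logged operation.  The enum tag is assumed; the operation names match the
 * switch in __tree_mod_log_rewind.
 */
static const char *tree_mod_rewind_effect(enum mod_log_op op)
{
	switch (op) {
	case MOD_LOG_KEY_REMOVE:
	case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
	case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
		return "re-insert the saved key/blockptr/generation and bump nritems";
	case MOD_LOG_KEY_REPLACE:
		return "restore the saved key/blockptr/generation in place";
	case MOD_LOG_KEY_ADD:
		return "drop the added slot and decrement nritems";
	case MOD_LOG_MOVE_KEYS:
		return "move the key pointers back from dst_slot to slot";
	case MOD_LOG_ROOT_REPLACE:
		return "ignored here; handled by get_old_root before rewinding";
	}
	return "unknown op";
}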
1106 | static struct extent_buffer * | ||
1107 | tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
1108 | u64 time_seq) | ||
1109 | { | ||
1110 | struct extent_buffer *eb_rewin; | ||
1111 | struct tree_mod_elem *tm; | ||
1112 | |||
1113 | if (!time_seq) | ||
1114 | return eb; | ||
1115 | |||
1116 | if (btrfs_header_level(eb) == 0) | ||
1117 | return eb; | ||
1118 | |||
1119 | tm = tree_mod_log_search(fs_info, eb->start, time_seq); | ||
1120 | if (!tm) | ||
1121 | return eb; | ||
1122 | |||
1123 | if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | ||
1124 | BUG_ON(tm->slot != 0); | ||
1125 | eb_rewin = alloc_dummy_extent_buffer(eb->start, | ||
1126 | fs_info->tree_root->nodesize); | ||
1127 | BUG_ON(!eb_rewin); | ||
1128 | btrfs_set_header_bytenr(eb_rewin, eb->start); | ||
1129 | btrfs_set_header_backref_rev(eb_rewin, | ||
1130 | btrfs_header_backref_rev(eb)); | ||
1131 | btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); | ||
1132 | btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); | ||
1133 | } else { | ||
1134 | eb_rewin = btrfs_clone_extent_buffer(eb); | ||
1135 | BUG_ON(!eb_rewin); | ||
1136 | } | ||
1137 | |||
1138 | extent_buffer_get(eb_rewin); | ||
1139 | free_extent_buffer(eb); | ||
1140 | |||
1141 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | ||
1142 | |||
1143 | return eb_rewin; | ||
1144 | } | ||
1145 | |||
1146 | static inline struct extent_buffer * | ||
1147 | get_old_root(struct btrfs_root *root, u64 time_seq) | ||
1148 | { | ||
1149 | struct tree_mod_elem *tm; | ||
1150 | struct extent_buffer *eb; | ||
1151 | struct tree_mod_root *old_root; | ||
1152 | u64 old_generation; | ||
1153 | |||
1154 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | ||
1155 | if (!tm) | ||
1156 | return root->node; | ||
1157 | |||
1158 | old_root = &tm->old_root; | ||
1159 | old_generation = tm->generation; | ||
1160 | |||
1161 | tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); | ||
1162 | /* | ||
1163 | * there was an item in the log when __tree_mod_log_oldest_root | ||
1164 | * returned. this one must not go away, because the time_seq passed to | ||
1165 | * us must be blocking its removal. | ||
1166 | */ | ||
1167 | BUG_ON(!tm); | ||
1168 | |||
1169 | if (old_root->logical == root->node->start) { | ||
1170 | /* there are logged operations for the current root */ | ||
1171 | eb = btrfs_clone_extent_buffer(root->node); | ||
1172 | } else { | ||
1173 | /* there's a root replace operation for the current root */ | ||
1174 | eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, | ||
1175 | root->nodesize); | ||
1176 | btrfs_set_header_bytenr(eb, eb->start); | ||
1177 | btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); | ||
1178 | btrfs_set_header_owner(eb, root->root_key.objectid); | ||
1179 | } | ||
1180 | if (!eb) | ||
1181 | return NULL; | ||
1182 | btrfs_set_header_level(eb, old_root->level); | ||
1183 | btrfs_set_header_generation(eb, old_generation); | ||
1184 | __tree_mod_log_rewind(eb, time_seq, tm); | ||
1185 | |||
1186 | return eb; | ||
1187 | } | ||
1188 | |||
538 | static inline int should_cow_block(struct btrfs_trans_handle *trans, | 1189 | static inline int should_cow_block(struct btrfs_trans_handle *trans, |
539 | struct btrfs_root *root, | 1190 | struct btrfs_root *root, |
540 | struct extent_buffer *buf) | 1191 | struct extent_buffer *buf) |
@@ -739,7 +1390,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
739 | if (!cur) | 1390 | if (!cur) |
740 | return -EIO; | 1391 | return -EIO; |
741 | } else if (!uptodate) { | 1392 | } else if (!uptodate) { |
742 | btrfs_read_buffer(cur, gen); | 1393 | err = btrfs_read_buffer(cur, gen); |
1394 | if (err) { | ||
1395 | free_extent_buffer(cur); | ||
1396 | return err; | ||
1397 | } | ||
743 | } | 1398 | } |
744 | } | 1399 | } |
745 | if (search_start == 0) | 1400 | if (search_start == 0) |
@@ -854,20 +1509,18 @@ static noinline int generic_bin_search(struct extent_buffer *eb, | |||
854 | static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 1509 | static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
855 | int level, int *slot) | 1510 | int level, int *slot) |
856 | { | 1511 | { |
857 | if (level == 0) { | 1512 | if (level == 0) |
858 | return generic_bin_search(eb, | 1513 | return generic_bin_search(eb, |
859 | offsetof(struct btrfs_leaf, items), | 1514 | offsetof(struct btrfs_leaf, items), |
860 | sizeof(struct btrfs_item), | 1515 | sizeof(struct btrfs_item), |
861 | key, btrfs_header_nritems(eb), | 1516 | key, btrfs_header_nritems(eb), |
862 | slot); | 1517 | slot); |
863 | } else { | 1518 | else |
864 | return generic_bin_search(eb, | 1519 | return generic_bin_search(eb, |
865 | offsetof(struct btrfs_node, ptrs), | 1520 | offsetof(struct btrfs_node, ptrs), |
866 | sizeof(struct btrfs_key_ptr), | 1521 | sizeof(struct btrfs_key_ptr), |
867 | key, btrfs_header_nritems(eb), | 1522 | key, btrfs_header_nritems(eb), |
868 | slot); | 1523 | slot); |
869 | } | ||
870 | return -1; | ||
871 | } | 1524 | } |
872 | 1525 | ||
873 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 1526 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
@@ -974,6 +1627,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
974 | goto enospc; | 1627 | goto enospc; |
975 | } | 1628 | } |
976 | 1629 | ||
1630 | tree_mod_log_set_root_pointer(root, child); | ||
977 | rcu_assign_pointer(root->node, child); | 1631 | rcu_assign_pointer(root->node, child); |
978 | 1632 | ||
979 | add_root_to_dirty_list(root); | 1633 | add_root_to_dirty_list(root); |
@@ -987,7 +1641,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
987 | free_extent_buffer(mid); | 1641 | free_extent_buffer(mid); |
988 | 1642 | ||
989 | root_sub_used(root, mid->len); | 1643 | root_sub_used(root, mid->len); |
990 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1644 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
991 | /* once for the root ptr */ | 1645 | /* once for the root ptr */ |
992 | free_extent_buffer_stale(mid); | 1646 | free_extent_buffer_stale(mid); |
993 | return 0; | 1647 | return 0; |
@@ -1040,14 +1694,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1040 | if (btrfs_header_nritems(right) == 0) { | 1694 | if (btrfs_header_nritems(right) == 0) { |
1041 | clean_tree_block(trans, root, right); | 1695 | clean_tree_block(trans, root, right); |
1042 | btrfs_tree_unlock(right); | 1696 | btrfs_tree_unlock(right); |
1043 | del_ptr(trans, root, path, level + 1, pslot + 1); | 1697 | del_ptr(trans, root, path, level + 1, pslot + 1, 1); |
1044 | root_sub_used(root, right->len); | 1698 | root_sub_used(root, right->len); |
1045 | btrfs_free_tree_block(trans, root, right, 0, 1, 0); | 1699 | btrfs_free_tree_block(trans, root, right, 0, 1); |
1046 | free_extent_buffer_stale(right); | 1700 | free_extent_buffer_stale(right); |
1047 | right = NULL; | 1701 | right = NULL; |
1048 | } else { | 1702 | } else { |
1049 | struct btrfs_disk_key right_key; | 1703 | struct btrfs_disk_key right_key; |
1050 | btrfs_node_key(right, &right_key, 0); | 1704 | btrfs_node_key(right, &right_key, 0); |
1705 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1706 | &right_key, pslot + 1, 0); | ||
1051 | btrfs_set_node_key(parent, &right_key, pslot + 1); | 1707 | btrfs_set_node_key(parent, &right_key, pslot + 1); |
1052 | btrfs_mark_buffer_dirty(parent); | 1708 | btrfs_mark_buffer_dirty(parent); |
1053 | } | 1709 | } |
@@ -1082,15 +1738,17 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1082 | if (btrfs_header_nritems(mid) == 0) { | 1738 | if (btrfs_header_nritems(mid) == 0) { |
1083 | clean_tree_block(trans, root, mid); | 1739 | clean_tree_block(trans, root, mid); |
1084 | btrfs_tree_unlock(mid); | 1740 | btrfs_tree_unlock(mid); |
1085 | del_ptr(trans, root, path, level + 1, pslot); | 1741 | del_ptr(trans, root, path, level + 1, pslot, 1); |
1086 | root_sub_used(root, mid->len); | 1742 | root_sub_used(root, mid->len); |
1087 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1743 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
1088 | free_extent_buffer_stale(mid); | 1744 | free_extent_buffer_stale(mid); |
1089 | mid = NULL; | 1745 | mid = NULL; |
1090 | } else { | 1746 | } else { |
1091 | /* update the parent key to reflect our changes */ | 1747 | /* update the parent key to reflect our changes */ |
1092 | struct btrfs_disk_key mid_key; | 1748 | struct btrfs_disk_key mid_key; |
1093 | btrfs_node_key(mid, &mid_key, 0); | 1749 | btrfs_node_key(mid, &mid_key, 0); |
1750 | tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, | ||
1751 | pslot, 0); | ||
1094 | btrfs_set_node_key(parent, &mid_key, pslot); | 1752 | btrfs_set_node_key(parent, &mid_key, pslot); |
1095 | btrfs_mark_buffer_dirty(parent); | 1753 | btrfs_mark_buffer_dirty(parent); |
1096 | } | 1754 | } |
@@ -1188,6 +1846,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1188 | struct btrfs_disk_key disk_key; | 1846 | struct btrfs_disk_key disk_key; |
1189 | orig_slot += left_nr; | 1847 | orig_slot += left_nr; |
1190 | btrfs_node_key(mid, &disk_key, 0); | 1848 | btrfs_node_key(mid, &disk_key, 0); |
1849 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1850 | &disk_key, pslot, 0); | ||
1191 | btrfs_set_node_key(parent, &disk_key, pslot); | 1851 | btrfs_set_node_key(parent, &disk_key, pslot); |
1192 | btrfs_mark_buffer_dirty(parent); | 1852 | btrfs_mark_buffer_dirty(parent); |
1193 | if (btrfs_header_nritems(left) > orig_slot) { | 1853 | if (btrfs_header_nritems(left) > orig_slot) { |
@@ -1239,6 +1899,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1239 | struct btrfs_disk_key disk_key; | 1899 | struct btrfs_disk_key disk_key; |
1240 | 1900 | ||
1241 | btrfs_node_key(right, &disk_key, 0); | 1901 | btrfs_node_key(right, &disk_key, 0); |
1902 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1903 | &disk_key, pslot + 1, 0); | ||
1242 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | 1904 | btrfs_set_node_key(parent, &disk_key, pslot + 1); |
1243 | btrfs_mark_buffer_dirty(parent); | 1905 | btrfs_mark_buffer_dirty(parent); |
1244 | 1906 | ||
@@ -1496,7 +2158,7 @@ static int | |||
1496 | read_block_for_search(struct btrfs_trans_handle *trans, | 2158 | read_block_for_search(struct btrfs_trans_handle *trans, |
1497 | struct btrfs_root *root, struct btrfs_path *p, | 2159 | struct btrfs_root *root, struct btrfs_path *p, |
1498 | struct extent_buffer **eb_ret, int level, int slot, | 2160 | struct extent_buffer **eb_ret, int level, int slot, |
1499 | struct btrfs_key *key) | 2161 | struct btrfs_key *key, u64 time_seq) |
1500 | { | 2162 | { |
1501 | u64 blocknr; | 2163 | u64 blocknr; |
1502 | u64 gen; | 2164 | u64 gen; |
@@ -1850,7 +2512,7 @@ cow_done: | |||
1850 | } | 2512 | } |
1851 | 2513 | ||
1852 | err = read_block_for_search(trans, root, p, | 2514 | err = read_block_for_search(trans, root, p, |
1853 | &b, level, slot, key); | 2515 | &b, level, slot, key, 0); |
1854 | if (err == -EAGAIN) | 2516 | if (err == -EAGAIN) |
1855 | goto again; | 2517 | goto again; |
1856 | if (err) { | 2518 | if (err) { |
@@ -1922,6 +2584,115 @@ done: | |||
1922 | } | 2584 | } |
1923 | 2585 | ||
1924 | /* | 2586 | /* |
2587 | * Like btrfs_search_slot, this looks for a key in the given tree. It uses the | ||
2588 | * current state of the tree together with the operations recorded in the tree | ||
2589 | * modification log to search for the key in a previous version of this tree, as | ||
2590 | * denoted by the time_seq parameter. | ||
2591 | * | ||
2592 | * Naturally, there is no support for insert, delete or cow operations. | ||
2593 | * | ||
2594 | * The resulting path and return value will be set up as if we called | ||
2595 | * btrfs_search_slot at that point in time with ins_len and cow both set to 0. | ||
2596 | */ | ||
2597 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
2598 | struct btrfs_path *p, u64 time_seq) | ||
2599 | { | ||
2600 | struct extent_buffer *b; | ||
2601 | int slot; | ||
2602 | int ret; | ||
2603 | int err; | ||
2604 | int level; | ||
2605 | int lowest_unlock = 1; | ||
2606 | u8 lowest_level = 0; | ||
2607 | |||
2608 | lowest_level = p->lowest_level; | ||
2609 | WARN_ON(p->nodes[0] != NULL); | ||
2610 | |||
2611 | if (p->search_commit_root) { | ||
2612 | BUG_ON(time_seq); | ||
2613 | return btrfs_search_slot(NULL, root, key, p, 0, 0); | ||
2614 | } | ||
2615 | |||
2616 | again: | ||
2617 | b = get_old_root(root, time_seq); | ||
2618 | extent_buffer_get(b); | ||
2619 | level = btrfs_header_level(b); | ||
2620 | btrfs_tree_read_lock(b); | ||
2621 | p->locks[level] = BTRFS_READ_LOCK; | ||
2622 | |||
2623 | while (b) { | ||
2624 | level = btrfs_header_level(b); | ||
2625 | p->nodes[level] = b; | ||
2626 | btrfs_clear_path_blocking(p, NULL, 0); | ||
2627 | |||
2628 | /* | ||
2629 | * we have a lock on b and as long as we aren't changing | ||
2630 | * the tree, there is no way for the items in b to change. | ||
2631 | * It is safe to drop the lock on our parent before we | ||
2632 | * go through the expensive btree search on b. | ||
2633 | */ | ||
2634 | btrfs_unlock_up_safe(p, level + 1); | ||
2635 | |||
2636 | ret = bin_search(b, key, level, &slot); | ||
2637 | |||
2638 | if (level != 0) { | ||
2639 | int dec = 0; | ||
2640 | if (ret && slot > 0) { | ||
2641 | dec = 1; | ||
2642 | slot -= 1; | ||
2643 | } | ||
2644 | p->slots[level] = slot; | ||
2645 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2646 | |||
2647 | if (level == lowest_level) { | ||
2648 | if (dec) | ||
2649 | p->slots[level]++; | ||
2650 | goto done; | ||
2651 | } | ||
2652 | |||
2653 | err = read_block_for_search(NULL, root, p, &b, level, | ||
2654 | slot, key, time_seq); | ||
2655 | if (err == -EAGAIN) | ||
2656 | goto again; | ||
2657 | if (err) { | ||
2658 | ret = err; | ||
2659 | goto done; | ||
2660 | } | ||
2661 | |||
2662 | level = btrfs_header_level(b); | ||
2663 | err = btrfs_try_tree_read_lock(b); | ||
2664 | if (!err) { | ||
2665 | btrfs_set_path_blocking(p); | ||
2666 | btrfs_tree_read_lock(b); | ||
2667 | btrfs_clear_path_blocking(p, b, | ||
2668 | BTRFS_READ_LOCK); | ||
2669 | } | ||
2670 | p->locks[level] = BTRFS_READ_LOCK; | ||
2671 | p->nodes[level] = b; | ||
2672 | b = tree_mod_log_rewind(root->fs_info, b, time_seq); | ||
2673 | if (b != p->nodes[level]) { | ||
2674 | btrfs_tree_unlock_rw(p->nodes[level], | ||
2675 | p->locks[level]); | ||
2676 | p->locks[level] = 0; | ||
2677 | p->nodes[level] = b; | ||
2678 | } | ||
2679 | } else { | ||
2680 | p->slots[level] = slot; | ||
2681 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2682 | goto done; | ||
2683 | } | ||
2684 | } | ||
2685 | ret = 1; | ||
2686 | done: | ||
2687 | if (!p->leave_spinning) | ||
2688 | btrfs_set_path_blocking(p); | ||
2689 | if (ret < 0) | ||
2690 | btrfs_release_path(p); | ||
2691 | |||
2692 | return ret; | ||
2693 | } | ||
2694 | |||
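A minimal usage sketch of the new time-travel search, under the following assumptions: btrfs_get_tree_mod_seq()/btrfs_put_tree_mod_seq() (declared in ctree.h further down in this patch) fill and release seq_list.seq, and the key values are made up for illustration. Error handling is trimmed.

/*
 * Hedged usage sketch, not part of the patch: pin a tree-mod sequence number,
 * search the tree as it looked at that point, then drop the pin so older log
 * entries can be freed again.
 */
static int example_search_old_version(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct seq_list elem = {};
	struct btrfs_key key;
	int ret;

	key.objectid = 256;			/* made-up example key */
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* assumed to record elem.seq and block pruning of older log entries */
	btrfs_get_tree_mod_seq(root->fs_info, &elem);

	ret = btrfs_search_old_slot(root, &key, path, elem.seq);
	if (ret == 0) {
		/* path->nodes[0] / path->slots[0] now describe the old version */
	}

	btrfs_put_tree_mod_seq(root->fs_info, &elem);
	btrfs_free_path(path);
	return ret;
}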
2695 | /* | ||
1925 | * adjust the pointers going up the tree, starting at level | 2696 | * adjust the pointers going up the tree, starting at level |
1926 | * making sure the right key of each node is points to 'key'. | 2697 | * making sure the right key of each node is points to 'key'. |
1927 | * This is used after shifting pointers to the left, so it stops | 2698 | * This is used after shifting pointers to the left, so it stops |
@@ -1941,6 +2712,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, | |||
1941 | if (!path->nodes[i]) | 2712 | if (!path->nodes[i]) |
1942 | break; | 2713 | break; |
1943 | t = path->nodes[i]; | 2714 | t = path->nodes[i]; |
2715 | tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); | ||
1944 | btrfs_set_node_key(t, key, tslot); | 2716 | btrfs_set_node_key(t, key, tslot); |
1945 | btrfs_mark_buffer_dirty(path->nodes[i]); | 2717 | btrfs_mark_buffer_dirty(path->nodes[i]); |
1946 | if (tslot != 0) | 2718 | if (tslot != 0) |
@@ -2023,12 +2795,16 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
2023 | } else | 2795 | } else |
2024 | push_items = min(src_nritems - 8, push_items); | 2796 | push_items = min(src_nritems - 8, push_items); |
2025 | 2797 | ||
2798 | tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, | ||
2799 | push_items); | ||
2026 | copy_extent_buffer(dst, src, | 2800 | copy_extent_buffer(dst, src, |
2027 | btrfs_node_key_ptr_offset(dst_nritems), | 2801 | btrfs_node_key_ptr_offset(dst_nritems), |
2028 | btrfs_node_key_ptr_offset(0), | 2802 | btrfs_node_key_ptr_offset(0), |
2029 | push_items * sizeof(struct btrfs_key_ptr)); | 2803 | push_items * sizeof(struct btrfs_key_ptr)); |
2030 | 2804 | ||
2031 | if (push_items < src_nritems) { | 2805 | if (push_items < src_nritems) { |
2806 | tree_mod_log_eb_move(root->fs_info, src, 0, push_items, | ||
2807 | src_nritems - push_items); | ||
2032 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | 2808 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), |
2033 | btrfs_node_key_ptr_offset(push_items), | 2809 | btrfs_node_key_ptr_offset(push_items), |
2034 | (src_nritems - push_items) * | 2810 | (src_nritems - push_items) * |
@@ -2082,11 +2858,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
2082 | if (max_push < push_items) | 2858 | if (max_push < push_items) |
2083 | push_items = max_push; | 2859 | push_items = max_push; |
2084 | 2860 | ||
2861 | tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); | ||
2085 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), | 2862 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), |
2086 | btrfs_node_key_ptr_offset(0), | 2863 | btrfs_node_key_ptr_offset(0), |
2087 | (dst_nritems) * | 2864 | (dst_nritems) * |
2088 | sizeof(struct btrfs_key_ptr)); | 2865 | sizeof(struct btrfs_key_ptr)); |
2089 | 2866 | ||
2867 | tree_mod_log_eb_copy(root->fs_info, dst, src, 0, | ||
2868 | src_nritems - push_items, push_items); | ||
2090 | copy_extent_buffer(dst, src, | 2869 | copy_extent_buffer(dst, src, |
2091 | btrfs_node_key_ptr_offset(0), | 2870 | btrfs_node_key_ptr_offset(0), |
2092 | btrfs_node_key_ptr_offset(src_nritems - push_items), | 2871 | btrfs_node_key_ptr_offset(src_nritems - push_items), |
@@ -2129,7 +2908,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2129 | 2908 | ||
2130 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2909 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2131 | root->root_key.objectid, &lower_key, | 2910 | root->root_key.objectid, &lower_key, |
2132 | level, root->node->start, 0, 0); | 2911 | level, root->node->start, 0); |
2133 | if (IS_ERR(c)) | 2912 | if (IS_ERR(c)) |
2134 | return PTR_ERR(c); | 2913 | return PTR_ERR(c); |
2135 | 2914 | ||
@@ -2161,6 +2940,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2161 | btrfs_mark_buffer_dirty(c); | 2940 | btrfs_mark_buffer_dirty(c); |
2162 | 2941 | ||
2163 | old = root->node; | 2942 | old = root->node; |
2943 | tree_mod_log_set_root_pointer(root, c); | ||
2164 | rcu_assign_pointer(root->node, c); | 2944 | rcu_assign_pointer(root->node, c); |
2165 | 2945 | ||
2166 | /* the super has an extra ref to root->node */ | 2946 | /* the super has an extra ref to root->node */ |
@@ -2184,10 +2964,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2184 | static void insert_ptr(struct btrfs_trans_handle *trans, | 2964 | static void insert_ptr(struct btrfs_trans_handle *trans, |
2185 | struct btrfs_root *root, struct btrfs_path *path, | 2965 | struct btrfs_root *root, struct btrfs_path *path, |
2186 | struct btrfs_disk_key *key, u64 bytenr, | 2966 | struct btrfs_disk_key *key, u64 bytenr, |
2187 | int slot, int level) | 2967 | int slot, int level, int tree_mod_log) |
2188 | { | 2968 | { |
2189 | struct extent_buffer *lower; | 2969 | struct extent_buffer *lower; |
2190 | int nritems; | 2970 | int nritems; |
2971 | int ret; | ||
2191 | 2972 | ||
2192 | BUG_ON(!path->nodes[level]); | 2973 | BUG_ON(!path->nodes[level]); |
2193 | btrfs_assert_tree_locked(path->nodes[level]); | 2974 | btrfs_assert_tree_locked(path->nodes[level]); |
@@ -2196,11 +2977,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
2196 | BUG_ON(slot > nritems); | 2977 | BUG_ON(slot > nritems); |
2197 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); | 2978 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); |
2198 | if (slot != nritems) { | 2979 | if (slot != nritems) { |
2980 | if (tree_mod_log && level) | ||
2981 | tree_mod_log_eb_move(root->fs_info, lower, slot + 1, | ||
2982 | slot, nritems - slot); | ||
2199 | memmove_extent_buffer(lower, | 2983 | memmove_extent_buffer(lower, |
2200 | btrfs_node_key_ptr_offset(slot + 1), | 2984 | btrfs_node_key_ptr_offset(slot + 1), |
2201 | btrfs_node_key_ptr_offset(slot), | 2985 | btrfs_node_key_ptr_offset(slot), |
2202 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); | 2986 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); |
2203 | } | 2987 | } |
2988 | if (tree_mod_log && level) { | ||
2989 | ret = tree_mod_log_insert_key(root->fs_info, lower, slot, | ||
2990 | MOD_LOG_KEY_ADD); | ||
2991 | BUG_ON(ret < 0); | ||
2992 | } | ||
2204 | btrfs_set_node_key(lower, key, slot); | 2993 | btrfs_set_node_key(lower, key, slot); |
2205 | btrfs_set_node_blockptr(lower, slot, bytenr); | 2994 | btrfs_set_node_blockptr(lower, slot, bytenr); |
2206 | WARN_ON(trans->transid == 0); | 2995 | WARN_ON(trans->transid == 0); |
@@ -2252,7 +3041,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2252 | 3041 | ||
2253 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3042 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2254 | root->root_key.objectid, | 3043 | root->root_key.objectid, |
2255 | &disk_key, level, c->start, 0, 0); | 3044 | &disk_key, level, c->start, 0); |
2256 | if (IS_ERR(split)) | 3045 | if (IS_ERR(split)) |
2257 | return PTR_ERR(split); | 3046 | return PTR_ERR(split); |
2258 | 3047 | ||
@@ -2271,7 +3060,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2271 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 3060 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
2272 | BTRFS_UUID_SIZE); | 3061 | BTRFS_UUID_SIZE); |
2273 | 3062 | ||
2274 | 3063 | tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); | |
2275 | copy_extent_buffer(split, c, | 3064 | copy_extent_buffer(split, c, |
2276 | btrfs_node_key_ptr_offset(0), | 3065 | btrfs_node_key_ptr_offset(0), |
2277 | btrfs_node_key_ptr_offset(mid), | 3066 | btrfs_node_key_ptr_offset(mid), |
@@ -2284,7 +3073,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2284 | btrfs_mark_buffer_dirty(split); | 3073 | btrfs_mark_buffer_dirty(split); |
2285 | 3074 | ||
2286 | insert_ptr(trans, root, path, &disk_key, split->start, | 3075 | insert_ptr(trans, root, path, &disk_key, split->start, |
2287 | path->slots[level + 1] + 1, level + 1); | 3076 | path->slots[level + 1] + 1, level + 1, 1); |
2288 | 3077 | ||
2289 | if (path->slots[level] >= mid) { | 3078 | if (path->slots[level] >= mid) { |
2290 | path->slots[level] -= mid; | 3079 | path->slots[level] -= mid; |
@@ -2821,7 +3610,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, | |||
2821 | btrfs_set_header_nritems(l, mid); | 3610 | btrfs_set_header_nritems(l, mid); |
2822 | btrfs_item_key(right, &disk_key, 0); | 3611 | btrfs_item_key(right, &disk_key, 0); |
2823 | insert_ptr(trans, root, path, &disk_key, right->start, | 3612 | insert_ptr(trans, root, path, &disk_key, right->start, |
2824 | path->slots[1] + 1, 1); | 3613 | path->slots[1] + 1, 1, 0); |
2825 | 3614 | ||
2826 | btrfs_mark_buffer_dirty(right); | 3615 | btrfs_mark_buffer_dirty(right); |
2827 | btrfs_mark_buffer_dirty(l); | 3616 | btrfs_mark_buffer_dirty(l); |
@@ -3004,7 +3793,7 @@ again: | |||
3004 | 3793 | ||
3005 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 3794 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
3006 | root->root_key.objectid, | 3795 | root->root_key.objectid, |
3007 | &disk_key, 0, l->start, 0, 0); | 3796 | &disk_key, 0, l->start, 0); |
3008 | if (IS_ERR(right)) | 3797 | if (IS_ERR(right)) |
3009 | return PTR_ERR(right); | 3798 | return PTR_ERR(right); |
3010 | 3799 | ||
@@ -3028,7 +3817,7 @@ again: | |||
3028 | if (mid <= slot) { | 3817 | if (mid <= slot) { |
3029 | btrfs_set_header_nritems(right, 0); | 3818 | btrfs_set_header_nritems(right, 0); |
3030 | insert_ptr(trans, root, path, &disk_key, right->start, | 3819 | insert_ptr(trans, root, path, &disk_key, right->start, |
3031 | path->slots[1] + 1, 1); | 3820 | path->slots[1] + 1, 1, 0); |
3032 | btrfs_tree_unlock(path->nodes[0]); | 3821 | btrfs_tree_unlock(path->nodes[0]); |
3033 | free_extent_buffer(path->nodes[0]); | 3822 | free_extent_buffer(path->nodes[0]); |
3034 | path->nodes[0] = right; | 3823 | path->nodes[0] = right; |
@@ -3037,7 +3826,7 @@ again: | |||
3037 | } else { | 3826 | } else { |
3038 | btrfs_set_header_nritems(right, 0); | 3827 | btrfs_set_header_nritems(right, 0); |
3039 | insert_ptr(trans, root, path, &disk_key, right->start, | 3828 | insert_ptr(trans, root, path, &disk_key, right->start, |
3040 | path->slots[1], 1); | 3829 | path->slots[1], 1, 0); |
3041 | btrfs_tree_unlock(path->nodes[0]); | 3830 | btrfs_tree_unlock(path->nodes[0]); |
3042 | free_extent_buffer(path->nodes[0]); | 3831 | free_extent_buffer(path->nodes[0]); |
3043 | path->nodes[0] = right; | 3832 | path->nodes[0] = right; |
@@ -3749,19 +4538,29 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3749 | * empty a node. | 4538 | * empty a node. |
3750 | */ | 4539 | */ |
3751 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 4540 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3752 | struct btrfs_path *path, int level, int slot) | 4541 | struct btrfs_path *path, int level, int slot, |
4542 | int tree_mod_log) | ||
3753 | { | 4543 | { |
3754 | struct extent_buffer *parent = path->nodes[level]; | 4544 | struct extent_buffer *parent = path->nodes[level]; |
3755 | u32 nritems; | 4545 | u32 nritems; |
4546 | int ret; | ||
3756 | 4547 | ||
3757 | nritems = btrfs_header_nritems(parent); | 4548 | nritems = btrfs_header_nritems(parent); |
3758 | if (slot != nritems - 1) { | 4549 | if (slot != nritems - 1) { |
4550 | if (tree_mod_log && level) | ||
4551 | tree_mod_log_eb_move(root->fs_info, parent, slot, | ||
4552 | slot + 1, nritems - slot - 1); | ||
3759 | memmove_extent_buffer(parent, | 4553 | memmove_extent_buffer(parent, |
3760 | btrfs_node_key_ptr_offset(slot), | 4554 | btrfs_node_key_ptr_offset(slot), |
3761 | btrfs_node_key_ptr_offset(slot + 1), | 4555 | btrfs_node_key_ptr_offset(slot + 1), |
3762 | sizeof(struct btrfs_key_ptr) * | 4556 | sizeof(struct btrfs_key_ptr) * |
3763 | (nritems - slot - 1)); | 4557 | (nritems - slot - 1)); |
4558 | } else if (tree_mod_log && level) { | ||
4559 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | ||
4560 | MOD_LOG_KEY_REMOVE); | ||
4561 | BUG_ON(ret < 0); | ||
3764 | } | 4562 | } |
4563 | |||
3765 | nritems--; | 4564 | nritems--; |
3766 | btrfs_set_header_nritems(parent, nritems); | 4565 | btrfs_set_header_nritems(parent, nritems); |
3767 | if (nritems == 0 && parent == root->node) { | 4566 | if (nritems == 0 && parent == root->node) { |
@@ -3793,7 +4592,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3793 | struct extent_buffer *leaf) | 4592 | struct extent_buffer *leaf) |
3794 | { | 4593 | { |
3795 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | 4594 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); |
3796 | del_ptr(trans, root, path, 1, path->slots[1]); | 4595 | del_ptr(trans, root, path, 1, path->slots[1], 1); |
3797 | 4596 | ||
3798 | /* | 4597 | /* |
3799 | * btrfs_free_extent is expensive, we want to make sure we | 4598 | * btrfs_free_extent is expensive, we want to make sure we |
@@ -3804,7 +4603,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3804 | root_sub_used(root, leaf->len); | 4603 | root_sub_used(root, leaf->len); |
3805 | 4604 | ||
3806 | extent_buffer_get(leaf); | 4605 | extent_buffer_get(leaf); |
3807 | btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); | 4606 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3808 | free_extent_buffer_stale(leaf); | 4607 | free_extent_buffer_stale(leaf); |
3809 | } | 4608 | } |
3810 | /* | 4609 | /* |
@@ -4271,7 +5070,7 @@ again: | |||
4271 | next = c; | 5070 | next = c; |
4272 | next_rw_lock = path->locks[level]; | 5071 | next_rw_lock = path->locks[level]; |
4273 | ret = read_block_for_search(NULL, root, path, &next, level, | 5072 | ret = read_block_for_search(NULL, root, path, &next, level, |
4274 | slot, &key); | 5073 | slot, &key, 0); |
4275 | if (ret == -EAGAIN) | 5074 | if (ret == -EAGAIN) |
4276 | goto again; | 5075 | goto again; |
4277 | 5076 | ||
@@ -4308,7 +5107,7 @@ again: | |||
4308 | break; | 5107 | break; |
4309 | 5108 | ||
4310 | ret = read_block_for_search(NULL, root, path, &next, level, | 5109 | ret = read_block_for_search(NULL, root, path, &next, level, |
4311 | 0, &key); | 5110 | 0, &key, 0); |
4312 | if (ret == -EAGAIN) | 5111 | if (ret == -EAGAIN) |
4313 | goto again; | 5112 | goto again; |
4314 | 5113 | ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fd72331d600..0236d03c6732 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
173 | #define BTRFS_FT_XATTR 8 | 173 | #define BTRFS_FT_XATTR 8 |
174 | #define BTRFS_FT_MAX 9 | 174 | #define BTRFS_FT_MAX 9 |
175 | 175 | ||
176 | /* ioprio of readahead is set to idle */ | ||
177 | #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) | ||
178 | |||
176 | /* | 179 | /* |
177 | * The key defines the order in the tree, and so it also defines (optimal) | 180 | * The key defines the order in the tree, and so it also defines (optimal) |
178 | * block layout. | 181 | * block layout. |
@@ -823,6 +826,14 @@ struct btrfs_csum_item { | |||
823 | u8 csum; | 826 | u8 csum; |
824 | } __attribute__ ((__packed__)); | 827 | } __attribute__ ((__packed__)); |
825 | 828 | ||
829 | struct btrfs_dev_stats_item { | ||
830 | /* | ||
831 | * grow this item struct at the end for future enhancements and keep | ||
832 | * the existing values unchanged | ||
833 | */ | ||
834 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
835 | } __attribute__ ((__packed__)); | ||
836 | |||
826 | /* different types of block groups (and chunks) */ | 837 | /* different types of block groups (and chunks) */ |
827 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) | 838 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) |
828 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) | 839 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) |
@@ -1129,6 +1140,15 @@ struct btrfs_fs_info { | |||
1129 | spinlock_t delayed_iput_lock; | 1140 | spinlock_t delayed_iput_lock; |
1130 | struct list_head delayed_iputs; | 1141 | struct list_head delayed_iputs; |
1131 | 1142 | ||
1143 | /* this protects tree_mod_seq_list */ | ||
1144 | spinlock_t tree_mod_seq_lock; | ||
1145 | atomic_t tree_mod_seq; | ||
1146 | struct list_head tree_mod_seq_list; | ||
1147 | |||
1148 | /* this protects tree_mod_log */ | ||
1149 | rwlock_t tree_mod_log_lock; | ||
1150 | struct rb_root tree_mod_log; | ||
1151 | |||
1132 | atomic_t nr_async_submits; | 1152 | atomic_t nr_async_submits; |
1133 | atomic_t async_submit_draining; | 1153 | atomic_t async_submit_draining; |
1134 | atomic_t nr_async_bios; | 1154 | atomic_t nr_async_bios; |
@@ -1375,7 +1395,7 @@ struct btrfs_root { | |||
1375 | struct list_head root_list; | 1395 | struct list_head root_list; |
1376 | 1396 | ||
1377 | spinlock_t orphan_lock; | 1397 | spinlock_t orphan_lock; |
1378 | struct list_head orphan_list; | 1398 | atomic_t orphan_inodes; |
1379 | struct btrfs_block_rsv *orphan_block_rsv; | 1399 | struct btrfs_block_rsv *orphan_block_rsv; |
1380 | int orphan_item_inserted; | 1400 | int orphan_item_inserted; |
1381 | int orphan_cleanup_state; | 1401 | int orphan_cleanup_state; |
@@ -1508,6 +1528,12 @@ struct btrfs_ioctl_defrag_range_args { | |||
1508 | #define BTRFS_BALANCE_ITEM_KEY 248 | 1528 | #define BTRFS_BALANCE_ITEM_KEY 248 |
1509 | 1529 | ||
1510 | /* | 1530 | /* |
1531 | * Persistently stores the io stats in the device tree. | ||
1532 | * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid). | ||
1533 | */ | ||
1534 | #define BTRFS_DEV_STATS_KEY 249 | ||
1535 | |||
1536 | /* | ||
1511 | * string items are for debugging. They just store a short string of | 1537 | * string items are for debugging. They just store a short string of |
1512 | * data in the FS | 1538 | * data in the FS |
1513 | */ | 1539 | */ |
@@ -2415,6 +2441,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | |||
2415 | return btrfs_item_size(eb, e) - offset; | 2441 | return btrfs_item_size(eb, e) - offset; |
2416 | } | 2442 | } |
2417 | 2443 | ||
2444 | /* btrfs_dev_stats_item */ | ||
2445 | static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, | ||
2446 | struct btrfs_dev_stats_item *ptr, | ||
2447 | int index) | ||
2448 | { | ||
2449 | u64 val; | ||
2450 | |||
2451 | read_extent_buffer(eb, &val, | ||
2452 | offsetof(struct btrfs_dev_stats_item, values) + | ||
2453 | ((unsigned long)ptr) + (index * sizeof(u64)), | ||
2454 | sizeof(val)); | ||
2455 | return val; | ||
2456 | } | ||
2457 | |||
2458 | static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb, | ||
2459 | struct btrfs_dev_stats_item *ptr, | ||
2460 | int index, u64 val) | ||
2461 | { | ||
2462 | write_extent_buffer(eb, &val, | ||
2463 | offsetof(struct btrfs_dev_stats_item, values) + | ||
2464 | ((unsigned long)ptr) + (index * sizeof(u64)), | ||
2465 | sizeof(val)); | ||
2466 | } | ||
2467 | |||
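A hedged sketch of how the persisted counters can be read back, combining the (0, BTRFS_DEV_STATS_KEY, devid) key documented above with the new accessor. The triple is taken here as (objectid, type, offset), dev_root is assumed to be the device tree root, and error handling is trimmed.

/*
 * Illustrative only: look up the dev stats item for one device and read a
 * single counter.  stat_index is one of the BTRFS_DEV_STAT_* indexes.
 */
static int example_read_dev_stat(struct btrfs_root *dev_root, u64 devid,
				 int stat_index, u64 *value)
{
	struct btrfs_dev_stats_item *item;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	key.objectid = 0;
	key.type = BTRFS_DEV_STATS_KEY;
	key.offset = devid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
	if (ret == 0) {
		item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_dev_stats_item);
		*value = btrfs_dev_stats_value(path->nodes[0], item, stat_index);
	}
	btrfs_free_path(path);
	return ret;	/* 0 on success, 1 if no item exists yet, <0 on error */
}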
2418 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 2468 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
2419 | { | 2469 | { |
2420 | return sb->s_fs_info; | 2470 | return sb->s_fs_info; |
@@ -2496,11 +2546,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
2496 | struct btrfs_root *root, u32 blocksize, | 2546 | struct btrfs_root *root, u32 blocksize, |
2497 | u64 parent, u64 root_objectid, | 2547 | u64 parent, u64 root_objectid, |
2498 | struct btrfs_disk_key *key, int level, | 2548 | struct btrfs_disk_key *key, int level, |
2499 | u64 hint, u64 empty_size, int for_cow); | 2549 | u64 hint, u64 empty_size); |
2500 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2550 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
2501 | struct btrfs_root *root, | 2551 | struct btrfs_root *root, |
2502 | struct extent_buffer *buf, | 2552 | struct extent_buffer *buf, |
2503 | u64 parent, int last_ref, int for_cow); | 2553 | u64 parent, int last_ref); |
2504 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2554 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
2505 | struct btrfs_root *root, | 2555 | struct btrfs_root *root, |
2506 | u64 bytenr, u32 blocksize, | 2556 | u64 bytenr, u32 blocksize, |
@@ -2659,6 +2709,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans, | |||
2659 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | 2709 | int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root |
2660 | *root, struct btrfs_key *key, struct btrfs_path *p, int | 2710 | *root, struct btrfs_key *key, struct btrfs_path *p, int |
2661 | ins_len, int cow); | 2711 | ins_len, int cow); |
2712 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
2713 | struct btrfs_path *p, u64 time_seq); | ||
2662 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 2714 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
2663 | struct btrfs_root *root, struct extent_buffer *parent, | 2715 | struct btrfs_root *root, struct extent_buffer *parent, |
2664 | int start_slot, int cache_only, u64 *last_ret, | 2716 | int start_slot, int cache_only, u64 *last_ret, |
@@ -2922,7 +2974,6 @@ int btrfs_readpage(struct file *file, struct page *page); | |||
2922 | void btrfs_evict_inode(struct inode *inode); | 2974 | void btrfs_evict_inode(struct inode *inode); |
2923 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); | 2975 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); |
2924 | int btrfs_dirty_inode(struct inode *inode); | 2976 | int btrfs_dirty_inode(struct inode *inode); |
2925 | int btrfs_update_time(struct file *file); | ||
2926 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2977 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
2927 | void btrfs_destroy_inode(struct inode *inode); | 2978 | void btrfs_destroy_inode(struct inode *inode); |
2928 | int btrfs_drop_inode(struct inode *inode); | 2979 | int btrfs_drop_inode(struct inode *inode); |
@@ -3098,4 +3149,23 @@ void btrfs_reada_detach(void *handle); | |||
3098 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, | 3149 | int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, |
3099 | u64 start, int err); | 3150 | u64 start, int err); |
3100 | 3151 | ||
3152 | /* delayed seq elem */ | ||
3153 | struct seq_list { | ||
3154 | struct list_head list; | ||
3155 | u64 seq; | ||
3156 | u32 flags; | ||
3157 | }; | ||
3158 | |||
3159 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3160 | struct seq_list *elem); | ||
3161 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
3162 | struct seq_list *elem); | ||
3163 | |||
3164 | static inline int is_fstree(u64 rootid) | ||
3165 | { | ||
3166 | if (rootid == BTRFS_FS_TREE_OBJECTID || | ||
3167 | (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
3168 | return 1; | ||
3169 | return 0; | ||
3170 | } | ||
3101 | #endif | 3171 | #endif |
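is_fstree() takes over from need_ref_seq() in delayed-ref.h (removed below): only the fs tree and subvolume/snapshot roots get delayed-ref sequence numbers. A small illustration of the predicate, using the standard objectid constants; the WARN_ON wrapper is just for demonstration.

/* Illustrative only: which root objectids count as fs trees. */
static void example_is_fstree(void)
{
	WARN_ON(!is_fstree(BTRFS_FS_TREE_OBJECTID));	    /* the default fs tree */
	WARN_ON(!is_fstree(BTRFS_FIRST_FREE_OBJECTID + 1)); /* subvolume/snapshot roots */
	WARN_ON(is_fstree(BTRFS_EXTENT_TREE_OBJECTID));	    /* internal trees do not qualify */
}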
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 03e3748d84d0..c18d0442ae6d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
669 | return ret; | 669 | return ret; |
670 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { | 670 | } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { |
671 | spin_lock(&BTRFS_I(inode)->lock); | 671 | spin_lock(&BTRFS_I(inode)->lock); |
672 | if (BTRFS_I(inode)->delalloc_meta_reserved) { | 672 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
673 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | 673 | &BTRFS_I(inode)->runtime_flags)) { |
674 | spin_unlock(&BTRFS_I(inode)->lock); | 674 | spin_unlock(&BTRFS_I(inode)->lock); |
675 | release = true; | 675 | release = true; |
676 | goto migrate; | 676 | goto migrate; |
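The hunk above is part of a wider change in this series that folds per-inode boolean fields (delalloc_meta_reserved, dummy_inode, and others) into a single runtime_flags bitmask, so the check and the clear of the delalloc metadata reservation become one operation. A hedged sketch of that pattern; the flag name comes from the hunk, the helper is illustrative.

/*
 * Illustrative only: test_and_clear_bit() makes "did I own the reservation?"
 * and "give it up" a single atomic step on BTRFS_I(inode)->runtime_flags.
 */
static int example_take_delalloc_meta_reservation(struct inode *inode)
{
	return test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
				  &BTRFS_I(inode)->runtime_flags);
}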
@@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
1706 | btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); | 1706 | btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); |
1707 | btrfs_set_stack_inode_generation(inode_item, | 1707 | btrfs_set_stack_inode_generation(inode_item, |
1708 | BTRFS_I(inode)->generation); | 1708 | BTRFS_I(inode)->generation); |
1709 | btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence); | 1709 | btrfs_set_stack_inode_sequence(inode_item, inode->i_version); |
1710 | btrfs_set_stack_inode_transid(inode_item, trans->transid); | 1710 | btrfs_set_stack_inode_transid(inode_item, trans->transid); |
1711 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); | 1711 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); |
1712 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); | 1712 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); |
@@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) | |||
1754 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); | 1754 | set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); |
1755 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); | 1755 | inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); |
1756 | BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); | 1756 | BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); |
1757 | BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); | 1757 | inode->i_version = btrfs_stack_inode_sequence(inode_item); |
1758 | inode->i_rdev = 0; | 1758 | inode->i_rdev = 0; |
1759 | *rdev = btrfs_stack_inode_rdev(inode_item); | 1759 | *rdev = btrfs_stack_inode_rdev(inode_item); |
1760 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); | 1760 | BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 69f22e3ab3bc..13ae7b04790e 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -525,7 +525,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
525 | ref->is_head = 0; | 525 | ref->is_head = 0; |
526 | ref->in_tree = 1; | 526 | ref->in_tree = 1; |
527 | 527 | ||
528 | if (need_ref_seq(for_cow, ref_root)) | 528 | if (is_fstree(ref_root)) |
529 | seq = inc_delayed_seq(delayed_refs); | 529 | seq = inc_delayed_seq(delayed_refs); |
530 | ref->seq = seq; | 530 | ref->seq = seq; |
531 | 531 | ||
@@ -584,7 +584,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
584 | ref->is_head = 0; | 584 | ref->is_head = 0; |
585 | ref->in_tree = 1; | 585 | ref->in_tree = 1; |
586 | 586 | ||
587 | if (need_ref_seq(for_cow, ref_root)) | 587 | if (is_fstree(ref_root)) |
588 | seq = inc_delayed_seq(delayed_refs); | 588 | seq = inc_delayed_seq(delayed_refs); |
589 | ref->seq = seq; | 589 | ref->seq = seq; |
590 | 590 | ||
@@ -658,10 +658,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, | 658 | add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, |
659 | num_bytes, parent, ref_root, level, action, | 659 | num_bytes, parent, ref_root, level, action, |
660 | for_cow); | 660 | for_cow); |
661 | if (!need_ref_seq(for_cow, ref_root) && | 661 | if (!is_fstree(ref_root) && |
662 | waitqueue_active(&delayed_refs->seq_wait)) | 662 | waitqueue_active(&delayed_refs->seq_wait)) |
663 | wake_up(&delayed_refs->seq_wait); | 663 | wake_up(&delayed_refs->seq_wait); |
664 | spin_unlock(&delayed_refs->lock); | 664 | spin_unlock(&delayed_refs->lock); |
665 | |||
665 | return 0; | 666 | return 0; |
666 | } | 667 | } |
667 | 668 | ||
@@ -706,10 +707,11 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
706 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, | 707 | add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, |
707 | num_bytes, parent, ref_root, owner, offset, | 708 | num_bytes, parent, ref_root, owner, offset, |
708 | action, for_cow); | 709 | action, for_cow); |
709 | if (!need_ref_seq(for_cow, ref_root) && | 710 | if (!is_fstree(ref_root) && |
710 | waitqueue_active(&delayed_refs->seq_wait)) | 711 | waitqueue_active(&delayed_refs->seq_wait)) |
711 | wake_up(&delayed_refs->seq_wait); | 712 | wake_up(&delayed_refs->seq_wait); |
712 | spin_unlock(&delayed_refs->lock); | 713 | spin_unlock(&delayed_refs->lock); |
714 | |||
713 | return 0; | 715 | return 0; |
714 | } | 716 | } |
715 | 717 | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index d8f244d94925..413927fb9957 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -195,11 +195,6 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 195 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
196 | struct list_head *cluster, u64 search_start); | 196 | struct list_head *cluster, u64 search_start); |
197 | 197 | ||
198 | struct seq_list { | ||
199 | struct list_head list; | ||
200 | u64 seq; | ||
201 | }; | ||
202 | |||
203 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) | 198 | static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs) |
204 | { | 199 | { |
205 | assert_spin_locked(&delayed_refs->lock); | 200 | assert_spin_locked(&delayed_refs->lock); |
@@ -230,25 +225,6 @@ int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs, | |||
230 | u64 seq); | 225 | u64 seq); |
231 | 226 | ||
232 | /* | 227 | /* |
233 | * delayed refs with a ref_seq > 0 must be held back during backref walking. | ||
234 | * this only applies to items in one of the fs-trees. for_cow items never need | ||
235 | * to be held back, so they won't get a ref_seq number. | ||
236 | */ | ||
237 | static inline int need_ref_seq(int for_cow, u64 rootid) | ||
238 | { | ||
239 | if (for_cow) | ||
240 | return 0; | ||
241 | |||
242 | if (rootid == BTRFS_FS_TREE_OBJECTID) | ||
243 | return 1; | ||
244 | |||
245 | if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) | ||
246 | return 1; | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * a node might live in a head or a regular ref, this lets you | 228 | * a node might live in a head or a regular ref, this lets you |
253 | * test for the proper type to use. | 229 | * test for the proper type to use. |
254 | */ | 230 | */ |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1fe74a2ce16..7ae51decf6d3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1153 | root->orphan_block_rsv = NULL; | 1153 | root->orphan_block_rsv = NULL; |
1154 | 1154 | ||
1155 | INIT_LIST_HEAD(&root->dirty_list); | 1155 | INIT_LIST_HEAD(&root->dirty_list); |
1156 | INIT_LIST_HEAD(&root->orphan_list); | ||
1157 | INIT_LIST_HEAD(&root->root_list); | 1156 | INIT_LIST_HEAD(&root->root_list); |
1158 | spin_lock_init(&root->orphan_lock); | 1157 | spin_lock_init(&root->orphan_lock); |
1159 | spin_lock_init(&root->inode_lock); | 1158 | spin_lock_init(&root->inode_lock); |
@@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1166 | atomic_set(&root->log_commit[0], 0); | 1165 | atomic_set(&root->log_commit[0], 0); |
1167 | atomic_set(&root->log_commit[1], 0); | 1166 | atomic_set(&root->log_commit[1], 0); |
1168 | atomic_set(&root->log_writers, 0); | 1167 | atomic_set(&root->log_writers, 0); |
1168 | atomic_set(&root->orphan_inodes, 0); | ||
1169 | root->log_batch = 0; | 1169 | root->log_batch = 0; |
1170 | root->log_transid = 0; | 1170 | root->log_transid = 0; |
1171 | root->last_log_commit = 0; | 1171 | root->last_log_commit = 0; |
@@ -1252,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1252 | 1252 | ||
1253 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1253 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
1254 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1254 | BTRFS_TREE_LOG_OBJECTID, NULL, |
1255 | 0, 0, 0, 0); | 1255 | 0, 0, 0); |
1256 | if (IS_ERR(leaf)) { | 1256 | if (IS_ERR(leaf)) { |
1257 | kfree(root); | 1257 | kfree(root); |
1258 | return ERR_CAST(leaf); | 1258 | return ERR_CAST(leaf); |
@@ -1914,11 +1914,14 @@ int open_ctree(struct super_block *sb, | |||
1914 | spin_lock_init(&fs_info->delayed_iput_lock); | 1914 | spin_lock_init(&fs_info->delayed_iput_lock); |
1915 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1915 | spin_lock_init(&fs_info->defrag_inodes_lock); |
1916 | spin_lock_init(&fs_info->free_chunk_lock); | 1916 | spin_lock_init(&fs_info->free_chunk_lock); |
1917 | spin_lock_init(&fs_info->tree_mod_seq_lock); | ||
1918 | rwlock_init(&fs_info->tree_mod_log_lock); | ||
1917 | mutex_init(&fs_info->reloc_mutex); | 1919 | mutex_init(&fs_info->reloc_mutex); |
1918 | 1920 | ||
1919 | init_completion(&fs_info->kobj_unregister); | 1921 | init_completion(&fs_info->kobj_unregister); |
1920 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1922 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1921 | INIT_LIST_HEAD(&fs_info->space_info); | 1923 | INIT_LIST_HEAD(&fs_info->space_info); |
1924 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | ||
1922 | btrfs_mapping_init(&fs_info->mapping_tree); | 1925 | btrfs_mapping_init(&fs_info->mapping_tree); |
1923 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | 1926 | btrfs_init_block_rsv(&fs_info->global_block_rsv); |
1924 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | 1927 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); |
@@ -1931,12 +1934,14 @@ int open_ctree(struct super_block *sb, | |||
1931 | atomic_set(&fs_info->async_submit_draining, 0); | 1934 | atomic_set(&fs_info->async_submit_draining, 0); |
1932 | atomic_set(&fs_info->nr_async_bios, 0); | 1935 | atomic_set(&fs_info->nr_async_bios, 0); |
1933 | atomic_set(&fs_info->defrag_running, 0); | 1936 | atomic_set(&fs_info->defrag_running, 0); |
1937 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
1934 | fs_info->sb = sb; | 1938 | fs_info->sb = sb; |
1935 | fs_info->max_inline = 8192 * 1024; | 1939 | fs_info->max_inline = 8192 * 1024; |
1936 | fs_info->metadata_ratio = 0; | 1940 | fs_info->metadata_ratio = 0; |
1937 | fs_info->defrag_inodes = RB_ROOT; | 1941 | fs_info->defrag_inodes = RB_ROOT; |
1938 | fs_info->trans_no_join = 0; | 1942 | fs_info->trans_no_join = 0; |
1939 | fs_info->free_chunk_space = 0; | 1943 | fs_info->free_chunk_space = 0; |
1944 | fs_info->tree_mod_log = RB_ROOT; | ||
1940 | 1945 | ||
1941 | /* readahead state */ | 1946 | /* readahead state */ |
1942 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | 1947 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); |
@@ -2001,7 +2006,8 @@ int open_ctree(struct super_block *sb, | |||
2001 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | 2006 | BTRFS_I(fs_info->btree_inode)->root = tree_root; |
2002 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | 2007 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, |
2003 | sizeof(struct btrfs_key)); | 2008 | sizeof(struct btrfs_key)); |
2004 | BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; | 2009 | set_bit(BTRFS_INODE_DUMMY, |
2010 | &BTRFS_I(fs_info->btree_inode)->runtime_flags); | ||
2005 | insert_inode_hash(fs_info->btree_inode); | 2011 | insert_inode_hash(fs_info->btree_inode); |
2006 | 2012 | ||
2007 | spin_lock_init(&fs_info->block_group_cache_lock); | 2013 | spin_lock_init(&fs_info->block_group_cache_lock); |
@@ -2353,6 +2359,13 @@ retry_root_backup: | |||
2353 | fs_info->generation = generation; | 2359 | fs_info->generation = generation; |
2354 | fs_info->last_trans_committed = generation; | 2360 | fs_info->last_trans_committed = generation; |
2355 | 2361 | ||
2362 | ret = btrfs_init_dev_stats(fs_info); | ||
2363 | if (ret) { | ||
2364 | printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", | ||
2365 | ret); | ||
2366 | goto fail_block_groups; | ||
2367 | } | ||
2368 | |||
2356 | ret = btrfs_init_space_info(fs_info); | 2369 | ret = btrfs_init_space_info(fs_info); |
2357 | if (ret) { | 2370 | if (ret) { |
2358 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | 2371 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); |
@@ -2556,18 +2569,19 @@ recovery_tree_root: | |||
2556 | 2569 | ||
2557 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 2570 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
2558 | { | 2571 | { |
2559 | char b[BDEVNAME_SIZE]; | ||
2560 | |||
2561 | if (uptodate) { | 2572 | if (uptodate) { |
2562 | set_buffer_uptodate(bh); | 2573 | set_buffer_uptodate(bh); |
2563 | } else { | 2574 | } else { |
2575 | struct btrfs_device *device = (struct btrfs_device *) | ||
2576 | bh->b_private; | ||
2577 | |||
2564 | printk_ratelimited(KERN_WARNING "lost page write due to " | 2578 | printk_ratelimited(KERN_WARNING "lost page write due to " |
2565 | "I/O error on %s\n", | 2579 | "I/O error on %s\n", device->name); |
2566 | bdevname(bh->b_bdev, b)); | ||
2567 | /* note, we don't set_buffer_write_io_error because we have | 2580 | /* note, we don't set_buffer_write_io_error because we have |
2568 | * our own ways of dealing with the IO errors | 2581 | * our own ways of dealing with the IO errors |
2569 | */ | 2582 | */ |
2570 | clear_buffer_uptodate(bh); | 2583 | clear_buffer_uptodate(bh); |
2584 | btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); | ||
2571 | } | 2585 | } |
2572 | unlock_buffer(bh); | 2586 | unlock_buffer(bh); |
2573 | put_bh(bh); | 2587 | put_bh(bh); |
@@ -2682,6 +2696,7 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2682 | set_buffer_uptodate(bh); | 2696 | set_buffer_uptodate(bh); |
2683 | lock_buffer(bh); | 2697 | lock_buffer(bh); |
2684 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2698 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2699 | bh->b_private = device; | ||
2685 | } | 2700 | } |
2686 | 2701 | ||
2687 | /* | 2702 | /* |
@@ -2740,6 +2755,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait) | |||
2740 | } | 2755 | } |
2741 | if (!bio_flagged(bio, BIO_UPTODATE)) { | 2756 | if (!bio_flagged(bio, BIO_UPTODATE)) { |
2742 | ret = -EIO; | 2757 | ret = -EIO; |
2758 | if (!bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
2759 | btrfs_dev_stat_inc_and_print(device, | ||
2760 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
2743 | } | 2761 | } |
2744 | 2762 | ||
2745 | /* drop the reference from the wait == 0 run */ | 2763 | /* drop the reference from the wait == 0 run */ |
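The hunks in this file (and the read-completion hunk in extent_io.c further down) feed write, flush, and corruption errors into per-device statistics via btrfs_dev_stat_inc_and_print(). Conceptually that helper is an array of atomic counters attached to each device plus a rate-limited message; a rough sketch of such accounting, with illustrative names that are not the btrfs implementation, is:

	#include <linux/atomic.h>
	#include <linux/kernel.h>

	enum my_dev_stat {
		MY_DEV_STAT_WRITE_ERRS,
		MY_DEV_STAT_FLUSH_ERRS,
		MY_DEV_STAT_CORRUPTION_ERRS,
		MY_DEV_STAT_MAX,
	};

	struct my_device {
		const char *name;
		atomic_t stats[MY_DEV_STAT_MAX];	/* one counter per error class */
	};

	static void my_dev_stat_inc_and_print(struct my_device *dev,
					      enum my_dev_stat s)
	{
		atomic_inc(&dev->stats[s]);
		printk_ratelimited(KERN_WARNING "error counter %d on %s is now %d\n",
				   s, dev->name, atomic_read(&dev->stats[s]));
	}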
@@ -2902,19 +2920,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
2902 | return ret; | 2920 | return ret; |
2903 | } | 2921 | } |
2904 | 2922 | ||
2905 | /* Kill all outstanding I/O */ | ||
2906 | void btrfs_abort_devices(struct btrfs_root *root) | ||
2907 | { | ||
2908 | struct list_head *head; | ||
2909 | struct btrfs_device *dev; | ||
2910 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
2911 | head = &root->fs_info->fs_devices->devices; | ||
2912 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
2913 | blk_abort_queue(dev->bdev->bd_disk->queue); | ||
2914 | } | ||
2915 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
2916 | } | ||
2917 | |||
2918 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2923 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
2919 | { | 2924 | { |
2920 | spin_lock(&fs_info->fs_roots_radix_lock); | 2925 | spin_lock(&fs_info->fs_roots_radix_lock); |
@@ -3671,17 +3676,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3671 | return 0; | 3676 | return 0; |
3672 | } | 3677 | } |
3673 | 3678 | ||
3674 | static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page, | ||
3675 | u64 start, u64 end, | ||
3676 | struct extent_state *state) | ||
3677 | { | ||
3678 | struct super_block *sb = page->mapping->host->i_sb; | ||
3679 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
3680 | btrfs_error(fs_info, -EIO, | ||
3681 | "Error occured while writing out btree at %llu", start); | ||
3682 | return -EIO; | ||
3683 | } | ||
3684 | |||
3685 | static struct extent_io_ops btree_extent_io_ops = { | 3679 | static struct extent_io_ops btree_extent_io_ops = { |
3686 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3680 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
3687 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3681 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
@@ -3689,5 +3683,4 @@ static struct extent_io_ops btree_extent_io_ops = { | |||
3689 | .submit_bio_hook = btree_submit_bio_hook, | 3683 | .submit_bio_hook = btree_submit_bio_hook, |
3690 | /* note we're sharing with inode.c for the merge bio hook */ | 3684 | /* note we're sharing with inode.c for the merge bio hook */ |
3691 | .merge_bio_hook = btrfs_merge_bio_hook, | 3685 | .merge_bio_hook = btrfs_merge_bio_hook, |
3692 | .writepage_io_failed_hook = btree_writepage_io_failed_hook, | ||
3693 | }; | 3686 | }; |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index ab1830aaf0ed..05b3fab39f7e 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -89,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
89 | int btrfs_cleanup_transaction(struct btrfs_root *root); | 89 | int btrfs_cleanup_transaction(struct btrfs_root *root); |
90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, | 90 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, |
91 | struct btrfs_root *root); | 91 | struct btrfs_root *root); |
92 | void btrfs_abort_devices(struct btrfs_root *root); | ||
93 | 92 | ||
94 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
95 | void btrfs_init_lockdep(void); | 94 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index e887ee62b6d4..614f34a899c2 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -13,15 +13,14 @@ | |||
13 | parent_root_objectid) / 4) | 13 | parent_root_objectid) / 4) |
14 | #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) | 14 | #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) |
15 | 15 | ||
16 | static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | 16 | static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, |
17 | int connectable) | 17 | struct inode *parent) |
18 | { | 18 | { |
19 | struct btrfs_fid *fid = (struct btrfs_fid *)fh; | 19 | struct btrfs_fid *fid = (struct btrfs_fid *)fh; |
20 | struct inode *inode = dentry->d_inode; | ||
21 | int len = *max_len; | 20 | int len = *max_len; |
22 | int type; | 21 | int type; |
23 | 22 | ||
24 | if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { | 23 | if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { |
25 | *max_len = BTRFS_FID_SIZE_CONNECTABLE; | 24 | *max_len = BTRFS_FID_SIZE_CONNECTABLE; |
26 | return 255; | 25 | return 255; |
27 | } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { | 26 | } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { |
@@ -36,19 +35,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
36 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | 35 | fid->root_objectid = BTRFS_I(inode)->root->objectid; |
37 | fid->gen = inode->i_generation; | 36 | fid->gen = inode->i_generation; |
38 | 37 | ||
39 | if (connectable && !S_ISDIR(inode->i_mode)) { | 38 | if (parent) { |
40 | struct inode *parent; | ||
41 | u64 parent_root_id; | 39 | u64 parent_root_id; |
42 | 40 | ||
43 | spin_lock(&dentry->d_lock); | ||
44 | |||
45 | parent = dentry->d_parent->d_inode; | ||
46 | fid->parent_objectid = BTRFS_I(parent)->location.objectid; | 41 | fid->parent_objectid = BTRFS_I(parent)->location.objectid; |
47 | fid->parent_gen = parent->i_generation; | 42 | fid->parent_gen = parent->i_generation; |
48 | parent_root_id = BTRFS_I(parent)->root->objectid; | 43 | parent_root_id = BTRFS_I(parent)->root->objectid; |
49 | 44 | ||
50 | spin_unlock(&dentry->d_lock); | ||
51 | |||
52 | if (parent_root_id != fid->root_objectid) { | 45 | if (parent_root_id != fid->root_objectid) { |
53 | fid->parent_root_objectid = parent_root_id; | 46 | fid->parent_root_objectid = parent_root_id; |
54 | len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; | 47 | len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; |
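The export.c hunk above tracks the updated ->encode_fh() calling convention: the method now receives the inode and an optional parent inode instead of a dentry and a "connectable" flag, so the d_lock/d_parent dance can go away. For orientation only, a minimal handler under the new convention, using a generic ino/generation layout rather than the btrfs_fid format shown above, might look like:

	#include <linux/exportfs.h>
	#include <linux/fs.h>

	static int example_encode_fh(struct inode *inode, u32 *fh, int *max_len,
				     struct inode *parent)
	{
		int len = parent ? 4 : 2;	/* u32 slots we intend to fill */

		if (*max_len < len) {
			*max_len = len;
			return 255;	/* same "buffer too small" convention as above */
		}

		fh[0] = inode->i_ino;
		fh[1] = inode->i_generation;
		if (parent) {
			fh[2] = parent->i_ino;
			fh[3] = parent->i_generation;
		}
		*max_len = len;
		return parent ? FILEID_INO32_GEN_PARENT : FILEID_INO32_GEN;
	}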
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49fd7b66d57b..4b5a1e1bdefb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3578,7 +3578,7 @@ again: | |||
3578 | space_info->chunk_alloc = 0; | 3578 | space_info->chunk_alloc = 0; |
3579 | spin_unlock(&space_info->lock); | 3579 | spin_unlock(&space_info->lock); |
3580 | out: | 3580 | out: |
3581 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3581 | mutex_unlock(&fs_info->chunk_mutex); |
3582 | return ret; | 3582 | return ret; |
3583 | } | 3583 | } |
3584 | 3584 | ||
@@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode) | |||
4355 | BTRFS_I(inode)->outstanding_extents--; | 4355 | BTRFS_I(inode)->outstanding_extents--; |
4356 | 4356 | ||
4357 | if (BTRFS_I(inode)->outstanding_extents == 0 && | 4357 | if (BTRFS_I(inode)->outstanding_extents == 0 && |
4358 | BTRFS_I(inode)->delalloc_meta_reserved) { | 4358 | test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
4359 | &BTRFS_I(inode)->runtime_flags)) | ||
4359 | drop_inode_space = 1; | 4360 | drop_inode_space = 1; |
4360 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | ||
4361 | } | ||
4362 | 4361 | ||
4363 | /* | 4362 | /* |
4364 | * If we have more or the same amount of outstanding extents than we have | 4363 | * If we have more or the same amount of outstanding extents than we have |
@@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4465 | * Add an item to reserve for updating the inode when we complete the | 4464 | * Add an item to reserve for updating the inode when we complete the |
4466 | * delalloc io. | 4465 | * delalloc io. |
4467 | */ | 4466 | */ |
4468 | if (!BTRFS_I(inode)->delalloc_meta_reserved) { | 4467 | if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
4468 | &BTRFS_I(inode)->runtime_flags)) { | ||
4469 | nr_extents++; | 4469 | nr_extents++; |
4470 | extra_reserve = 1; | 4470 | extra_reserve = 1; |
4471 | } | 4471 | } |
@@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4511 | 4511 | ||
4512 | spin_lock(&BTRFS_I(inode)->lock); | 4512 | spin_lock(&BTRFS_I(inode)->lock); |
4513 | if (extra_reserve) { | 4513 | if (extra_reserve) { |
4514 | BTRFS_I(inode)->delalloc_meta_reserved = 1; | 4514 | set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, |
4515 | &BTRFS_I(inode)->runtime_flags); | ||
4515 | nr_extents--; | 4516 | nr_extents--; |
4516 | } | 4517 | } |
4517 | BTRFS_I(inode)->reserved_extents += nr_extents; | 4518 | BTRFS_I(inode)->reserved_extents += nr_extents; |
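Several hunks in this patch fold what used to be separate int fields in struct btrfs_inode (delalloc_meta_reserved here; dummy_inode, in_defrag and ordered_data_close elsewhere) into single bits of a runtime_flags word handled with the kernel's atomic bitops. A small sketch of that pattern with a made-up flag name follows; note that the reserve path above still takes the inode spinlock and uses test_bit/set_bit separately, while the drop path relies on test_and_clear_bit alone:

	#include <linux/bitops.h>

	#define MY_INODE_META_RESERVED	0	/* bit index, like the BTRFS_INODE_* flags */

	struct my_inode {
		unsigned long runtime_flags;	/* one word replaces several bool fields */
		long reserved_items;
	};

	static void my_take_reservation(struct my_inode *ino)
	{
		/* only the first caller to set the bit accounts for the extra item */
		if (!test_and_set_bit(MY_INODE_META_RESERVED, &ino->runtime_flags))
			ino->reserved_items++;
	}

	static void my_drop_reservation(struct my_inode *ino)
	{
		/* atomically clear the bit and learn whether it was set */
		if (test_and_clear_bit(MY_INODE_META_RESERVED, &ino->runtime_flags))
			ino->reserved_items--;
	}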
@@ -5217,7 +5218,7 @@ out: | |||
5217 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 5218 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
5218 | struct btrfs_root *root, | 5219 | struct btrfs_root *root, |
5219 | struct extent_buffer *buf, | 5220 | struct extent_buffer *buf, |
5220 | u64 parent, int last_ref, int for_cow) | 5221 | u64 parent, int last_ref) |
5221 | { | 5222 | { |
5222 | struct btrfs_block_group_cache *cache = NULL; | 5223 | struct btrfs_block_group_cache *cache = NULL; |
5223 | int ret; | 5224 | int ret; |
@@ -5227,7 +5228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
5227 | buf->start, buf->len, | 5228 | buf->start, buf->len, |
5228 | parent, root->root_key.objectid, | 5229 | parent, root->root_key.objectid, |
5229 | btrfs_header_level(buf), | 5230 | btrfs_header_level(buf), |
5230 | BTRFS_DROP_DELAYED_REF, NULL, for_cow); | 5231 | BTRFS_DROP_DELAYED_REF, NULL, 0); |
5231 | BUG_ON(ret); /* -ENOMEM */ | 5232 | BUG_ON(ret); /* -ENOMEM */ |
5232 | } | 5233 | } |
5233 | 5234 | ||
@@ -6249,7 +6250,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6249 | struct btrfs_root *root, u32 blocksize, | 6250 | struct btrfs_root *root, u32 blocksize, |
6250 | u64 parent, u64 root_objectid, | 6251 | u64 parent, u64 root_objectid, |
6251 | struct btrfs_disk_key *key, int level, | 6252 | struct btrfs_disk_key *key, int level, |
6252 | u64 hint, u64 empty_size, int for_cow) | 6253 | u64 hint, u64 empty_size) |
6253 | { | 6254 | { |
6254 | struct btrfs_key ins; | 6255 | struct btrfs_key ins; |
6255 | struct btrfs_block_rsv *block_rsv; | 6256 | struct btrfs_block_rsv *block_rsv; |
@@ -6297,7 +6298,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6297 | ins.objectid, | 6298 | ins.objectid, |
6298 | ins.offset, parent, root_objectid, | 6299 | ins.offset, parent, root_objectid, |
6299 | level, BTRFS_ADD_DELAYED_EXTENT, | 6300 | level, BTRFS_ADD_DELAYED_EXTENT, |
6300 | extent_op, for_cow); | 6301 | extent_op, 0); |
6301 | BUG_ON(ret); /* -ENOMEM */ | 6302 | BUG_ON(ret); /* -ENOMEM */ |
6302 | } | 6303 | } |
6303 | return buf; | 6304 | return buf; |
@@ -6715,7 +6716,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6715 | btrfs_header_owner(path->nodes[level + 1])); | 6716 | btrfs_header_owner(path->nodes[level + 1])); |
6716 | } | 6717 | } |
6717 | 6718 | ||
6718 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0); | 6719 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
6719 | out: | 6720 | out: |
6720 | wc->refs[level] = 0; | 6721 | wc->refs[level] = 0; |
6721 | wc->flags[level] = 0; | 6722 | wc->flags[level] = 0; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c9018a05036e..2c8f7b204617 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
186 | return parent; | 186 | return parent; |
187 | } | 187 | } |
188 | 188 | ||
189 | entry = rb_entry(node, struct tree_entry, rb_node); | ||
190 | rb_link_node(node, parent, p); | 189 | rb_link_node(node, parent, p); |
191 | rb_insert_color(node, root); | 190 | rb_insert_color(node, root); |
192 | return NULL; | 191 | return NULL; |
@@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state) | |||
413 | 412 | ||
414 | /* | 413 | /* |
415 | * utility function to clear some bits in an extent state struct. | 414 | * utility function to clear some bits in an extent state struct. |
416 | * it will optionally wake up any one waiting on this state (wake == 1) | 415 | * it will optionally wake up any one waiting on this state (wake == 1). |
417 | * | 416 | * |
418 | * If no bits are set on the state struct after clearing things, the | 417 | * If no bits are set on the state struct after clearing things, the |
419 | * struct is freed and removed from the tree | 418 | * struct is freed and removed from the tree |
@@ -570,10 +569,8 @@ hit_next: | |||
570 | if (err) | 569 | if (err) |
571 | goto out; | 570 | goto out; |
572 | if (state->end <= end) { | 571 | if (state->end <= end) { |
573 | clear_state_bit(tree, state, &bits, wake); | 572 | state = clear_state_bit(tree, state, &bits, wake); |
574 | if (last_end == (u64)-1) | 573 | goto next; |
575 | goto out; | ||
576 | start = last_end + 1; | ||
577 | } | 574 | } |
578 | goto search_again; | 575 | goto search_again; |
579 | } | 576 | } |
@@ -781,7 +778,6 @@ hit_next: | |||
781 | * Just lock what we found and keep going | 778 | * Just lock what we found and keep going |
782 | */ | 779 | */ |
783 | if (state->start == start && state->end <= end) { | 780 | if (state->start == start && state->end <= end) { |
784 | struct rb_node *next_node; | ||
785 | if (state->state & exclusive_bits) { | 781 | if (state->state & exclusive_bits) { |
786 | *failed_start = state->start; | 782 | *failed_start = state->start; |
787 | err = -EEXIST; | 783 | err = -EEXIST; |
@@ -789,20 +785,15 @@ hit_next: | |||
789 | } | 785 | } |
790 | 786 | ||
791 | set_state_bits(tree, state, &bits); | 787 | set_state_bits(tree, state, &bits); |
792 | |||
793 | cache_state(state, cached_state); | 788 | cache_state(state, cached_state); |
794 | merge_state(tree, state); | 789 | merge_state(tree, state); |
795 | if (last_end == (u64)-1) | 790 | if (last_end == (u64)-1) |
796 | goto out; | 791 | goto out; |
797 | |||
798 | start = last_end + 1; | 792 | start = last_end + 1; |
799 | next_node = rb_next(&state->rb_node); | 793 | state = next_state(state); |
800 | if (next_node && start < end && prealloc && !need_resched()) { | 794 | if (start < end && state && state->start == start && |
801 | state = rb_entry(next_node, struct extent_state, | 795 | !need_resched()) |
802 | rb_node); | 796 | goto hit_next; |
803 | if (state->start == start) | ||
804 | goto hit_next; | ||
805 | } | ||
806 | goto search_again; | 797 | goto search_again; |
807 | } | 798 | } |
808 | 799 | ||
@@ -845,6 +836,10 @@ hit_next: | |||
845 | if (last_end == (u64)-1) | 836 | if (last_end == (u64)-1) |
846 | goto out; | 837 | goto out; |
847 | start = last_end + 1; | 838 | start = last_end + 1; |
839 | state = next_state(state); | ||
840 | if (start < end && state && state->start == start && | ||
841 | !need_resched()) | ||
842 | goto hit_next; | ||
848 | } | 843 | } |
849 | goto search_again; | 844 | goto search_again; |
850 | } | 845 | } |
@@ -994,21 +989,14 @@ hit_next: | |||
994 | * Just lock what we found and keep going | 989 | * Just lock what we found and keep going |
995 | */ | 990 | */ |
996 | if (state->start == start && state->end <= end) { | 991 | if (state->start == start && state->end <= end) { |
997 | struct rb_node *next_node; | ||
998 | |||
999 | set_state_bits(tree, state, &bits); | 992 | set_state_bits(tree, state, &bits); |
1000 | clear_state_bit(tree, state, &clear_bits, 0); | 993 | state = clear_state_bit(tree, state, &clear_bits, 0); |
1001 | if (last_end == (u64)-1) | 994 | if (last_end == (u64)-1) |
1002 | goto out; | 995 | goto out; |
1003 | |||
1004 | start = last_end + 1; | 996 | start = last_end + 1; |
1005 | next_node = rb_next(&state->rb_node); | 997 | if (start < end && state && state->start == start && |
1006 | if (next_node && start < end && prealloc && !need_resched()) { | 998 | !need_resched()) |
1007 | state = rb_entry(next_node, struct extent_state, | 999 | goto hit_next; |
1008 | rb_node); | ||
1009 | if (state->start == start) | ||
1010 | goto hit_next; | ||
1011 | } | ||
1012 | goto search_again; | 1000 | goto search_again; |
1013 | } | 1001 | } |
1014 | 1002 | ||
@@ -1042,10 +1030,13 @@ hit_next: | |||
1042 | goto out; | 1030 | goto out; |
1043 | if (state->end <= end) { | 1031 | if (state->end <= end) { |
1044 | set_state_bits(tree, state, &bits); | 1032 | set_state_bits(tree, state, &bits); |
1045 | clear_state_bit(tree, state, &clear_bits, 0); | 1033 | state = clear_state_bit(tree, state, &clear_bits, 0); |
1046 | if (last_end == (u64)-1) | 1034 | if (last_end == (u64)-1) |
1047 | goto out; | 1035 | goto out; |
1048 | start = last_end + 1; | 1036 | start = last_end + 1; |
1037 | if (start < end && state && state->start == start && | ||
1038 | !need_resched()) | ||
1039 | goto hit_next; | ||
1049 | } | 1040 | } |
1050 | goto search_again; | 1041 | goto search_again; |
1051 | } | 1042 | } |
@@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | |||
1173 | cached_state, mask); | 1164 | cached_state, mask); |
1174 | } | 1165 | } |
1175 | 1166 | ||
1176 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 1167 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
1177 | u64 end, struct extent_state **cached_state, | 1168 | struct extent_state **cached_state, gfp_t mask) |
1178 | gfp_t mask) | ||
1179 | { | 1169 | { |
1180 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, | 1170 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
1181 | cached_state, mask); | 1171 | cached_state, mask); |
@@ -1293,7 +1283,7 @@ out: | |||
1293 | * returned if we find something, and *start_ret and *end_ret are | 1283 | * returned if we find something, and *start_ret and *end_ret are |
1294 | * set to reflect the state struct that was found. | 1284 | * set to reflect the state struct that was found. |
1295 | * | 1285 | * |
1296 | * If nothing was found, 1 is returned, < 0 on error | 1286 | * If nothing was found, 1 is returned; if something was found, 0 is returned. |
1297 | */ | 1287 | */ |
1298 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | 1288 | int find_first_extent_bit(struct extent_io_tree *tree, u64 start, |
1299 | u64 *start_ret, u64 *end_ret, int bits) | 1289 | u64 *start_ret, u64 *end_ret, int bits) |
@@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
1923 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 1913 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
1924 | /* try to remap that extent elsewhere? */ | 1914 | /* try to remap that extent elsewhere? */ |
1925 | bio_put(bio); | 1915 | bio_put(bio); |
1916 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | ||
1926 | return -EIO; | 1917 | return -EIO; |
1927 | } | 1918 | } |
1928 | 1919 | ||
@@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) | |||
2222 | uptodate = 0; | 2213 | uptodate = 0; |
2223 | } | 2214 | } |
2224 | 2215 | ||
2225 | if (!uptodate && tree->ops && | ||
2226 | tree->ops->writepage_io_failed_hook) { | ||
2227 | ret = tree->ops->writepage_io_failed_hook(NULL, page, | ||
2228 | start, end, NULL); | ||
2229 | /* Writeback already completed */ | ||
2230 | if (ret == 0) | ||
2231 | return 1; | ||
2232 | } | ||
2233 | |||
2234 | if (!uptodate) { | 2216 | if (!uptodate) { |
2235 | clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); | ||
2236 | ClearPageUptodate(page); | 2217 | ClearPageUptodate(page); |
2237 | SetPageError(page); | 2218 | SetPageError(page); |
2238 | } | 2219 | } |
@@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2347 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { | 2328 | if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { |
2348 | ret = tree->ops->readpage_end_io_hook(page, start, end, | 2329 | ret = tree->ops->readpage_end_io_hook(page, start, end, |
2349 | state, mirror); | 2330 | state, mirror); |
2350 | if (ret) | 2331 | if (ret) { |
2332 | /* no IO indicated but software detected errors | ||
2333 | * in the block, either checksum errors or | ||
2334 | * issues with the contents */ | ||
2335 | struct btrfs_root *root = | ||
2336 | BTRFS_I(page->mapping->host)->root; | ||
2337 | struct btrfs_device *device; | ||
2338 | |||
2351 | uptodate = 0; | 2339 | uptodate = 0; |
2352 | else | 2340 | device = btrfs_find_device_for_logical( |
2341 | root, start, mirror); | ||
2342 | if (device) | ||
2343 | btrfs_dev_stat_inc_and_print(device, | ||
2344 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
2345 | } else { | ||
2353 | clean_io_failure(start, page); | 2346 | clean_io_failure(start, page); |
2347 | } | ||
2354 | } | 2348 | } |
2355 | 2349 | ||
2356 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { | 2350 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { |
@@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb, | |||
3164 | u64 offset = eb->start; | 3158 | u64 offset = eb->start; |
3165 | unsigned long i, num_pages; | 3159 | unsigned long i, num_pages; |
3166 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); | 3160 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE); |
3167 | int ret; | 3161 | int ret = 0; |
3168 | 3162 | ||
3169 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3163 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
3170 | num_pages = num_extent_pages(eb->start, eb->len); | 3164 | num_pages = num_extent_pages(eb->start, eb->len); |
@@ -3930,6 +3924,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3930 | eb->start = start; | 3924 | eb->start = start; |
3931 | eb->len = len; | 3925 | eb->len = len; |
3932 | eb->tree = tree; | 3926 | eb->tree = tree; |
3927 | eb->bflags = 0; | ||
3933 | rwlock_init(&eb->lock); | 3928 | rwlock_init(&eb->lock); |
3934 | atomic_set(&eb->write_locks, 0); | 3929 | atomic_set(&eb->write_locks, 0); |
3935 | atomic_set(&eb->read_locks, 0); | 3930 | atomic_set(&eb->read_locks, 0); |
@@ -3967,6 +3962,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3967 | return eb; | 3962 | return eb; |
3968 | } | 3963 | } |
3969 | 3964 | ||
3965 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) | ||
3966 | { | ||
3967 | unsigned long i; | ||
3968 | struct page *p; | ||
3969 | struct extent_buffer *new; | ||
3970 | unsigned long num_pages = num_extent_pages(src->start, src->len); | ||
3971 | |||
3972 | new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); | ||
3973 | if (new == NULL) | ||
3974 | return NULL; | ||
3975 | |||
3976 | for (i = 0; i < num_pages; i++) { | ||
3977 | p = alloc_page(GFP_ATOMIC); | ||
3978 | BUG_ON(!p); | ||
3979 | attach_extent_buffer_page(new, p); | ||
3980 | WARN_ON(PageDirty(p)); | ||
3981 | SetPageUptodate(p); | ||
3982 | new->pages[i] = p; | ||
3983 | } | ||
3984 | |||
3985 | copy_extent_buffer(new, src, 0, 0, src->len); | ||
3986 | set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags); | ||
3987 | set_bit(EXTENT_BUFFER_DUMMY, &new->bflags); | ||
3988 | |||
3989 | return new; | ||
3990 | } | ||
3991 | |||
3992 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) | ||
3993 | { | ||
3994 | struct extent_buffer *eb; | ||
3995 | unsigned long num_pages = num_extent_pages(0, len); | ||
3996 | unsigned long i; | ||
3997 | |||
3998 | eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); | ||
3999 | if (!eb) | ||
4000 | return NULL; | ||
4001 | |||
4002 | for (i = 0; i < num_pages; i++) { | ||
4003 | eb->pages[i] = alloc_page(GFP_ATOMIC); | ||
4004 | if (!eb->pages[i]) | ||
4005 | goto err; | ||
4006 | } | ||
4007 | set_extent_buffer_uptodate(eb); | ||
4008 | btrfs_set_header_nritems(eb, 0); | ||
4009 | set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
4010 | |||
4011 | return eb; | ||
4012 | err: | ||
4013 | for (; i > 0; i--) | ||
4014 | __free_page(eb->pages[i - 1]); | ||
4015 | __free_extent_buffer(eb); | ||
4016 | return NULL; | ||
4017 | } | ||
4018 | |||
3970 | static int extent_buffer_under_io(struct extent_buffer *eb) | 4019 | static int extent_buffer_under_io(struct extent_buffer *eb) |
3971 | { | 4020 | { |
3972 | return (atomic_read(&eb->io_pages) || | 4021 | return (atomic_read(&eb->io_pages) || |
@@ -3981,18 +4030,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
3981 | unsigned long start_idx) | 4030 | unsigned long start_idx) |
3982 | { | 4031 | { |
3983 | unsigned long index; | 4032 | unsigned long index; |
4033 | unsigned long num_pages; | ||
3984 | struct page *page; | 4034 | struct page *page; |
4035 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | ||
3985 | 4036 | ||
3986 | BUG_ON(extent_buffer_under_io(eb)); | 4037 | BUG_ON(extent_buffer_under_io(eb)); |
3987 | 4038 | ||
3988 | index = num_extent_pages(eb->start, eb->len); | 4039 | num_pages = num_extent_pages(eb->start, eb->len); |
4040 | index = start_idx + num_pages; | ||
3989 | if (start_idx >= index) | 4041 | if (start_idx >= index) |
3990 | return; | 4042 | return; |
3991 | 4043 | ||
3992 | do { | 4044 | do { |
3993 | index--; | 4045 | index--; |
3994 | page = extent_buffer_page(eb, index); | 4046 | page = extent_buffer_page(eb, index); |
3995 | if (page) { | 4047 | if (page && mapped) { |
3996 | spin_lock(&page->mapping->private_lock); | 4048 | spin_lock(&page->mapping->private_lock); |
3997 | /* | 4049 | /* |
3998 | * We do this since we'll remove the pages after we've | 4050 | * We do this since we'll remove the pages after we've |
@@ -4017,6 +4069,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
4017 | } | 4069 | } |
4018 | spin_unlock(&page->mapping->private_lock); | 4070 | spin_unlock(&page->mapping->private_lock); |
4019 | 4071 | ||
4072 | } | ||
4073 | if (page) { | ||
4020 | /* One for when we alloced the page */ | 4074 | /* One for when we alloced the page */ |
4021 | page_cache_release(page); | 4075 | page_cache_release(page); |
4022 | } | 4076 | } |
@@ -4235,14 +4289,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask) | |||
4235 | { | 4289 | { |
4236 | WARN_ON(atomic_read(&eb->refs) == 0); | 4290 | WARN_ON(atomic_read(&eb->refs) == 0); |
4237 | if (atomic_dec_and_test(&eb->refs)) { | 4291 | if (atomic_dec_and_test(&eb->refs)) { |
4238 | struct extent_io_tree *tree = eb->tree; | 4292 | if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { |
4293 | spin_unlock(&eb->refs_lock); | ||
4294 | } else { | ||
4295 | struct extent_io_tree *tree = eb->tree; | ||
4239 | 4296 | ||
4240 | spin_unlock(&eb->refs_lock); | 4297 | spin_unlock(&eb->refs_lock); |
4241 | 4298 | ||
4242 | spin_lock(&tree->buffer_lock); | 4299 | spin_lock(&tree->buffer_lock); |
4243 | radix_tree_delete(&tree->buffer, | 4300 | radix_tree_delete(&tree->buffer, |
4244 | eb->start >> PAGE_CACHE_SHIFT); | 4301 | eb->start >> PAGE_CACHE_SHIFT); |
4245 | spin_unlock(&tree->buffer_lock); | 4302 | spin_unlock(&tree->buffer_lock); |
4303 | } | ||
4246 | 4304 | ||
4247 | /* Should be safe to release our pages at this point */ | 4305 | /* Should be safe to release our pages at this point */ |
4248 | btrfs_release_extent_buffer_page(eb, 0); | 4306 | btrfs_release_extent_buffer_page(eb, 0); |
@@ -4260,6 +4318,10 @@ void free_extent_buffer(struct extent_buffer *eb) | |||
4260 | 4318 | ||
4261 | spin_lock(&eb->refs_lock); | 4319 | spin_lock(&eb->refs_lock); |
4262 | if (atomic_read(&eb->refs) == 2 && | 4320 | if (atomic_read(&eb->refs) == 2 && |
4321 | test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) | ||
4322 | atomic_dec(&eb->refs); | ||
4323 | |||
4324 | if (atomic_read(&eb->refs) == 2 && | ||
4263 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && | 4325 | test_bit(EXTENT_BUFFER_STALE, &eb->bflags) && |
4264 | !extent_buffer_under_io(eb) && | 4326 | !extent_buffer_under_io(eb) && |
4265 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) | 4327 | test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b516c3b8dec6..25900af5b15d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -39,6 +39,7 @@ | |||
39 | #define EXTENT_BUFFER_STALE 6 | 39 | #define EXTENT_BUFFER_STALE 6 |
40 | #define EXTENT_BUFFER_WRITEBACK 7 | 40 | #define EXTENT_BUFFER_WRITEBACK 7 |
41 | #define EXTENT_BUFFER_IOERR 8 | 41 | #define EXTENT_BUFFER_IOERR 8 |
42 | #define EXTENT_BUFFER_DUMMY 9 | ||
42 | 43 | ||
43 | /* these are flags for extent_clear_unlock_delalloc */ | 44 | /* these are flags for extent_clear_unlock_delalloc */ |
44 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | 45 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 |
@@ -75,9 +76,6 @@ struct extent_io_ops { | |||
75 | unsigned long bio_flags); | 76 | unsigned long bio_flags); |
76 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 77 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
77 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); | 78 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); |
78 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, | ||
79 | u64 start, u64 end, | ||
80 | struct extent_state *state); | ||
81 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, | 79 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, |
82 | struct extent_state *state, int mirror); | 80 | struct extent_state *state, int mirror); |
83 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 81 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
@@ -225,6 +223,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
225 | struct extent_state **cached_state, gfp_t mask); | 223 | struct extent_state **cached_state, gfp_t mask); |
226 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 224 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
227 | struct extent_state **cached_state, gfp_t mask); | 225 | struct extent_state **cached_state, gfp_t mask); |
226 | int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | ||
227 | struct extent_state **cached_state, gfp_t mask); | ||
228 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 228 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
229 | gfp_t mask); | 229 | gfp_t mask); |
230 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 230 | int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -265,6 +265,8 @@ void set_page_extent_mapped(struct page *page); | |||
265 | 265 | ||
266 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | 266 | struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, |
267 | u64 start, unsigned long len); | 267 | u64 start, unsigned long len); |
268 | struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); | ||
269 | struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); | ||
268 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | 270 | struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, |
269 | u64 start, unsigned long len); | 271 | u64 start, unsigned long len); |
270 | void free_extent_buffer(struct extent_buffer *eb); | 272 | void free_extent_buffer(struct extent_buffer *eb); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d764bbc..70dc8ca73e25 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -65,6 +65,21 @@ struct inode_defrag { | |||
65 | int cycled; | 65 | int cycled; |
66 | }; | 66 | }; |
67 | 67 | ||
68 | static int __compare_inode_defrag(struct inode_defrag *defrag1, | ||
69 | struct inode_defrag *defrag2) | ||
70 | { | ||
71 | if (defrag1->root > defrag2->root) | ||
72 | return 1; | ||
73 | else if (defrag1->root < defrag2->root) | ||
74 | return -1; | ||
75 | else if (defrag1->ino > defrag2->ino) | ||
76 | return 1; | ||
77 | else if (defrag1->ino < defrag2->ino) | ||
78 | return -1; | ||
79 | else | ||
80 | return 0; | ||
81 | } | ||
82 | |||
68 | /* pop a record for an inode into the defrag tree. The lock | 83 | /* pop a record for an inode into the defrag tree. The lock |
69 | * must be held already | 84 | * must be held already |
70 | * | 85 | * |
@@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
81 | struct inode_defrag *entry; | 96 | struct inode_defrag *entry; |
82 | struct rb_node **p; | 97 | struct rb_node **p; |
83 | struct rb_node *parent = NULL; | 98 | struct rb_node *parent = NULL; |
99 | int ret; | ||
84 | 100 | ||
85 | p = &root->fs_info->defrag_inodes.rb_node; | 101 | p = &root->fs_info->defrag_inodes.rb_node; |
86 | while (*p) { | 102 | while (*p) { |
87 | parent = *p; | 103 | parent = *p; |
88 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 104 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
89 | 105 | ||
90 | if (defrag->ino < entry->ino) | 106 | ret = __compare_inode_defrag(defrag, entry); |
107 | if (ret < 0) | ||
91 | p = &parent->rb_left; | 108 | p = &parent->rb_left; |
92 | else if (defrag->ino > entry->ino) | 109 | else if (ret > 0) |
93 | p = &parent->rb_right; | 110 | p = &parent->rb_right; |
94 | else { | 111 | else { |
95 | /* if we're reinserting an entry for | 112 | /* if we're reinserting an entry for |
@@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
103 | goto exists; | 120 | goto exists; |
104 | } | 121 | } |
105 | } | 122 | } |
106 | BTRFS_I(inode)->in_defrag = 1; | 123 | set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
107 | rb_link_node(&defrag->rb_node, parent, p); | 124 | rb_link_node(&defrag->rb_node, parent, p); |
108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 125 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
109 | return; | 126 | return; |
@@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
131 | if (btrfs_fs_closing(root->fs_info)) | 148 | if (btrfs_fs_closing(root->fs_info)) |
132 | return 0; | 149 | return 0; |
133 | 150 | ||
134 | if (BTRFS_I(inode)->in_defrag) | 151 | if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
135 | return 0; | 152 | return 0; |
136 | 153 | ||
137 | if (trans) | 154 | if (trans) |
@@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
148 | defrag->root = root->root_key.objectid; | 165 | defrag->root = root->root_key.objectid; |
149 | 166 | ||
150 | spin_lock(&root->fs_info->defrag_inodes_lock); | 167 | spin_lock(&root->fs_info->defrag_inodes_lock); |
151 | if (!BTRFS_I(inode)->in_defrag) | 168 | if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
152 | __btrfs_add_inode_defrag(inode, defrag); | 169 | __btrfs_add_inode_defrag(inode, defrag); |
153 | else | 170 | else |
154 | kfree(defrag); | 171 | kfree(defrag); |
@@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
159 | /* | 176 | /* |
160 | * must be called with the defrag_inodes lock held | 177 | * must be called with the defrag_inodes lock held |
161 | */ | 178 | */ |
162 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, | 179 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, |
180 | u64 root, u64 ino, | ||
163 | struct rb_node **next) | 181 | struct rb_node **next) |
164 | { | 182 | { |
165 | struct inode_defrag *entry = NULL; | 183 | struct inode_defrag *entry = NULL; |
184 | struct inode_defrag tmp; | ||
166 | struct rb_node *p; | 185 | struct rb_node *p; |
167 | struct rb_node *parent = NULL; | 186 | struct rb_node *parent = NULL; |
187 | int ret; | ||
188 | |||
189 | tmp.ino = ino; | ||
190 | tmp.root = root; | ||
168 | 191 | ||
169 | p = info->defrag_inodes.rb_node; | 192 | p = info->defrag_inodes.rb_node; |
170 | while (p) { | 193 | while (p) { |
171 | parent = p; | 194 | parent = p; |
172 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 195 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
173 | 196 | ||
174 | if (ino < entry->ino) | 197 | ret = __compare_inode_defrag(&tmp, entry); |
198 | if (ret < 0) | ||
175 | p = parent->rb_left; | 199 | p = parent->rb_left; |
176 | else if (ino > entry->ino) | 200 | else if (ret > 0) |
177 | p = parent->rb_right; | 201 | p = parent->rb_right; |
178 | else | 202 | else |
179 | return entry; | 203 | return entry; |
180 | } | 204 | } |
181 | 205 | ||
182 | if (next) { | 206 | if (next) { |
183 | while (parent && ino > entry->ino) { | 207 | while (parent && __compare_inode_defrag(&tmp, entry) > 0) { |
184 | parent = rb_next(parent); | 208 | parent = rb_next(parent); |
185 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 209 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
186 | } | 210 | } |
@@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
202 | struct btrfs_key key; | 226 | struct btrfs_key key; |
203 | struct btrfs_ioctl_defrag_range_args range; | 227 | struct btrfs_ioctl_defrag_range_args range; |
204 | u64 first_ino = 0; | 228 | u64 first_ino = 0; |
229 | u64 root_objectid = 0; | ||
205 | int num_defrag; | 230 | int num_defrag; |
206 | int defrag_batch = 1024; | 231 | int defrag_batch = 1024; |
207 | 232 | ||
@@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
214 | n = NULL; | 239 | n = NULL; |
215 | 240 | ||
216 | /* find an inode to defrag */ | 241 | /* find an inode to defrag */ |
217 | defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); | 242 | defrag = btrfs_find_defrag_inode(fs_info, root_objectid, |
243 | first_ino, &n); | ||
218 | if (!defrag) { | 244 | if (!defrag) { |
219 | if (n) | 245 | if (n) { |
220 | defrag = rb_entry(n, struct inode_defrag, rb_node); | 246 | defrag = rb_entry(n, struct inode_defrag, |
221 | else if (first_ino) { | 247 | rb_node); |
248 | } else if (root_objectid || first_ino) { | ||
249 | root_objectid = 0; | ||
222 | first_ino = 0; | 250 | first_ino = 0; |
223 | continue; | 251 | continue; |
224 | } else { | 252 | } else { |
@@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
228 | 256 | ||
229 | /* remove it from the rbtree */ | 257 | /* remove it from the rbtree */ |
230 | first_ino = defrag->ino + 1; | 258 | first_ino = defrag->ino + 1; |
259 | root_objectid = defrag->root; | ||
231 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | 260 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); |
232 | 261 | ||
233 | if (btrfs_fs_closing(fs_info)) | 262 | if (btrfs_fs_closing(fs_info)) |
@@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
252 | goto next; | 281 | goto next; |
253 | 282 | ||
254 | /* do a chunk of defrag */ | 283 | /* do a chunk of defrag */ |
255 | BTRFS_I(inode)->in_defrag = 0; | 284 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
256 | range.start = defrag->last_offset; | 285 | range.start = defrag->last_offset; |
257 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | 286 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, |
258 | defrag_batch); | 287 | defrag_batch); |
@@ -1404,12 +1433,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1404 | goto out; | 1433 | goto out; |
1405 | } | 1434 | } |
1406 | 1435 | ||
1407 | err = btrfs_update_time(file); | 1436 | err = file_update_time(file); |
1408 | if (err) { | 1437 | if (err) { |
1409 | mutex_unlock(&inode->i_mutex); | 1438 | mutex_unlock(&inode->i_mutex); |
1410 | goto out; | 1439 | goto out; |
1411 | } | 1440 | } |
1412 | BTRFS_I(inode)->sequence++; | ||
1413 | 1441 | ||
1414 | start_pos = round_down(pos, root->sectorsize); | 1442 | start_pos = round_down(pos, root->sectorsize); |
1415 | if (start_pos > i_size_read(inode)) { | 1443 | if (start_pos > i_size_read(inode)) { |
@@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1466 | * flush down new bytes that may have been written if the | 1494 | * flush down new bytes that may have been written if the |
1467 | * application were using truncate to replace a file in place. | 1495 | * application were using truncate to replace a file in place. |
1468 | */ | 1496 | */ |
1469 | if (BTRFS_I(inode)->ordered_data_close) { | 1497 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
1470 | BTRFS_I(inode)->ordered_data_close = 0; | 1498 | &BTRFS_I(inode)->runtime_flags)) { |
1471 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | 1499 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); |
1472 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 1500 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
1473 | filemap_flush(inode->i_mapping); | 1501 | filemap_flush(inode->i_mapping); |
@@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1498 | 1526 | ||
1499 | trace_btrfs_sync_file(file, datasync); | 1527 | trace_btrfs_sync_file(file, datasync); |
1500 | 1528 | ||
1501 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
1502 | if (ret) | ||
1503 | return ret; | ||
1504 | mutex_lock(&inode->i_mutex); | 1529 | mutex_lock(&inode->i_mutex); |
1505 | 1530 | ||
1506 | /* we wait first, since the writeback may change the inode */ | 1531 | /* |
1532 | * we wait first, since the writeback may change the inode, also wait | ||
1533 | * ordered range does a filemape_write_and_wait_range which is why we | ||
1534 | * don't do it above like other file systems. | ||
1535 | */ | ||
1507 | root->log_batch++; | 1536 | root->log_batch++; |
1508 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 1537 | btrfs_wait_ordered_range(inode, start, end); |
1509 | root->log_batch++; | 1538 | root->log_batch++; |
1510 | 1539 | ||
1511 | /* | 1540 | /* |
@@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1523 | * syncing | 1552 | * syncing |
1524 | */ | 1553 | */ |
1525 | smp_mb(); | 1554 | smp_mb(); |
1526 | if (BTRFS_I(inode)->last_trans <= | 1555 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || |
1556 | BTRFS_I(inode)->last_trans <= | ||
1527 | root->fs_info->last_trans_committed) { | 1557 | root->fs_info->last_trans_committed) { |
1528 | BTRFS_I(inode)->last_trans = 0; | 1558 | BTRFS_I(inode)->last_trans = 0; |
1529 | mutex_unlock(&inode->i_mutex); | 1559 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 202008ec367d..81296c57405a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -33,6 +33,8 @@ | |||
33 | 33 | ||
34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, | 34 | static int link_free_space(struct btrfs_free_space_ctl *ctl, |
35 | struct btrfs_free_space *info); | 35 | struct btrfs_free_space *info); |
36 | static void unlink_free_space(struct btrfs_free_space_ctl *ctl, | ||
37 | struct btrfs_free_space *info); | ||
36 | 38 | ||
37 | static struct inode *__lookup_free_space_inode(struct btrfs_root *root, | 39 | static struct inode *__lookup_free_space_inode(struct btrfs_root *root, |
38 | struct btrfs_path *path, | 40 | struct btrfs_path *path, |
@@ -75,7 +77,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, | |||
75 | return ERR_PTR(-ENOENT); | 77 | return ERR_PTR(-ENOENT); |
76 | } | 78 | } |
77 | 79 | ||
78 | inode->i_mapping->flags &= ~__GFP_FS; | 80 | mapping_set_gfp_mask(inode->i_mapping, |
81 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | ||
79 | 82 | ||
80 | return inode; | 83 | return inode; |
81 | } | 84 | } |
@@ -365,7 +368,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, | |||
365 | 368 | ||
366 | static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | 369 | static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) |
367 | { | 370 | { |
368 | u64 *val; | 371 | __le64 *val; |
369 | 372 | ||
370 | io_ctl_map_page(io_ctl, 1); | 373 | io_ctl_map_page(io_ctl, 1); |
371 | 374 | ||
@@ -388,7 +391,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) | |||
388 | 391 | ||
389 | static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) | 392 | static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) |
390 | { | 393 | { |
391 | u64 *gen; | 394 | __le64 *gen; |
392 | 395 | ||
393 | /* | 396 | /* |
394 | * Skip the crc area. If we don't check crcs then we just have a 64bit | 397 | * Skip the crc area. If we don't check crcs then we just have a 64bit |
@@ -584,6 +587,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | |||
584 | return 0; | 587 | return 0; |
585 | } | 588 | } |
586 | 589 | ||
590 | /* | ||
591 | * Since we attach pinned extents after the fact we can have contiguous sections | ||
592 | * of free space that are split up into entries. This poses a problem with the | ||
593 | * tree logging stuff since it could have allocated across what appears to be 2 | ||
594 | * entries since we would have merged the entries when adding the pinned extents | ||
595 | * back to the free space cache. So run through the space cache that we just | ||
596 | * loaded and merge contiguous entries. This will make the log replay stuff not | ||
597 | * blow up and it will make for nicer allocator behavior. | ||
598 | */ | ||
599 | static void merge_space_tree(struct btrfs_free_space_ctl *ctl) | ||
600 | { | ||
601 | struct btrfs_free_space *e, *prev = NULL; | ||
602 | struct rb_node *n; | ||
603 | |||
604 | again: | ||
605 | spin_lock(&ctl->tree_lock); | ||
606 | for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { | ||
607 | e = rb_entry(n, struct btrfs_free_space, offset_index); | ||
608 | if (!prev) | ||
609 | goto next; | ||
610 | if (e->bitmap || prev->bitmap) | ||
611 | goto next; | ||
612 | if (prev->offset + prev->bytes == e->offset) { | ||
613 | unlink_free_space(ctl, prev); | ||
614 | unlink_free_space(ctl, e); | ||
615 | prev->bytes += e->bytes; | ||
616 | kmem_cache_free(btrfs_free_space_cachep, e); | ||
617 | link_free_space(ctl, prev); | ||
618 | prev = NULL; | ||
619 | spin_unlock(&ctl->tree_lock); | ||
620 | goto again; | ||
621 | } | ||
622 | next: | ||
623 | prev = e; | ||
624 | } | ||
625 | spin_unlock(&ctl->tree_lock); | ||
626 | } | ||
627 | |||
587 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | 628 | int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, |
588 | struct btrfs_free_space_ctl *ctl, | 629 | struct btrfs_free_space_ctl *ctl, |
589 | struct btrfs_path *path, u64 offset) | 630 | struct btrfs_path *path, u64 offset) |
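The merge_space_tree() helper added above only coalesces plain (non-bitmap) entries whose byte ranges are exactly adjacent, which is the prev->offset + prev->bytes == e->offset test in the loop. As a small freestanding illustration of that condition, assuming nothing about the btrfs structures:

	#include <stdbool.h>
	#include <stdint.h>

	struct range {
		uint64_t offset;	/* start of the free extent */
		uint64_t bytes;		/* its length */
	};

	/* two half-open ranges [offset, offset + bytes) can be merged only when
	 * the first ends exactly where the second begins */
	static bool ranges_mergeable(const struct range *prev, const struct range *next)
	{
		return prev->offset + prev->bytes == next->offset;
	}

	/* {0, 4096} and {4096, 8192} merge into {0, 12288};
	 * {0, 4096} and {8192, 4096} do not, because of the gap in between */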
@@ -726,6 +767,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
726 | } | 767 | } |
727 | 768 | ||
728 | io_ctl_drop_pages(&io_ctl); | 769 | io_ctl_drop_pages(&io_ctl); |
770 | merge_space_tree(ctl); | ||
729 | ret = 1; | 771 | ret = 1; |
730 | out: | 772 | out: |
731 | io_ctl_free(&io_ctl); | 773 | io_ctl_free(&io_ctl); |
@@ -972,9 +1014,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
972 | goto out; | 1014 | goto out; |
973 | 1015 | ||
974 | 1016 | ||
975 | ret = filemap_write_and_wait(inode->i_mapping); | 1017 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
976 | if (ret) | ||
977 | goto out; | ||
978 | 1018 | ||
979 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 1019 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
980 | key.offset = offset; | 1020 | key.offset = offset; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ceb7b9c9edcc..f6ab6f5e635a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
89 | 89 | ||
90 | static int btrfs_setsize(struct inode *inode, loff_t newsize); | 90 | static int btrfs_setsize(struct inode *inode, loff_t newsize); |
91 | static int btrfs_truncate(struct inode *inode); | 91 | static int btrfs_truncate(struct inode *inode); |
92 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); | 92 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); |
93 | static noinline int cow_file_range(struct inode *inode, | 93 | static noinline int cow_file_range(struct inode *inode, |
94 | struct page *locked_page, | 94 | struct page *locked_page, |
95 | u64 start, u64 end, int *page_started, | 95 | u64 start, u64 end, int *page_started, |
@@ -257,10 +257,13 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
257 | ret = insert_inline_extent(trans, root, inode, start, | 257 | ret = insert_inline_extent(trans, root, inode, start, |
258 | inline_len, compressed_size, | 258 | inline_len, compressed_size, |
259 | compress_type, compressed_pages); | 259 | compress_type, compressed_pages); |
260 | if (ret) { | 260 | if (ret && ret != -ENOSPC) { |
261 | btrfs_abort_transaction(trans, root, ret); | 261 | btrfs_abort_transaction(trans, root, ret); |
262 | return ret; | 262 | return ret; |
263 | } else if (ret == -ENOSPC) { | ||
264 | return 1; | ||
263 | } | 265 | } |
266 | |||
264 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | 267 | btrfs_delalloc_release_metadata(inode, end + 1 - start); |
265 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 268 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
266 | return 0; | 269 | return 0; |
@@ -1572,11 +1575,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1572 | if (btrfs_is_free_space_inode(root, inode)) | 1575 | if (btrfs_is_free_space_inode(root, inode)) |
1573 | metadata = 2; | 1576 | metadata = 2; |
1574 | 1577 | ||
1575 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | ||
1576 | if (ret) | ||
1577 | return ret; | ||
1578 | |||
1579 | if (!(rw & REQ_WRITE)) { | 1578 | if (!(rw & REQ_WRITE)) { |
1579 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | ||
1580 | if (ret) | ||
1581 | return ret; | ||
1582 | |||
1580 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1583 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1581 | return btrfs_submit_compressed_read(inode, bio, | 1584 | return btrfs_submit_compressed_read(inode, bio, |
1582 | mirror_num, bio_flags); | 1585 | mirror_num, bio_flags); |
@@ -1815,25 +1818,24 @@ out: | |||
1815 | * an ordered extent if the range of bytes in the file it covers are | 1818 | * an ordered extent if the range of bytes in the file it covers are |
1816 | * fully written. | 1819 | * fully written. |
1817 | */ | 1820 | */ |
1818 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1821 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) |
1819 | { | 1822 | { |
1823 | struct inode *inode = ordered_extent->inode; | ||
1820 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1824 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1821 | struct btrfs_trans_handle *trans = NULL; | 1825 | struct btrfs_trans_handle *trans = NULL; |
1822 | struct btrfs_ordered_extent *ordered_extent = NULL; | ||
1823 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1826 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1824 | struct extent_state *cached_state = NULL; | 1827 | struct extent_state *cached_state = NULL; |
1825 | int compress_type = 0; | 1828 | int compress_type = 0; |
1826 | int ret; | 1829 | int ret; |
1827 | bool nolock; | 1830 | bool nolock; |
1828 | 1831 | ||
1829 | ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, | ||
1830 | end - start + 1); | ||
1831 | if (!ret) | ||
1832 | return 0; | ||
1833 | BUG_ON(!ordered_extent); /* Logic error */ | ||
1834 | |||
1835 | nolock = btrfs_is_free_space_inode(root, inode); | 1832 | nolock = btrfs_is_free_space_inode(root, inode); |
1836 | 1833 | ||
1834 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { | ||
1835 | ret = -EIO; | ||
1836 | goto out; | ||
1837 | } | ||
1838 | |||
1837 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1839 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1838 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ | 1840 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ |
1839 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1841 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
@@ -1889,12 +1891,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1889 | ordered_extent->file_offset, | 1891 | ordered_extent->file_offset, |
1890 | ordered_extent->len); | 1892 | ordered_extent->len); |
1891 | } | 1893 | } |
1892 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | 1894 | |
1893 | ordered_extent->file_offset + | ||
1894 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | ||
1895 | if (ret < 0) { | 1895 | if (ret < 0) { |
1896 | btrfs_abort_transaction(trans, root, ret); | 1896 | btrfs_abort_transaction(trans, root, ret); |
1897 | goto out; | 1897 | goto out_unlock; |
1898 | } | 1898 | } |
1899 | 1899 | ||
1900 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1900 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
@@ -1905,10 +1905,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1905 | ret = btrfs_update_inode_fallback(trans, root, inode); | 1905 | ret = btrfs_update_inode_fallback(trans, root, inode); |
1906 | if (ret) { /* -ENOMEM or corruption */ | 1906 | if (ret) { /* -ENOMEM or corruption */ |
1907 | btrfs_abort_transaction(trans, root, ret); | 1907 | btrfs_abort_transaction(trans, root, ret); |
1908 | goto out; | 1908 | goto out_unlock; |
1909 | } | 1909 | } |
1910 | } | 1910 | } |
1911 | ret = 0; | 1911 | ret = 0; |
1912 | out_unlock: | ||
1913 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | ||
1914 | ordered_extent->file_offset + | ||
1915 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | ||
1912 | out: | 1916 | out: |
1913 | if (root != root->fs_info->tree_root) | 1917 | if (root != root->fs_info->tree_root) |
1914 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | 1918 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); |
@@ -1919,26 +1923,57 @@ out: | |||
1919 | btrfs_end_transaction(trans, root); | 1923 | btrfs_end_transaction(trans, root); |
1920 | } | 1924 | } |
1921 | 1925 | ||
1926 | if (ret) | ||
1927 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, | ||
1928 | ordered_extent->file_offset + | ||
1929 | ordered_extent->len - 1, NULL, GFP_NOFS); | ||
1930 | |||
1931 | /* | ||
1932 | * This needs to be done to make sure anybody waiting knows we are done | ||
1933 | * updating everything for this ordered extent. | ||
1934 | */ | ||
1935 | btrfs_remove_ordered_extent(inode, ordered_extent); | ||
1936 | |||
1922 | /* once for us */ | 1937 | /* once for us */ |
1923 | btrfs_put_ordered_extent(ordered_extent); | 1938 | btrfs_put_ordered_extent(ordered_extent); |
1924 | /* once for the tree */ | 1939 | /* once for the tree */ |
1925 | btrfs_put_ordered_extent(ordered_extent); | 1940 | btrfs_put_ordered_extent(ordered_extent); |
1926 | 1941 | ||
1927 | return 0; | 1942 | return ret; |
1928 | out_unlock: | 1943 | } |
1929 | unlock_extent_cached(io_tree, ordered_extent->file_offset, | 1944 | |
1930 | ordered_extent->file_offset + | 1945 | static void finish_ordered_fn(struct btrfs_work *work) |
1931 | ordered_extent->len - 1, &cached_state, GFP_NOFS); | 1946 | { |
1932 | goto out; | 1947 | struct btrfs_ordered_extent *ordered_extent; |
1948 | ordered_extent = container_of(work, struct btrfs_ordered_extent, work); | ||
1949 | btrfs_finish_ordered_io(ordered_extent); | ||
1933 | } | 1950 | } |
1934 | 1951 | ||
1935 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1952 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1936 | struct extent_state *state, int uptodate) | 1953 | struct extent_state *state, int uptodate) |
1937 | { | 1954 | { |
1955 | struct inode *inode = page->mapping->host; | ||
1956 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1957 | struct btrfs_ordered_extent *ordered_extent = NULL; | ||
1958 | struct btrfs_workers *workers; | ||
1959 | |||
1938 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 1960 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
1939 | 1961 | ||
1940 | ClearPagePrivate2(page); | 1962 | ClearPagePrivate2(page); |
1941 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1963 | if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, |
1964 | end - start + 1, uptodate)) | ||
1965 | return 0; | ||
1966 | |||
1967 | ordered_extent->work.func = finish_ordered_fn; | ||
1968 | ordered_extent->work.flags = 0; | ||
1969 | |||
1970 | if (btrfs_is_free_space_inode(root, inode)) | ||
1971 | workers = &root->fs_info->endio_freespace_worker; | ||
1972 | else | ||
1973 | workers = &root->fs_info->endio_write_workers; | ||
1974 | btrfs_queue_worker(workers, &ordered_extent->work); | ||
1975 | |||
1976 | return 0; | ||
1942 | } | 1977 | } |
1943 | 1978 | ||
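Note on the hunk above: the writepage end-io hook no longer finishes the ordered extent inline; it only decrements the pending count and queues the remaining completion work (joining a transaction, updating the inode) on the endio workers, using the btrfs_work embedded in the ordered extent. The following is a minimal, self-contained sketch of the container_of pattern that finish_ordered_fn() relies on; it is plain C with made-up names for illustration only, not kernel code.

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for btrfs_work and btrfs_ordered_extent, for illustration only. */
struct work_item {
	void (*func)(struct work_item *w);
};

struct ordered_extent {
	unsigned long long file_offset;
	struct work_item work;		/* work item embedded in the larger struct */
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Worker callback: recover the enclosing ordered extent from the work pointer,
 * the same way finish_ordered_fn() does in the hunk above. */
static void finish_fn(struct work_item *w)
{
	struct ordered_extent *oe = container_of(w, struct ordered_extent, work);

	printf("finishing ordered extent at offset %llu\n", oe->file_offset);
}

int main(void)
{
	struct ordered_extent oe = {
		.file_offset = 4096,
		.work = { .func = finish_fn },
	};

	/* A real worker pool would invoke this from its own thread. */
	oe.work.func(&oe.work);
	return 0;
}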
1944 | /* | 1979 | /* |
@@ -2072,12 +2107,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | |||
2072 | struct btrfs_block_rsv *block_rsv; | 2107 | struct btrfs_block_rsv *block_rsv; |
2073 | int ret; | 2108 | int ret; |
2074 | 2109 | ||
2075 | if (!list_empty(&root->orphan_list) || | 2110 | if (atomic_read(&root->orphan_inodes) || |
2076 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | 2111 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) |
2077 | return; | 2112 | return; |
2078 | 2113 | ||
2079 | spin_lock(&root->orphan_lock); | 2114 | spin_lock(&root->orphan_lock); |
2080 | if (!list_empty(&root->orphan_list)) { | 2115 | if (atomic_read(&root->orphan_inodes)) { |
2081 | spin_unlock(&root->orphan_lock); | 2116 | spin_unlock(&root->orphan_lock); |
2082 | return; | 2117 | return; |
2083 | } | 2118 | } |
@@ -2134,8 +2169,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2134 | block_rsv = NULL; | 2169 | block_rsv = NULL; |
2135 | } | 2170 | } |
2136 | 2171 | ||
2137 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2172 | if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
2138 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2173 | &BTRFS_I(inode)->runtime_flags)) { |
2139 | #if 0 | 2174 | #if 0 |
2140 | /* | 2175 | /* |
2141 | * For proper ENOSPC handling, we should do orphan | 2176 | * For proper ENOSPC handling, we should do orphan |
@@ -2148,12 +2183,12 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2148 | insert = 1; | 2183 | insert = 1; |
2149 | #endif | 2184 | #endif |
2150 | insert = 1; | 2185 | insert = 1; |
2186 | atomic_dec(&root->orphan_inodes); | ||
2151 | } | 2187 | } |
2152 | 2188 | ||
2153 | if (!BTRFS_I(inode)->orphan_meta_reserved) { | 2189 | if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
2154 | BTRFS_I(inode)->orphan_meta_reserved = 1; | 2190 | &BTRFS_I(inode)->runtime_flags)) |
2155 | reserve = 1; | 2191 | reserve = 1; |
2156 | } | ||
2157 | spin_unlock(&root->orphan_lock); | 2192 | spin_unlock(&root->orphan_lock); |
2158 | 2193 | ||
2159 | /* grab metadata reservation from transaction handle */ | 2194 | /* grab metadata reservation from transaction handle */ |
@@ -2166,6 +2201,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2166 | if (insert >= 1) { | 2201 | if (insert >= 1) { |
2167 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); | 2202 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); |
2168 | if (ret && ret != -EEXIST) { | 2203 | if (ret && ret != -EEXIST) { |
2204 | clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | ||
2205 | &BTRFS_I(inode)->runtime_flags); | ||
2169 | btrfs_abort_transaction(trans, root, ret); | 2206 | btrfs_abort_transaction(trans, root, ret); |
2170 | return ret; | 2207 | return ret; |
2171 | } | 2208 | } |
@@ -2196,15 +2233,13 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2196 | int ret = 0; | 2233 | int ret = 0; |
2197 | 2234 | ||
2198 | spin_lock(&root->orphan_lock); | 2235 | spin_lock(&root->orphan_lock); |
2199 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2236 | if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
2200 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2237 | &BTRFS_I(inode)->runtime_flags)) |
2201 | delete_item = 1; | 2238 | delete_item = 1; |
2202 | } | ||
2203 | 2239 | ||
2204 | if (BTRFS_I(inode)->orphan_meta_reserved) { | 2240 | if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, |
2205 | BTRFS_I(inode)->orphan_meta_reserved = 0; | 2241 | &BTRFS_I(inode)->runtime_flags)) |
2206 | release_rsv = 1; | 2242 | release_rsv = 1; |
2207 | } | ||
2208 | spin_unlock(&root->orphan_lock); | 2243 | spin_unlock(&root->orphan_lock); |
2209 | 2244 | ||
2210 | if (trans && delete_item) { | 2245 | if (trans && delete_item) { |
@@ -2212,8 +2247,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2212 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ | 2247 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ |
2213 | } | 2248 | } |
2214 | 2249 | ||
2215 | if (release_rsv) | 2250 | if (release_rsv) { |
2216 | btrfs_orphan_release_metadata(inode); | 2251 | btrfs_orphan_release_metadata(inode); |
2252 | atomic_dec(&root->orphan_inodes); | ||
2253 | } | ||
2217 | 2254 | ||
2218 | return 0; | 2255 | return 0; |
2219 | } | 2256 | } |
@@ -2341,6 +2378,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2341 | ret = PTR_ERR(trans); | 2378 | ret = PTR_ERR(trans); |
2342 | goto out; | 2379 | goto out; |
2343 | } | 2380 | } |
2381 | printk(KERN_ERR "auto deleting %Lu\n", | ||
2382 | found_key.objectid); | ||
2344 | ret = btrfs_del_orphan_item(trans, root, | 2383 | ret = btrfs_del_orphan_item(trans, root, |
2345 | found_key.objectid); | 2384 | found_key.objectid); |
2346 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ | 2385 | BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ |
@@ -2352,9 +2391,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2352 | * add this inode to the orphan list so btrfs_orphan_del does | 2391 | * add this inode to the orphan list so btrfs_orphan_del does |
2353 | * the proper thing when we hit it | 2392 | * the proper thing when we hit it |
2354 | */ | 2393 | */ |
2355 | spin_lock(&root->orphan_lock); | 2394 | set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
2356 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2395 | &BTRFS_I(inode)->runtime_flags); |
2357 | spin_unlock(&root->orphan_lock); | ||
2358 | 2396 | ||
2359 | /* if we have links, this was a truncate, lets do that */ | 2397 | /* if we have links, this was a truncate, lets do that */ |
2360 | if (inode->i_nlink) { | 2398 | if (inode->i_nlink) { |
@@ -2510,7 +2548,7 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2510 | 2548 | ||
2511 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); | 2549 | inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); |
2512 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); | 2550 | BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); |
2513 | BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item); | 2551 | inode->i_version = btrfs_inode_sequence(leaf, inode_item); |
2514 | inode->i_generation = BTRFS_I(inode)->generation; | 2552 | inode->i_generation = BTRFS_I(inode)->generation; |
2515 | inode->i_rdev = 0; | 2553 | inode->i_rdev = 0; |
2516 | rdev = btrfs_inode_rdev(leaf, inode_item); | 2554 | rdev = btrfs_inode_rdev(leaf, inode_item); |
@@ -2594,7 +2632,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2594 | 2632 | ||
2595 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | 2633 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); |
2596 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); | 2634 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); |
2597 | btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence); | 2635 | btrfs_set_inode_sequence(leaf, item, inode->i_version); |
2598 | btrfs_set_inode_transid(leaf, item, trans->transid); | 2636 | btrfs_set_inode_transid(leaf, item, trans->transid); |
2599 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2637 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2600 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2638 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
@@ -2752,6 +2790,8 @@ err: | |||
2752 | goto out; | 2790 | goto out; |
2753 | 2791 | ||
2754 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 2792 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
2793 | inode_inc_iversion(inode); | ||
2794 | inode_inc_iversion(dir); | ||
2755 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2795 | inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2756 | btrfs_update_inode(trans, root, dir); | 2796 | btrfs_update_inode(trans, root, dir); |
2757 | out: | 2797 | out: |
@@ -3089,6 +3129,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
3089 | } | 3129 | } |
3090 | 3130 | ||
3091 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | 3131 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); |
3132 | inode_inc_iversion(dir); | ||
3092 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 3133 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
3093 | ret = btrfs_update_inode(trans, root, dir); | 3134 | ret = btrfs_update_inode(trans, root, dir); |
3094 | if (ret) | 3135 | if (ret) |
@@ -3607,7 +3648,8 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) | |||
3607 | * any new writes get down to disk quickly. | 3648 | * any new writes get down to disk quickly. |
3608 | */ | 3649 | */ |
3609 | if (newsize == 0) | 3650 | if (newsize == 0) |
3610 | BTRFS_I(inode)->ordered_data_close = 1; | 3651 | set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
3652 | &BTRFS_I(inode)->runtime_flags); | ||
3611 | 3653 | ||
3612 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | 3654 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
3613 | truncate_setsize(inode, newsize); | 3655 | truncate_setsize(inode, newsize); |
@@ -3638,6 +3680,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3638 | 3680 | ||
3639 | if (attr->ia_valid) { | 3681 | if (attr->ia_valid) { |
3640 | setattr_copy(inode, attr); | 3682 | setattr_copy(inode, attr); |
3683 | inode_inc_iversion(inode); | ||
3641 | err = btrfs_dirty_inode(inode); | 3684 | err = btrfs_dirty_inode(inode); |
3642 | 3685 | ||
3643 | if (!err && attr->ia_valid & ATTR_MODE) | 3686 | if (!err && attr->ia_valid & ATTR_MODE) |
@@ -3671,7 +3714,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
3671 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3714 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
3672 | 3715 | ||
3673 | if (root->fs_info->log_root_recovering) { | 3716 | if (root->fs_info->log_root_recovering) { |
3674 | BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); | 3717 | BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
3718 | &BTRFS_I(inode)->runtime_flags)); | ||
3675 | goto no_delete; | 3719 | goto no_delete; |
3676 | } | 3720 | } |
3677 | 3721 | ||
@@ -4066,7 +4110,7 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
4066 | 4110 | ||
4067 | BTRFS_I(inode)->root = root; | 4111 | BTRFS_I(inode)->root = root; |
4068 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4112 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
4069 | BTRFS_I(inode)->dummy_inode = 1; | 4113 | set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); |
4070 | 4114 | ||
4071 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; | 4115 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; |
4072 | inode->i_op = &btrfs_dir_ro_inode_operations; | 4116 | inode->i_op = &btrfs_dir_ro_inode_operations; |
@@ -4370,7 +4414,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4370 | int ret = 0; | 4414 | int ret = 0; |
4371 | bool nolock = false; | 4415 | bool nolock = false; |
4372 | 4416 | ||
4373 | if (BTRFS_I(inode)->dummy_inode) | 4417 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) |
4374 | return 0; | 4418 | return 0; |
4375 | 4419 | ||
4376 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) | 4420 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) |
@@ -4403,7 +4447,7 @@ int btrfs_dirty_inode(struct inode *inode) | |||
4403 | struct btrfs_trans_handle *trans; | 4447 | struct btrfs_trans_handle *trans; |
4404 | int ret; | 4448 | int ret; |
4405 | 4449 | ||
4406 | if (BTRFS_I(inode)->dummy_inode) | 4450 | if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) |
4407 | return 0; | 4451 | return 0; |
4408 | 4452 | ||
4409 | trans = btrfs_join_transaction(root); | 4453 | trans = btrfs_join_transaction(root); |
@@ -4431,46 +4475,18 @@ int btrfs_dirty_inode(struct inode *inode) | |||
4431 | * This is a copy of file_update_time. We need this so we can return error on | 4475 | * This is a copy of file_update_time. We need this so we can return error on |
4432 | * ENOSPC for updating the inode in the case of file write and mmap writes. | 4476 | * ENOSPC for updating the inode in the case of file write and mmap writes. |
4433 | */ | 4477 | */ |
4434 | int btrfs_update_time(struct file *file) | 4478 | static int btrfs_update_time(struct inode *inode, struct timespec *now, |
4479 | int flags) | ||
4435 | { | 4480 | { |
4436 | struct inode *inode = file->f_path.dentry->d_inode; | 4481 | if (flags & S_VERSION) |
4437 | struct timespec now; | ||
4438 | int ret; | ||
4439 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; | ||
4440 | |||
4441 | /* First try to exhaust all avenues to not sync */ | ||
4442 | if (IS_NOCMTIME(inode)) | ||
4443 | return 0; | ||
4444 | |||
4445 | now = current_fs_time(inode->i_sb); | ||
4446 | if (!timespec_equal(&inode->i_mtime, &now)) | ||
4447 | sync_it = S_MTIME; | ||
4448 | |||
4449 | if (!timespec_equal(&inode->i_ctime, &now)) | ||
4450 | sync_it |= S_CTIME; | ||
4451 | |||
4452 | if (IS_I_VERSION(inode)) | ||
4453 | sync_it |= S_VERSION; | ||
4454 | |||
4455 | if (!sync_it) | ||
4456 | return 0; | ||
4457 | |||
4458 | /* Finally allowed to write? Takes lock. */ | ||
4459 | if (mnt_want_write_file(file)) | ||
4460 | return 0; | ||
4461 | |||
4462 | /* Only change inode inside the lock region */ | ||
4463 | if (sync_it & S_VERSION) | ||
4464 | inode_inc_iversion(inode); | 4482 | inode_inc_iversion(inode); |
4465 | if (sync_it & S_CTIME) | 4483 | if (flags & S_CTIME) |
4466 | inode->i_ctime = now; | 4484 | inode->i_ctime = *now; |
4467 | if (sync_it & S_MTIME) | 4485 | if (flags & S_MTIME) |
4468 | inode->i_mtime = now; | 4486 | inode->i_mtime = *now; |
4469 | ret = btrfs_dirty_inode(inode); | 4487 | if (flags & S_ATIME) |
4470 | if (!ret) | 4488 | inode->i_atime = *now; |
4471 | mark_inode_dirty_sync(inode); | 4489 | return btrfs_dirty_inode(inode); |
4472 | mnt_drop_write(file->f_path.mnt); | ||
4473 | return ret; | ||
4474 | } | 4490 | } |
4475 | 4491 | ||
4476 | /* | 4492 | /* |
@@ -4730,6 +4746,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
4730 | 4746 | ||
4731 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 4747 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
4732 | name_len * 2); | 4748 | name_len * 2); |
4749 | inode_inc_iversion(parent_inode); | ||
4733 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 4750 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
4734 | ret = btrfs_update_inode(trans, root, parent_inode); | 4751 | ret = btrfs_update_inode(trans, root, parent_inode); |
4735 | if (ret) | 4752 | if (ret) |
@@ -4937,6 +4954,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4937 | } | 4954 | } |
4938 | 4955 | ||
4939 | btrfs_inc_nlink(inode); | 4956 | btrfs_inc_nlink(inode); |
4957 | inode_inc_iversion(inode); | ||
4940 | inode->i_ctime = CURRENT_TIME; | 4958 | inode->i_ctime = CURRENT_TIME; |
4941 | ihold(inode); | 4959 | ihold(inode); |
4942 | 4960 | ||
@@ -5903,9 +5921,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5903 | struct btrfs_dio_private *dip = bio->bi_private; | 5921 | struct btrfs_dio_private *dip = bio->bi_private; |
5904 | struct inode *inode = dip->inode; | 5922 | struct inode *inode = dip->inode; |
5905 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5923 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5906 | struct btrfs_trans_handle *trans; | ||
5907 | struct btrfs_ordered_extent *ordered = NULL; | 5924 | struct btrfs_ordered_extent *ordered = NULL; |
5908 | struct extent_state *cached_state = NULL; | ||
5909 | u64 ordered_offset = dip->logical_offset; | 5925 | u64 ordered_offset = dip->logical_offset; |
5910 | u64 ordered_bytes = dip->bytes; | 5926 | u64 ordered_bytes = dip->bytes; |
5911 | int ret; | 5927 | int ret; |
@@ -5915,73 +5931,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5915 | again: | 5931 | again: |
5916 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, | 5932 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, |
5917 | &ordered_offset, | 5933 | &ordered_offset, |
5918 | ordered_bytes); | 5934 | ordered_bytes, !err); |
5919 | if (!ret) | 5935 | if (!ret) |
5920 | goto out_test; | 5936 | goto out_test; |
5921 | 5937 | ||
5922 | BUG_ON(!ordered); | 5938 | ordered->work.func = finish_ordered_fn; |
5923 | 5939 | ordered->work.flags = 0; | |
5924 | trans = btrfs_join_transaction(root); | 5940 | btrfs_queue_worker(&root->fs_info->endio_write_workers, |
5925 | if (IS_ERR(trans)) { | 5941 | &ordered->work); |
5926 | err = -ENOMEM; | ||
5927 | goto out; | ||
5928 | } | ||
5929 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5930 | |||
5931 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
5932 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5933 | if (!ret) | ||
5934 | err = btrfs_update_inode_fallback(trans, root, inode); | ||
5935 | goto out; | ||
5936 | } | ||
5937 | |||
5938 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5939 | ordered->file_offset + ordered->len - 1, 0, | ||
5940 | &cached_state); | ||
5941 | |||
5942 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
5943 | ret = btrfs_mark_extent_written(trans, inode, | ||
5944 | ordered->file_offset, | ||
5945 | ordered->file_offset + | ||
5946 | ordered->len); | ||
5947 | if (ret) { | ||
5948 | err = ret; | ||
5949 | goto out_unlock; | ||
5950 | } | ||
5951 | } else { | ||
5952 | ret = insert_reserved_file_extent(trans, inode, | ||
5953 | ordered->file_offset, | ||
5954 | ordered->start, | ||
5955 | ordered->disk_len, | ||
5956 | ordered->len, | ||
5957 | ordered->len, | ||
5958 | 0, 0, 0, | ||
5959 | BTRFS_FILE_EXTENT_REG); | ||
5960 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
5961 | ordered->file_offset, ordered->len); | ||
5962 | if (ret) { | ||
5963 | err = ret; | ||
5964 | WARN_ON(1); | ||
5965 | goto out_unlock; | ||
5966 | } | ||
5967 | } | ||
5968 | |||
5969 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
5970 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5971 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) | ||
5972 | btrfs_update_inode_fallback(trans, root, inode); | ||
5973 | ret = 0; | ||
5974 | out_unlock: | ||
5975 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5976 | ordered->file_offset + ordered->len - 1, | ||
5977 | &cached_state, GFP_NOFS); | ||
5978 | out: | ||
5979 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
5980 | btrfs_end_transaction(trans, root); | ||
5981 | ordered_offset = ordered->file_offset + ordered->len; | ||
5982 | btrfs_put_ordered_extent(ordered); | ||
5983 | btrfs_put_ordered_extent(ordered); | ||
5984 | |||
5985 | out_test: | 5942 | out_test: |
5986 | /* | 5943 | /* |
5987 | * our bio might span multiple ordered extents. If we haven't | 5944 | * our bio might span multiple ordered extents. If we haven't |
@@ -5990,12 +5947,12 @@ out_test: | |||
5990 | if (ordered_offset < dip->logical_offset + dip->bytes) { | 5947 | if (ordered_offset < dip->logical_offset + dip->bytes) { |
5991 | ordered_bytes = dip->logical_offset + dip->bytes - | 5948 | ordered_bytes = dip->logical_offset + dip->bytes - |
5992 | ordered_offset; | 5949 | ordered_offset; |
5950 | ordered = NULL; | ||
5993 | goto again; | 5951 | goto again; |
5994 | } | 5952 | } |
5995 | out_done: | 5953 | out_done: |
5996 | bio->bi_private = dip->private; | 5954 | bio->bi_private = dip->private; |
5997 | 5955 | ||
5998 | kfree(dip->csums); | ||
5999 | kfree(dip); | 5956 | kfree(dip); |
6000 | 5957 | ||
6001 | /* If we had an error make sure to clear the uptodate flag */ | 5958 | /* If we had an error make sure to clear the uptodate flag */ |
@@ -6063,9 +6020,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
6063 | int ret; | 6020 | int ret; |
6064 | 6021 | ||
6065 | bio_get(bio); | 6022 | bio_get(bio); |
6066 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 6023 | |
6067 | if (ret) | 6024 | if (!write) { |
6068 | goto err; | 6025 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
6026 | if (ret) | ||
6027 | goto err; | ||
6028 | } | ||
6069 | 6029 | ||
6070 | if (skip_sum) | 6030 | if (skip_sum) |
6071 | goto map; | 6031 | goto map; |
@@ -6485,13 +6445,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | |||
6485 | 6445 | ||
6486 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) | 6446 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) |
6487 | { | 6447 | { |
6448 | struct inode *inode = page->mapping->host; | ||
6488 | struct extent_io_tree *tree; | 6449 | struct extent_io_tree *tree; |
6489 | struct btrfs_ordered_extent *ordered; | 6450 | struct btrfs_ordered_extent *ordered; |
6490 | struct extent_state *cached_state = NULL; | 6451 | struct extent_state *cached_state = NULL; |
6491 | u64 page_start = page_offset(page); | 6452 | u64 page_start = page_offset(page); |
6492 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 6453 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
6493 | 6454 | ||
6494 | |||
6495 | /* | 6455 | /* |
6496 | * we have the page locked, so new writeback can't start, | 6456 | * we have the page locked, so new writeback can't start, |
6497 | * and the dirty bit won't be cleared while we are here. | 6457 | * and the dirty bit won't be cleared while we are here. |
@@ -6501,13 +6461,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
6501 | */ | 6461 | */ |
6502 | wait_on_page_writeback(page); | 6462 | wait_on_page_writeback(page); |
6503 | 6463 | ||
6504 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 6464 | tree = &BTRFS_I(inode)->io_tree; |
6505 | if (offset) { | 6465 | if (offset) { |
6506 | btrfs_releasepage(page, GFP_NOFS); | 6466 | btrfs_releasepage(page, GFP_NOFS); |
6507 | return; | 6467 | return; |
6508 | } | 6468 | } |
6509 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); | 6469 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); |
6510 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 6470 | ordered = btrfs_lookup_ordered_extent(inode, |
6511 | page_offset(page)); | 6471 | page_offset(page)); |
6512 | if (ordered) { | 6472 | if (ordered) { |
6513 | /* | 6473 | /* |
@@ -6522,9 +6482,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
6522 | * whoever cleared the private bit is responsible | 6482 | * whoever cleared the private bit is responsible |
6523 | * for the finish_ordered_io | 6483 | * for the finish_ordered_io |
6524 | */ | 6484 | */ |
6525 | if (TestClearPagePrivate2(page)) { | 6485 | if (TestClearPagePrivate2(page) && |
6526 | btrfs_finish_ordered_io(page->mapping->host, | 6486 | btrfs_dec_test_ordered_pending(inode, &ordered, page_start, |
6527 | page_start, page_end); | 6487 | PAGE_CACHE_SIZE, 1)) { |
6488 | btrfs_finish_ordered_io(ordered); | ||
6528 | } | 6489 | } |
6529 | btrfs_put_ordered_extent(ordered); | 6490 | btrfs_put_ordered_extent(ordered); |
6530 | cached_state = NULL; | 6491 | cached_state = NULL; |
@@ -6576,7 +6537,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6576 | 6537 | ||
6577 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 6538 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
6578 | if (!ret) { | 6539 | if (!ret) { |
6579 | ret = btrfs_update_time(vma->vm_file); | 6540 | ret = file_update_time(vma->vm_file); |
6580 | reserved = 1; | 6541 | reserved = 1; |
6581 | } | 6542 | } |
6582 | if (ret) { | 6543 | if (ret) { |
@@ -6771,7 +6732,8 @@ static int btrfs_truncate(struct inode *inode) | |||
6771 | * using truncate to replace the contents of the file will | 6732 | * using truncate to replace the contents of the file will |
6772 | * end up with a zero length file after a crash. | 6733 | * end up with a zero length file after a crash. |
6773 | */ | 6734 | */ |
6774 | if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) | 6735 | if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
6736 | &BTRFS_I(inode)->runtime_flags)) | ||
6775 | btrfs_add_ordered_operation(trans, root, inode); | 6737 | btrfs_add_ordered_operation(trans, root, inode); |
6776 | 6738 | ||
6777 | while (1) { | 6739 | while (1) { |
@@ -6894,7 +6856,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6894 | ei->root = NULL; | 6856 | ei->root = NULL; |
6895 | ei->space_info = NULL; | 6857 | ei->space_info = NULL; |
6896 | ei->generation = 0; | 6858 | ei->generation = 0; |
6897 | ei->sequence = 0; | ||
6898 | ei->last_trans = 0; | 6859 | ei->last_trans = 0; |
6899 | ei->last_sub_trans = 0; | 6860 | ei->last_sub_trans = 0; |
6900 | ei->logged_trans = 0; | 6861 | ei->logged_trans = 0; |
@@ -6909,11 +6870,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6909 | ei->outstanding_extents = 0; | 6870 | ei->outstanding_extents = 0; |
6910 | ei->reserved_extents = 0; | 6871 | ei->reserved_extents = 0; |
6911 | 6872 | ||
6912 | ei->ordered_data_close = 0; | 6873 | ei->runtime_flags = 0; |
6913 | ei->orphan_meta_reserved = 0; | ||
6914 | ei->dummy_inode = 0; | ||
6915 | ei->in_defrag = 0; | ||
6916 | ei->delalloc_meta_reserved = 0; | ||
6917 | ei->force_compress = BTRFS_COMPRESS_NONE; | 6874 | ei->force_compress = BTRFS_COMPRESS_NONE; |
6918 | 6875 | ||
6919 | ei->delayed_node = NULL; | 6876 | ei->delayed_node = NULL; |
@@ -6927,7 +6884,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6927 | mutex_init(&ei->log_mutex); | 6884 | mutex_init(&ei->log_mutex); |
6928 | mutex_init(&ei->delalloc_mutex); | 6885 | mutex_init(&ei->delalloc_mutex); |
6929 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6886 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
6930 | INIT_LIST_HEAD(&ei->i_orphan); | ||
6931 | INIT_LIST_HEAD(&ei->delalloc_inodes); | 6887 | INIT_LIST_HEAD(&ei->delalloc_inodes); |
6932 | INIT_LIST_HEAD(&ei->ordered_operations); | 6888 | INIT_LIST_HEAD(&ei->ordered_operations); |
6933 | RB_CLEAR_NODE(&ei->rb_node); | 6889 | RB_CLEAR_NODE(&ei->rb_node); |
@@ -6972,13 +6928,12 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6972 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6928 | spin_unlock(&root->fs_info->ordered_extent_lock); |
6973 | } | 6929 | } |
6974 | 6930 | ||
6975 | spin_lock(&root->orphan_lock); | 6931 | if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
6976 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6932 | &BTRFS_I(inode)->runtime_flags)) { |
6977 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", | 6933 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", |
6978 | (unsigned long long)btrfs_ino(inode)); | 6934 | (unsigned long long)btrfs_ino(inode)); |
6979 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6935 | atomic_dec(&root->orphan_inodes); |
6980 | } | 6936 | } |
6981 | spin_unlock(&root->orphan_lock); | ||
6982 | 6937 | ||
6983 | while (1) { | 6938 | while (1) { |
6984 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6939 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -7193,6 +7148,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
7193 | if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) | 7148 | if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) |
7194 | btrfs_add_ordered_operation(trans, root, old_inode); | 7149 | btrfs_add_ordered_operation(trans, root, old_inode); |
7195 | 7150 | ||
7151 | inode_inc_iversion(old_dir); | ||
7152 | inode_inc_iversion(new_dir); | ||
7153 | inode_inc_iversion(old_inode); | ||
7196 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 7154 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
7197 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 7155 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
7198 | old_inode->i_ctime = ctime; | 7156 | old_inode->i_ctime = ctime; |
@@ -7219,6 +7177,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
7219 | } | 7177 | } |
7220 | 7178 | ||
7221 | if (new_inode) { | 7179 | if (new_inode) { |
7180 | inode_inc_iversion(new_inode); | ||
7222 | new_inode->i_ctime = CURRENT_TIME; | 7181 | new_inode->i_ctime = CURRENT_TIME; |
7223 | if (unlikely(btrfs_ino(new_inode) == | 7182 | if (unlikely(btrfs_ino(new_inode) == |
7224 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | 7183 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
@@ -7490,6 +7449,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
7490 | cur_offset += ins.offset; | 7449 | cur_offset += ins.offset; |
7491 | *alloc_hint = ins.objectid + ins.offset; | 7450 | *alloc_hint = ins.objectid + ins.offset; |
7492 | 7451 | ||
7452 | inode_inc_iversion(inode); | ||
7493 | inode->i_ctime = CURRENT_TIME; | 7453 | inode->i_ctime = CURRENT_TIME; |
7494 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 7454 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
7495 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 7455 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
@@ -7647,6 +7607,7 @@ static const struct inode_operations btrfs_file_inode_operations = { | |||
7647 | .permission = btrfs_permission, | 7607 | .permission = btrfs_permission, |
7648 | .fiemap = btrfs_fiemap, | 7608 | .fiemap = btrfs_fiemap, |
7649 | .get_acl = btrfs_get_acl, | 7609 | .get_acl = btrfs_get_acl, |
7610 | .update_time = btrfs_update_time, | ||
7650 | }; | 7611 | }; |
7651 | static const struct inode_operations btrfs_special_inode_operations = { | 7612 | static const struct inode_operations btrfs_special_inode_operations = { |
7652 | .getattr = btrfs_getattr, | 7613 | .getattr = btrfs_getattr, |
@@ -7657,6 +7618,7 @@ static const struct inode_operations btrfs_special_inode_operations = { | |||
7657 | .listxattr = btrfs_listxattr, | 7618 | .listxattr = btrfs_listxattr, |
7658 | .removexattr = btrfs_removexattr, | 7619 | .removexattr = btrfs_removexattr, |
7659 | .get_acl = btrfs_get_acl, | 7620 | .get_acl = btrfs_get_acl, |
7621 | .update_time = btrfs_update_time, | ||
7660 | }; | 7622 | }; |
7661 | static const struct inode_operations btrfs_symlink_inode_operations = { | 7623 | static const struct inode_operations btrfs_symlink_inode_operations = { |
7662 | .readlink = generic_readlink, | 7624 | .readlink = generic_readlink, |
@@ -7670,6 +7632,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7670 | .listxattr = btrfs_listxattr, | 7632 | .listxattr = btrfs_listxattr, |
7671 | .removexattr = btrfs_removexattr, | 7633 | .removexattr = btrfs_removexattr, |
7672 | .get_acl = btrfs_get_acl, | 7634 | .get_acl = btrfs_get_acl, |
7635 | .update_time = btrfs_update_time, | ||
7673 | }; | 7636 | }; |
7674 | 7637 | ||
7675 | const struct dentry_operations btrfs_dentry_operations = { | 7638 | const struct dentry_operations btrfs_dentry_operations = { |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14f8e1faa46e..24b776c08d99 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
261 | } | 261 | } |
262 | 262 | ||
263 | btrfs_update_iflags(inode); | 263 | btrfs_update_iflags(inode); |
264 | inode_inc_iversion(inode); | ||
264 | inode->i_ctime = CURRENT_TIME; | 265 | inode->i_ctime = CURRENT_TIME; |
265 | ret = btrfs_update_inode(trans, root, inode); | 266 | ret = btrfs_update_inode(trans, root, inode); |
266 | 267 | ||
@@ -367,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
367 | return PTR_ERR(trans); | 368 | return PTR_ERR(trans); |
368 | 369 | ||
369 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 370 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
370 | 0, objectid, NULL, 0, 0, 0, 0); | 371 | 0, objectid, NULL, 0, 0, 0); |
371 | if (IS_ERR(leaf)) { | 372 | if (IS_ERR(leaf)) { |
372 | ret = PTR_ERR(leaf); | 373 | ret = PTR_ERR(leaf); |
373 | goto fail; | 374 | goto fail; |
@@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
2262 | di_args->bytes_used = dev->bytes_used; | 2263 | di_args->bytes_used = dev->bytes_used; |
2263 | di_args->total_bytes = dev->total_bytes; | 2264 | di_args->total_bytes = dev->total_bytes; |
2264 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); | 2265 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); |
2265 | if (dev->name) | 2266 | if (dev->name) { |
2266 | strncpy(di_args->path, dev->name, sizeof(di_args->path)); | 2267 | strncpy(di_args->path, dev->name, sizeof(di_args->path)); |
2267 | else | 2268 | di_args->path[sizeof(di_args->path) - 1] = 0; |
2269 | } else { | ||
2268 | di_args->path[0] = '\0'; | 2270 | di_args->path[0] = '\0'; |
2271 | } | ||
2269 | 2272 | ||
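Aside on the strncpy() change above: strncpy() does not NUL-terminate the destination when the source is at least as long as the size argument, which is why the patch now writes the terminator explicitly after the copy. A small standalone illustration (hypothetical buffer, not from the patch):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char dst[4];

	/* strncpy() copies at most sizeof(dst) bytes and appends no terminator
	 * when the source string is that long or longer ... */
	strncpy(dst, "abcdefgh", sizeof(dst));
	dst[sizeof(dst) - 1] = '\0';	/* ...so terminate explicitly, as the patch does */
	printf("%s\n", dst);		/* prints "abc" */
	return 0;
}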
2270 | out: | 2273 | out: |
2271 | if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) | 2274 | if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) |
@@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
2622 | btrfs_mark_buffer_dirty(leaf); | 2625 | btrfs_mark_buffer_dirty(leaf); |
2623 | btrfs_release_path(path); | 2626 | btrfs_release_path(path); |
2624 | 2627 | ||
2628 | inode_inc_iversion(inode); | ||
2625 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2629 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
2626 | 2630 | ||
2627 | /* | 2631 | /* |
@@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) | |||
2914 | up_read(&info->groups_sem); | 2918 | up_read(&info->groups_sem); |
2915 | } | 2919 | } |
2916 | 2920 | ||
2917 | user_dest = (struct btrfs_ioctl_space_info *) | 2921 | user_dest = (struct btrfs_ioctl_space_info __user *) |
2918 | (arg + sizeof(struct btrfs_ioctl_space_args)); | 2922 | (arg + sizeof(struct btrfs_ioctl_space_args)); |
2919 | 2923 | ||
2920 | if (copy_to_user(user_dest, dest_orig, alloc_size)) | 2924 | if (copy_to_user(user_dest, dest_orig, alloc_size)) |
@@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | |||
3042 | return ret; | 3046 | return ret; |
3043 | } | 3047 | } |
3044 | 3048 | ||
3049 | static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, | ||
3050 | void __user *arg, int reset_after_read) | ||
3051 | { | ||
3052 | struct btrfs_ioctl_get_dev_stats *sa; | ||
3053 | int ret; | ||
3054 | |||
3055 | if (reset_after_read && !capable(CAP_SYS_ADMIN)) | ||
3056 | return -EPERM; | ||
3057 | |||
3058 | sa = memdup_user(arg, sizeof(*sa)); | ||
3059 | if (IS_ERR(sa)) | ||
3060 | return PTR_ERR(sa); | ||
3061 | |||
3062 | ret = btrfs_get_dev_stats(root, sa, reset_after_read); | ||
3063 | |||
3064 | if (copy_to_user(arg, sa, sizeof(*sa))) | ||
3065 | ret = -EFAULT; | ||
3066 | |||
3067 | kfree(sa); | ||
3068 | return ret; | ||
3069 | } | ||
3070 | |||
3045 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | 3071 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) |
3046 | { | 3072 | { |
3047 | int ret = 0; | 3073 | int ret = 0; |
@@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, | |||
3212 | } | 3238 | } |
3213 | } | 3239 | } |
3214 | 3240 | ||
3215 | static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) | 3241 | static long btrfs_ioctl_balance(struct file *file, void __user *arg) |
3216 | { | 3242 | { |
3243 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
3217 | struct btrfs_fs_info *fs_info = root->fs_info; | 3244 | struct btrfs_fs_info *fs_info = root->fs_info; |
3218 | struct btrfs_ioctl_balance_args *bargs; | 3245 | struct btrfs_ioctl_balance_args *bargs; |
3219 | struct btrfs_balance_control *bctl; | 3246 | struct btrfs_balance_control *bctl; |
@@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg) | |||
3225 | if (fs_info->sb->s_flags & MS_RDONLY) | 3252 | if (fs_info->sb->s_flags & MS_RDONLY) |
3226 | return -EROFS; | 3253 | return -EROFS; |
3227 | 3254 | ||
3255 | ret = mnt_want_write(file->f_path.mnt); | ||
3256 | if (ret) | ||
3257 | return ret; | ||
3258 | |||
3228 | mutex_lock(&fs_info->volume_mutex); | 3259 | mutex_lock(&fs_info->volume_mutex); |
3229 | mutex_lock(&fs_info->balance_mutex); | 3260 | mutex_lock(&fs_info->balance_mutex); |
3230 | 3261 | ||
@@ -3291,6 +3322,7 @@ out_bargs: | |||
3291 | out: | 3322 | out: |
3292 | mutex_unlock(&fs_info->balance_mutex); | 3323 | mutex_unlock(&fs_info->balance_mutex); |
3293 | mutex_unlock(&fs_info->volume_mutex); | 3324 | mutex_unlock(&fs_info->volume_mutex); |
3325 | mnt_drop_write(file->f_path.mnt); | ||
3294 | return ret; | 3326 | return ret; |
3295 | } | 3327 | } |
3296 | 3328 | ||
@@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3386 | case BTRFS_IOC_DEV_INFO: | 3418 | case BTRFS_IOC_DEV_INFO: |
3387 | return btrfs_ioctl_dev_info(root, argp); | 3419 | return btrfs_ioctl_dev_info(root, argp); |
3388 | case BTRFS_IOC_BALANCE: | 3420 | case BTRFS_IOC_BALANCE: |
3389 | return btrfs_ioctl_balance(root, NULL); | 3421 | return btrfs_ioctl_balance(file, NULL); |
3390 | case BTRFS_IOC_CLONE: | 3422 | case BTRFS_IOC_CLONE: |
3391 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); | 3423 | return btrfs_ioctl_clone(file, arg, 0, 0, 0); |
3392 | case BTRFS_IOC_CLONE_RANGE: | 3424 | case BTRFS_IOC_CLONE_RANGE: |
@@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3419 | case BTRFS_IOC_SCRUB_PROGRESS: | 3451 | case BTRFS_IOC_SCRUB_PROGRESS: |
3420 | return btrfs_ioctl_scrub_progress(root, argp); | 3452 | return btrfs_ioctl_scrub_progress(root, argp); |
3421 | case BTRFS_IOC_BALANCE_V2: | 3453 | case BTRFS_IOC_BALANCE_V2: |
3422 | return btrfs_ioctl_balance(root, argp); | 3454 | return btrfs_ioctl_balance(file, argp); |
3423 | case BTRFS_IOC_BALANCE_CTL: | 3455 | case BTRFS_IOC_BALANCE_CTL: |
3424 | return btrfs_ioctl_balance_ctl(root, arg); | 3456 | return btrfs_ioctl_balance_ctl(root, arg); |
3425 | case BTRFS_IOC_BALANCE_PROGRESS: | 3457 | case BTRFS_IOC_BALANCE_PROGRESS: |
3426 | return btrfs_ioctl_balance_progress(root, argp); | 3458 | return btrfs_ioctl_balance_progress(root, argp); |
3459 | case BTRFS_IOC_GET_DEV_STATS: | ||
3460 | return btrfs_ioctl_get_dev_stats(root, argp, 0); | ||
3461 | case BTRFS_IOC_GET_AND_RESET_DEV_STATS: | ||
3462 | return btrfs_ioctl_get_dev_stats(root, argp, 1); | ||
3427 | } | 3463 | } |
3428 | 3464 | ||
3429 | return -ENOTTY; | 3465 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 086e6bdae1c4..497c530724cf 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -266,6 +266,35 @@ struct btrfs_ioctl_logical_ino_args { | |||
266 | __u64 inodes; | 266 | __u64 inodes; |
267 | }; | 267 | }; |
268 | 268 | ||
269 | enum btrfs_dev_stat_values { | ||
270 | /* disk I/O failure stats */ | ||
271 | BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
272 | BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
273 | BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */ | ||
274 | |||
275 | /* stats for indirect indications for I/O failures */ | ||
276 | BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or | ||
277 | * contents is illegal: this is an | ||
278 | * indication that the block was damaged | ||
279 | * during read or write, or written to | ||
280 | * wrong location or read from wrong | ||
281 | * location */ | ||
282 | BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not | ||
283 | * been written */ | ||
284 | |||
285 | BTRFS_DEV_STAT_VALUES_MAX | ||
286 | }; | ||
287 | |||
288 | struct btrfs_ioctl_get_dev_stats { | ||
289 | __u64 devid; /* in */ | ||
290 | __u64 nr_items; /* in/out */ | ||
291 | |||
292 | /* out values: */ | ||
293 | __u64 values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
294 | |||
295 | __u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */ | ||
296 | }; | ||
297 | |||
269 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ | 298 | #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ |
270 | struct btrfs_ioctl_vol_args) | 299 | struct btrfs_ioctl_vol_args) |
271 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ | 300 | #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ |
@@ -330,5 +359,9 @@ struct btrfs_ioctl_logical_ino_args { | |||
330 | struct btrfs_ioctl_ino_path_args) | 359 | struct btrfs_ioctl_ino_path_args) |
331 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ | 360 | #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \ |
332 | struct btrfs_ioctl_ino_path_args) | 361 | struct btrfs_ioctl_ino_path_args) |
362 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | ||
363 | struct btrfs_ioctl_get_dev_stats) | ||
364 | #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \ | ||
365 | struct btrfs_ioctl_get_dev_stats) | ||
333 | 366 | ||
334 | #endif | 367 | #endif |
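For context, the two new ioctls above expose per-device error counters (indexed by enum btrfs_dev_stat_values) to user space. Below is a rough userspace sketch of how they might be called; the include path, mount point, and devid value are assumptions for illustration and are not part of the patch.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "ioctl.h"	/* assumed local copy of the btrfs ioctl.h shown above */

int main(void)
{
	struct btrfs_ioctl_get_dev_stats sa;
	int fd = open("/mnt/btrfs", O_RDONLY);	/* any file or dir on the filesystem */
	int i;

	if (fd < 0)
		return 1;

	memset(&sa, 0, sizeof(sa));
	sa.devid = 1;				/* device to query (assumed) */
	sa.nr_items = BTRFS_DEV_STAT_VALUES_MAX;

	if (ioctl(fd, BTRFS_IOC_GET_DEV_STATS, &sa) == 0)
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
			printf("stat[%d] = %llu\n", i,
			       (unsigned long long)sa.values[i]);

	close(fd);
	return 0;
}

The GET_AND_RESET variant clears the counters after reading and, per the handler added in ioctl.c above, requires CAP_SYS_ADMIN.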
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index bbf6d0d9aebe..9e138cdc36c5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
196 | entry->len = len; | 196 | entry->len = len; |
197 | entry->disk_len = disk_len; | 197 | entry->disk_len = disk_len; |
198 | entry->bytes_left = len; | 198 | entry->bytes_left = len; |
199 | entry->inode = inode; | 199 | entry->inode = igrab(inode); |
200 | entry->compress_type = compress_type; | 200 | entry->compress_type = compress_type; |
201 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 201 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
202 | set_bit(type, &entry->flags); | 202 | set_bit(type, &entry->flags); |
@@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
212 | 212 | ||
213 | trace_btrfs_ordered_extent_add(inode, entry); | 213 | trace_btrfs_ordered_extent_add(inode, entry); |
214 | 214 | ||
215 | spin_lock(&tree->lock); | 215 | spin_lock_irq(&tree->lock); |
216 | node = tree_insert(&tree->tree, file_offset, | 216 | node = tree_insert(&tree->tree, file_offset, |
217 | &entry->rb_node); | 217 | &entry->rb_node); |
218 | if (node) | 218 | if (node) |
219 | ordered_data_tree_panic(inode, -EEXIST, file_offset); | 219 | ordered_data_tree_panic(inode, -EEXIST, file_offset); |
220 | spin_unlock(&tree->lock); | 220 | spin_unlock_irq(&tree->lock); |
221 | 221 | ||
222 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 222 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
223 | list_add_tail(&entry->root_extent_list, | 223 | list_add_tail(&entry->root_extent_list, |
@@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode, | |||
264 | struct btrfs_ordered_inode_tree *tree; | 264 | struct btrfs_ordered_inode_tree *tree; |
265 | 265 | ||
266 | tree = &BTRFS_I(inode)->ordered_tree; | 266 | tree = &BTRFS_I(inode)->ordered_tree; |
267 | spin_lock(&tree->lock); | 267 | spin_lock_irq(&tree->lock); |
268 | list_add_tail(&sum->list, &entry->list); | 268 | list_add_tail(&sum->list, &entry->list); |
269 | spin_unlock(&tree->lock); | 269 | spin_unlock_irq(&tree->lock); |
270 | } | 270 | } |
271 | 271 | ||
272 | /* | 272 | /* |
@@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode, | |||
283 | */ | 283 | */ |
284 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | 284 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, |
285 | struct btrfs_ordered_extent **cached, | 285 | struct btrfs_ordered_extent **cached, |
286 | u64 *file_offset, u64 io_size) | 286 | u64 *file_offset, u64 io_size, int uptodate) |
287 | { | 287 | { |
288 | struct btrfs_ordered_inode_tree *tree; | 288 | struct btrfs_ordered_inode_tree *tree; |
289 | struct rb_node *node; | 289 | struct rb_node *node; |
290 | struct btrfs_ordered_extent *entry = NULL; | 290 | struct btrfs_ordered_extent *entry = NULL; |
291 | int ret; | 291 | int ret; |
292 | unsigned long flags; | ||
292 | u64 dec_end; | 293 | u64 dec_end; |
293 | u64 dec_start; | 294 | u64 dec_start; |
294 | u64 to_dec; | 295 | u64 to_dec; |
295 | 296 | ||
296 | tree = &BTRFS_I(inode)->ordered_tree; | 297 | tree = &BTRFS_I(inode)->ordered_tree; |
297 | spin_lock(&tree->lock); | 298 | spin_lock_irqsave(&tree->lock, flags); |
298 | node = tree_search(tree, *file_offset); | 299 | node = tree_search(tree, *file_offset); |
299 | if (!node) { | 300 | if (!node) { |
300 | ret = 1; | 301 | ret = 1; |
@@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
323 | (unsigned long long)to_dec); | 324 | (unsigned long long)to_dec); |
324 | } | 325 | } |
325 | entry->bytes_left -= to_dec; | 326 | entry->bytes_left -= to_dec; |
327 | if (!uptodate) | ||
328 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | ||
329 | |||
326 | if (entry->bytes_left == 0) | 330 | if (entry->bytes_left == 0) |
327 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 331 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
328 | else | 332 | else |
@@ -332,7 +336,7 @@ out: | |||
332 | *cached = entry; | 336 | *cached = entry; |
333 | atomic_inc(&entry->refs); | 337 | atomic_inc(&entry->refs); |
334 | } | 338 | } |
335 | spin_unlock(&tree->lock); | 339 | spin_unlock_irqrestore(&tree->lock, flags); |
336 | return ret == 0; | 340 | return ret == 0; |
337 | } | 341 | } |
338 | 342 | ||
@@ -347,15 +351,21 @@ out: | |||
347 | */ | 351 | */ |
348 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 352 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
349 | struct btrfs_ordered_extent **cached, | 353 | struct btrfs_ordered_extent **cached, |
350 | u64 file_offset, u64 io_size) | 354 | u64 file_offset, u64 io_size, int uptodate) |
351 | { | 355 | { |
352 | struct btrfs_ordered_inode_tree *tree; | 356 | struct btrfs_ordered_inode_tree *tree; |
353 | struct rb_node *node; | 357 | struct rb_node *node; |
354 | struct btrfs_ordered_extent *entry = NULL; | 358 | struct btrfs_ordered_extent *entry = NULL; |
359 | unsigned long flags; | ||
355 | int ret; | 360 | int ret; |
356 | 361 | ||
357 | tree = &BTRFS_I(inode)->ordered_tree; | 362 | tree = &BTRFS_I(inode)->ordered_tree; |
358 | spin_lock(&tree->lock); | 363 | spin_lock_irqsave(&tree->lock, flags); |
364 | if (cached && *cached) { | ||
365 | entry = *cached; | ||
366 | goto have_entry; | ||
367 | } | ||
368 | |||
359 | node = tree_search(tree, file_offset); | 369 | node = tree_search(tree, file_offset); |
360 | if (!node) { | 370 | if (!node) { |
361 | ret = 1; | 371 | ret = 1; |
@@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
363 | } | 373 | } |
364 | 374 | ||
365 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 375 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
376 | have_entry: | ||
366 | if (!offset_in_entry(entry, file_offset)) { | 377 | if (!offset_in_entry(entry, file_offset)) { |
367 | ret = 1; | 378 | ret = 1; |
368 | goto out; | 379 | goto out; |
@@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
374 | (unsigned long long)io_size); | 385 | (unsigned long long)io_size); |
375 | } | 386 | } |
376 | entry->bytes_left -= io_size; | 387 | entry->bytes_left -= io_size; |
388 | if (!uptodate) | ||
389 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | ||
390 | |||
377 | if (entry->bytes_left == 0) | 391 | if (entry->bytes_left == 0) |
378 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 392 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
379 | else | 393 | else |
@@ -383,7 +397,7 @@ out: | |||
383 | *cached = entry; | 397 | *cached = entry; |
384 | atomic_inc(&entry->refs); | 398 | atomic_inc(&entry->refs); |
385 | } | 399 | } |
386 | spin_unlock(&tree->lock); | 400 | spin_unlock_irqrestore(&tree->lock, flags); |
387 | return ret == 0; | 401 | return ret == 0; |
388 | } | 402 | } |
389 | 403 | ||
@@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
399 | trace_btrfs_ordered_extent_put(entry->inode, entry); | 413 | trace_btrfs_ordered_extent_put(entry->inode, entry); |
400 | 414 | ||
401 | if (atomic_dec_and_test(&entry->refs)) { | 415 | if (atomic_dec_and_test(&entry->refs)) { |
416 | if (entry->inode) | ||
417 | btrfs_add_delayed_iput(entry->inode); | ||
402 | while (!list_empty(&entry->list)) { | 418 | while (!list_empty(&entry->list)) { |
403 | cur = entry->list.next; | 419 | cur = entry->list.next; |
404 | sum = list_entry(cur, struct btrfs_ordered_sum, list); | 420 | sum = list_entry(cur, struct btrfs_ordered_sum, list); |
@@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) | |||
411 | 427 | ||
412 | /* | 428 | /* |
413 | * remove an ordered extent from the tree. No references are dropped | 429 | * remove an ordered extent from the tree. No references are dropped |
414 | * and you must wake_up entry->wait. You must hold the tree lock | 430 | * and waiters are woken up. |
415 | * while you call this function. | ||
416 | */ | 431 | */ |
417 | static void __btrfs_remove_ordered_extent(struct inode *inode, | 432 | void btrfs_remove_ordered_extent(struct inode *inode, |
418 | struct btrfs_ordered_extent *entry) | 433 | struct btrfs_ordered_extent *entry) |
419 | { | 434 | { |
420 | struct btrfs_ordered_inode_tree *tree; | 435 | struct btrfs_ordered_inode_tree *tree; |
421 | struct btrfs_root *root = BTRFS_I(inode)->root; | 436 | struct btrfs_root *root = BTRFS_I(inode)->root; |
422 | struct rb_node *node; | 437 | struct rb_node *node; |
423 | 438 | ||
424 | tree = &BTRFS_I(inode)->ordered_tree; | 439 | tree = &BTRFS_I(inode)->ordered_tree; |
440 | spin_lock_irq(&tree->lock); | ||
425 | node = &entry->rb_node; | 441 | node = &entry->rb_node; |
426 | rb_erase(node, &tree->tree); | 442 | rb_erase(node, &tree->tree); |
427 | tree->last = NULL; | 443 | tree->last = NULL; |
428 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 444 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
445 | spin_unlock_irq(&tree->lock); | ||
429 | 446 | ||
430 | spin_lock(&root->fs_info->ordered_extent_lock); | 447 | spin_lock(&root->fs_info->ordered_extent_lock); |
431 | list_del_init(&entry->root_extent_list); | 448 | list_del_init(&entry->root_extent_list); |
@@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode, | |||
442 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 459 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
443 | } | 460 | } |
444 | spin_unlock(&root->fs_info->ordered_extent_lock); | 461 | spin_unlock(&root->fs_info->ordered_extent_lock); |
445 | } | ||
446 | |||
447 | /* | ||
448 | * remove an ordered extent from the tree. No references are dropped | ||
449 | * but any waiters are woken. | ||
450 | */ | ||
451 | void btrfs_remove_ordered_extent(struct inode *inode, | ||
452 | struct btrfs_ordered_extent *entry) | ||
453 | { | ||
454 | struct btrfs_ordered_inode_tree *tree; | ||
455 | |||
456 | tree = &BTRFS_I(inode)->ordered_tree; | ||
457 | spin_lock(&tree->lock); | ||
458 | __btrfs_remove_ordered_extent(inode, entry); | ||
459 | spin_unlock(&tree->lock); | ||
460 | wake_up(&entry->wait); | 462 | wake_up(&entry->wait); |
461 | } | 463 | } |
462 | 464 | ||
@@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
621 | if (orig_end > INT_LIMIT(loff_t)) | 623 | if (orig_end > INT_LIMIT(loff_t)) |
622 | orig_end = INT_LIMIT(loff_t); | 624 | orig_end = INT_LIMIT(loff_t); |
623 | } | 625 | } |
624 | again: | 626 | |
625 | /* start IO across the range first to instantiate any delalloc | 627 | /* start IO across the range first to instantiate any delalloc |
626 | * extents | 628 | * extents |
627 | */ | 629 | */ |
628 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); | 630 | filemap_write_and_wait_range(inode->i_mapping, start, orig_end); |
629 | |||
630 | /* The compression code will leave pages locked but return from | ||
631 | * writepage without setting the page writeback. Starting again | ||
632 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | ||
633 | */ | ||
634 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); | ||
635 | |||
636 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); | ||
637 | 631 | ||
638 | end = orig_end; | 632 | end = orig_end; |
639 | found = 0; | 633 | found = 0; |
@@ -657,11 +651,6 @@ again: | |||
657 | break; | 651 | break; |
658 | end--; | 652 | end--; |
659 | } | 653 | } |
660 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | ||
661 | EXTENT_DELALLOC, 0, NULL)) { | ||
662 | schedule_timeout(1); | ||
663 | goto again; | ||
664 | } | ||
665 | } | 654 | } |
666 | 655 | ||
667 | /* | 656 | /* |
@@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
676 | struct btrfs_ordered_extent *entry = NULL; | 665 | struct btrfs_ordered_extent *entry = NULL; |
677 | 666 | ||
678 | tree = &BTRFS_I(inode)->ordered_tree; | 667 | tree = &BTRFS_I(inode)->ordered_tree; |
679 | spin_lock(&tree->lock); | 668 | spin_lock_irq(&tree->lock); |
680 | node = tree_search(tree, file_offset); | 669 | node = tree_search(tree, file_offset); |
681 | if (!node) | 670 | if (!node) |
682 | goto out; | 671 | goto out; |
@@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, | |||
687 | if (entry) | 676 | if (entry) |
688 | atomic_inc(&entry->refs); | 677 | atomic_inc(&entry->refs); |
689 | out: | 678 | out: |
690 | spin_unlock(&tree->lock); | 679 | spin_unlock_irq(&tree->lock); |
691 | return entry; | 680 | return entry; |
692 | } | 681 | } |
693 | 682 | ||
@@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
703 | struct btrfs_ordered_extent *entry = NULL; | 692 | struct btrfs_ordered_extent *entry = NULL; |
704 | 693 | ||
705 | tree = &BTRFS_I(inode)->ordered_tree; | 694 | tree = &BTRFS_I(inode)->ordered_tree; |
706 | spin_lock(&tree->lock); | 695 | spin_lock_irq(&tree->lock); |
707 | node = tree_search(tree, file_offset); | 696 | node = tree_search(tree, file_offset); |
708 | if (!node) { | 697 | if (!node) { |
709 | node = tree_search(tree, file_offset + len); | 698 | node = tree_search(tree, file_offset + len); |
@@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
728 | out: | 717 | out: |
729 | if (entry) | 718 | if (entry) |
730 | atomic_inc(&entry->refs); | 719 | atomic_inc(&entry->refs); |
731 | spin_unlock(&tree->lock); | 720 | spin_unlock_irq(&tree->lock); |
732 | return entry; | 721 | return entry; |
733 | } | 722 | } |
734 | 723 | ||
@@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
744 | struct btrfs_ordered_extent *entry = NULL; | 733 | struct btrfs_ordered_extent *entry = NULL; |
745 | 734 | ||
746 | tree = &BTRFS_I(inode)->ordered_tree; | 735 | tree = &BTRFS_I(inode)->ordered_tree; |
747 | spin_lock(&tree->lock); | 736 | spin_lock_irq(&tree->lock); |
748 | node = tree_search(tree, file_offset); | 737 | node = tree_search(tree, file_offset); |
749 | if (!node) | 738 | if (!node) |
750 | goto out; | 739 | goto out; |
@@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) | |||
752 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 741 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
753 | atomic_inc(&entry->refs); | 742 | atomic_inc(&entry->refs); |
754 | out: | 743 | out: |
755 | spin_unlock(&tree->lock); | 744 | spin_unlock_irq(&tree->lock); |
756 | return entry; | 745 | return entry; |
757 | } | 746 | } |
758 | 747 | ||
@@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
764 | struct btrfs_ordered_extent *ordered) | 753 | struct btrfs_ordered_extent *ordered) |
765 | { | 754 | { |
766 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; | 755 | struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; |
767 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
768 | u64 disk_i_size; | 756 | u64 disk_i_size; |
769 | u64 new_i_size; | 757 | u64 new_i_size; |
770 | u64 i_size_test; | 758 | u64 i_size_test; |
@@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
779 | else | 767 | else |
780 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); | 768 | offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); |
781 | 769 | ||
782 | spin_lock(&tree->lock); | 770 | spin_lock_irq(&tree->lock); |
783 | disk_i_size = BTRFS_I(inode)->disk_i_size; | 771 | disk_i_size = BTRFS_I(inode)->disk_i_size; |
784 | 772 | ||
785 | /* truncate file */ | 773 | /* truncate file */ |
@@ -798,14 +786,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
798 | } | 786 | } |
799 | 787 | ||
800 | /* | 788 | /* |
801 | * we can't update the disk_isize if there are delalloc bytes | ||
802 | * between disk_i_size and this ordered extent | ||
803 | */ | ||
804 | if (test_range_bit(io_tree, disk_i_size, offset - 1, | ||
805 | EXTENT_DELALLOC, 0, NULL)) { | ||
806 | goto out; | ||
807 | } | ||
808 | /* | ||
809 | * walk backward from this ordered extent to disk_i_size. | 789 | * walk backward from this ordered extent to disk_i_size. |
810 | * if we find an ordered extent then we can't update disk i_size | 790 | * if we find an ordered extent then we can't update disk i_size |
811 | * yet | 791 | * yet |
@@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
825 | } | 805 | } |
826 | node = prev; | 806 | node = prev; |
827 | } | 807 | } |
828 | while (node) { | 808 | for (; node; node = rb_prev(node)) { |
829 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 809 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
810 | |||
811 | /* We treat this entry as if it doesn't exist */ | ||
812 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
813 | continue; | ||
830 | if (test->file_offset + test->len <= disk_i_size) | 814 | if (test->file_offset + test->len <= disk_i_size) |
831 | break; | 815 | break; |
832 | if (test->file_offset >= i_size) | 816 | if (test->file_offset >= i_size) |
833 | break; | 817 | break; |
834 | if (test->file_offset >= disk_i_size) | 818 | if (test->file_offset >= disk_i_size) |
835 | goto out; | 819 | goto out; |
836 | node = rb_prev(node); | ||
837 | } | 820 | } |
838 | new_i_size = min_t(u64, offset, i_size); | 821 | new_i_size = min_t(u64, offset, i_size); |
839 | 822 | ||
@@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
851 | else | 834 | else |
852 | node = rb_first(&tree->tree); | 835 | node = rb_first(&tree->tree); |
853 | } | 836 | } |
854 | i_size_test = 0; | 837 | |
855 | if (node) { | 838 | /* |
856 | /* | 839 | * We are looking for an area between our current extent and the next |
857 | * do we have an area where IO might have finished | 840 | * ordered extent to update the i_size to. There are 3 cases here |
858 | * between our ordered extent and the next one. | 841 | * |
859 | */ | 842 | * 1) We don't actually have anything and we can update to i_size. |
843 | * 2) We have ordered extents, but they have already done their i_size | ||
844 | * update, so again we can just update to i_size. | ||
845 | * 3) We have an outstanding ordered extent so the most we can update | ||
846 | * our disk_i_size to is the start of the next offset. | ||
847 | */ | ||
848 | i_size_test = i_size; | ||
849 | for (; node; node = rb_next(node)) { | ||
860 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); | 850 | test = rb_entry(node, struct btrfs_ordered_extent, rb_node); |
861 | if (test->file_offset > offset) | 851 | |
852 | if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) | ||
853 | continue; | ||
854 | if (test->file_offset > offset) { | ||
862 | i_size_test = test->file_offset; | 855 | i_size_test = test->file_offset; |
863 | } else { | 856 | break; |
864 | i_size_test = i_size; | 857 | } |
865 | } | 858 | } |
866 | 859 | ||
867 | /* | 860 | /* |
868 | * i_size_test is the end of a region after this ordered | 861 | * i_size_test is the end of a region after this ordered |
869 | * extent where there are no ordered extents. As long as there | 862 | * extent where there are no ordered extents, we can safely set |
870 | * are no delalloc bytes in this area, it is safe to update | 863 | * disk_i_size to this. |
871 | * disk_i_size to the end of the region. | ||
872 | */ | 864 | */ |
873 | if (i_size_test > offset && | 865 | if (i_size_test > offset) |
874 | !test_range_bit(io_tree, offset, i_size_test - 1, | ||
875 | EXTENT_DELALLOC, 0, NULL)) { | ||
876 | new_i_size = min_t(u64, i_size_test, i_size); | 866 | new_i_size = min_t(u64, i_size_test, i_size); |
877 | } | ||
878 | BTRFS_I(inode)->disk_i_size = new_i_size; | 867 | BTRFS_I(inode)->disk_i_size = new_i_size; |
879 | ret = 0; | 868 | ret = 0; |
880 | out: | 869 | out: |
881 | /* | 870 | /* |
882 | * we need to remove the ordered extent with the tree lock held | 871 | * We need to do this because we can't remove ordered extents until |
883 | * so that other people calling this function don't find our fully | 872 | * after the disk_i_size has been updated and then the inode has been |
884 | * processed ordered entry and skip updating the i_size | 873 | * updated to reflect the change, so we need to tell anybody who finds |
874 | * this ordered extent that we've already done all the real work, we | ||
875 | * just haven't completed all the other work. | ||
885 | */ | 876 | */ |
886 | if (ordered) | 877 | if (ordered) |
887 | __btrfs_remove_ordered_extent(inode, ordered); | 878 | set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); |
888 | spin_unlock(&tree->lock); | 879 | spin_unlock_irq(&tree->lock); |
889 | if (ordered) | ||
890 | wake_up(&ordered->wait); | ||
891 | return ret; | 880 | return ret; |
892 | } | 881 | } |
893 | 882 | ||
@@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
912 | if (!ordered) | 901 | if (!ordered) |
913 | return 1; | 902 | return 1; |
914 | 903 | ||
915 | spin_lock(&tree->lock); | 904 | spin_lock_irq(&tree->lock); |
916 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { | 905 | list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { |
917 | if (disk_bytenr >= ordered_sum->bytenr) { | 906 | if (disk_bytenr >= ordered_sum->bytenr) { |
918 | num_sectors = ordered_sum->len / sectorsize; | 907 | num_sectors = ordered_sum->len / sectorsize; |
@@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, | |||
927 | } | 916 | } |
928 | } | 917 | } |
929 | out: | 918 | out: |
930 | spin_unlock(&tree->lock); | 919 | spin_unlock_irq(&tree->lock); |
931 | btrfs_put_ordered_extent(ordered); | 920 | btrfs_put_ordered_extent(ordered); |
932 | return ret; | 921 | return ret; |
933 | } | 922 | } |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c355ad4dc1a6..e03c560d2997 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -74,6 +74,12 @@ struct btrfs_ordered_sum { | |||
74 | 74 | ||
75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | 75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ |
76 | 76 | ||
77 | #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ | ||
78 | |||
79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent | ||
80 | * has done its due diligence in updating | ||
81 | * the isize. */ | ||
82 | |||
77 | struct btrfs_ordered_extent { | 83 | struct btrfs_ordered_extent { |
78 | /* logical offset in the file */ | 84 | /* logical offset in the file */ |
79 | u64 file_offset; | 85 | u64 file_offset; |
@@ -113,6 +119,8 @@ struct btrfs_ordered_extent { | |||
113 | 119 | ||
114 | /* a per root list of all the pending ordered extents */ | 120 | /* a per root list of all the pending ordered extents */ |
115 | struct list_head root_extent_list; | 121 | struct list_head root_extent_list; |
122 | |||
123 | struct btrfs_work work; | ||
116 | }; | 124 | }; |
117 | 125 | ||
118 | 126 | ||
@@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
143 | struct btrfs_ordered_extent *entry); | 151 | struct btrfs_ordered_extent *entry); |
144 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 152 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
145 | struct btrfs_ordered_extent **cached, | 153 | struct btrfs_ordered_extent **cached, |
146 | u64 file_offset, u64 io_size); | 154 | u64 file_offset, u64 io_size, int uptodate); |
147 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | 155 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, |
148 | struct btrfs_ordered_extent **cached, | 156 | struct btrfs_ordered_extent **cached, |
149 | u64 *file_offset, u64 io_size); | 157 | u64 *file_offset, u64 io_size, |
158 | int uptodate); | ||
150 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 159 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
151 | u64 start, u64 len, u64 disk_len, int type); | 160 | u64 start, u64 len, u64 disk_len, int type); |
152 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 161 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
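The new BTRFS_ORDERED_IOERR and BTRFS_ORDERED_UPDATED_ISIZE bits are consumed with the usual set_bit()/test_bit() pattern on the ordered extent's ->flags word, as the ordered-data.c hunks above show for the isize bit. A kernel-style sketch of that pattern; the two helper functions are hypothetical, only the flag names and the ->flags/->wait fields come from this patch:

static int ordered_extent_had_io_error(struct btrfs_ordered_extent *ordered)
{
	/* set when the write for this ordered extent failed */
	return test_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
}

static void mark_ordered_extent_io_error(struct btrfs_ordered_extent *ordered)
{
	set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
	wake_up(&ordered->wait);	/* let waiters see the error state */
}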
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f38e452486b8..5e23684887eb 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -294,6 +294,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
294 | btrfs_dev_extent_chunk_offset(l, dev_extent), | 294 | btrfs_dev_extent_chunk_offset(l, dev_extent), |
295 | (unsigned long long) | 295 | (unsigned long long) |
296 | btrfs_dev_extent_length(l, dev_extent)); | 296 | btrfs_dev_extent_length(l, dev_extent)); |
297 | case BTRFS_DEV_STATS_KEY: | ||
298 | printk(KERN_INFO "\t\tdevice stats\n"); | ||
299 | break; | ||
297 | }; | 300 | }; |
298 | } | 301 | } |
299 | } | 302 | } |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index ac5d01085884..48a4882d8ad5 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -718,13 +718,18 @@ static void reada_start_machine_worker(struct btrfs_work *work) | |||
718 | { | 718 | { |
719 | struct reada_machine_work *rmw; | 719 | struct reada_machine_work *rmw; |
720 | struct btrfs_fs_info *fs_info; | 720 | struct btrfs_fs_info *fs_info; |
721 | int old_ioprio; | ||
721 | 722 | ||
722 | rmw = container_of(work, struct reada_machine_work, work); | 723 | rmw = container_of(work, struct reada_machine_work, work); |
723 | fs_info = rmw->fs_info; | 724 | fs_info = rmw->fs_info; |
724 | 725 | ||
725 | kfree(rmw); | 726 | kfree(rmw); |
726 | 727 | ||
728 | old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), | ||
729 | task_nice_ioprio(current)); | ||
730 | set_task_ioprio(current, BTRFS_IOPRIO_READA); | ||
727 | __reada_start_machine(fs_info); | 731 | __reada_start_machine(fs_info); |
732 | set_task_ioprio(current, old_ioprio); | ||
728 | } | 733 | } |
729 | 734 | ||
730 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) | 735 | static void __reada_start_machine(struct btrfs_fs_info *fs_info) |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2f3d6f917fb3..a38cfa4f251e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -50,7 +50,7 @@ struct scrub_dev; | |||
50 | struct scrub_page { | 50 | struct scrub_page { |
51 | struct scrub_block *sblock; | 51 | struct scrub_block *sblock; |
52 | struct page *page; | 52 | struct page *page; |
53 | struct block_device *bdev; | 53 | struct btrfs_device *dev; |
54 | u64 flags; /* extent flags */ | 54 | u64 flags; /* extent flags */ |
55 | u64 generation; | 55 | u64 generation; |
56 | u64 logical; | 56 | u64 logical; |
@@ -86,6 +86,7 @@ struct scrub_block { | |||
86 | unsigned int header_error:1; | 86 | unsigned int header_error:1; |
87 | unsigned int checksum_error:1; | 87 | unsigned int checksum_error:1; |
88 | unsigned int no_io_error_seen:1; | 88 | unsigned int no_io_error_seen:1; |
89 | unsigned int generation_error:1; /* also sets header_error */ | ||
89 | }; | 90 | }; |
90 | }; | 91 | }; |
91 | 92 | ||
@@ -675,6 +676,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
675 | sdev->stat.read_errors++; | 676 | sdev->stat.read_errors++; |
676 | sdev->stat.uncorrectable_errors++; | 677 | sdev->stat.uncorrectable_errors++; |
677 | spin_unlock(&sdev->stat_lock); | 678 | spin_unlock(&sdev->stat_lock); |
679 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
680 | BTRFS_DEV_STAT_READ_ERRS); | ||
678 | goto out; | 681 | goto out; |
679 | } | 682 | } |
680 | 683 | ||
@@ -686,6 +689,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
686 | sdev->stat.read_errors++; | 689 | sdev->stat.read_errors++; |
687 | sdev->stat.uncorrectable_errors++; | 690 | sdev->stat.uncorrectable_errors++; |
688 | spin_unlock(&sdev->stat_lock); | 691 | spin_unlock(&sdev->stat_lock); |
692 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
693 | BTRFS_DEV_STAT_READ_ERRS); | ||
689 | goto out; | 694 | goto out; |
690 | } | 695 | } |
691 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); | 696 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); |
@@ -699,6 +704,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
699 | sdev->stat.read_errors++; | 704 | sdev->stat.read_errors++; |
700 | sdev->stat.uncorrectable_errors++; | 705 | sdev->stat.uncorrectable_errors++; |
701 | spin_unlock(&sdev->stat_lock); | 706 | spin_unlock(&sdev->stat_lock); |
707 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
708 | BTRFS_DEV_STAT_READ_ERRS); | ||
702 | goto out; | 709 | goto out; |
703 | } | 710 | } |
704 | 711 | ||
@@ -725,12 +732,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
725 | spin_unlock(&sdev->stat_lock); | 732 | spin_unlock(&sdev->stat_lock); |
726 | if (__ratelimit(&_rs)) | 733 | if (__ratelimit(&_rs)) |
727 | scrub_print_warning("i/o error", sblock_to_check); | 734 | scrub_print_warning("i/o error", sblock_to_check); |
735 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
736 | BTRFS_DEV_STAT_READ_ERRS); | ||
728 | } else if (sblock_bad->checksum_error) { | 737 | } else if (sblock_bad->checksum_error) { |
729 | spin_lock(&sdev->stat_lock); | 738 | spin_lock(&sdev->stat_lock); |
730 | sdev->stat.csum_errors++; | 739 | sdev->stat.csum_errors++; |
731 | spin_unlock(&sdev->stat_lock); | 740 | spin_unlock(&sdev->stat_lock); |
732 | if (__ratelimit(&_rs)) | 741 | if (__ratelimit(&_rs)) |
733 | scrub_print_warning("checksum error", sblock_to_check); | 742 | scrub_print_warning("checksum error", sblock_to_check); |
743 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
744 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
734 | } else if (sblock_bad->header_error) { | 745 | } else if (sblock_bad->header_error) { |
735 | spin_lock(&sdev->stat_lock); | 746 | spin_lock(&sdev->stat_lock); |
736 | sdev->stat.verify_errors++; | 747 | sdev->stat.verify_errors++; |
@@ -738,6 +749,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
738 | if (__ratelimit(&_rs)) | 749 | if (__ratelimit(&_rs)) |
739 | scrub_print_warning("checksum/header error", | 750 | scrub_print_warning("checksum/header error", |
740 | sblock_to_check); | 751 | sblock_to_check); |
752 | if (sblock_bad->generation_error) | ||
753 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
754 | BTRFS_DEV_STAT_GENERATION_ERRS); | ||
755 | else | ||
756 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
757 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
741 | } | 758 | } |
742 | 759 | ||
743 | if (sdev->readonly) | 760 | if (sdev->readonly) |
@@ -998,8 +1015,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
998 | page = sblock->pagev + page_index; | 1015 | page = sblock->pagev + page_index; |
999 | page->logical = logical; | 1016 | page->logical = logical; |
1000 | page->physical = bbio->stripes[mirror_index].physical; | 1017 | page->physical = bbio->stripes[mirror_index].physical; |
1001 | /* for missing devices, bdev is NULL */ | 1018 | /* for missing devices, dev->bdev is NULL */ |
1002 | page->bdev = bbio->stripes[mirror_index].dev->bdev; | 1019 | page->dev = bbio->stripes[mirror_index].dev; |
1003 | page->mirror_num = mirror_index + 1; | 1020 | page->mirror_num = mirror_index + 1; |
1004 | page->page = alloc_page(GFP_NOFS); | 1021 | page->page = alloc_page(GFP_NOFS); |
1005 | if (!page->page) { | 1022 | if (!page->page) { |
@@ -1043,7 +1060,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1043 | struct scrub_page *page = sblock->pagev + page_num; | 1060 | struct scrub_page *page = sblock->pagev + page_num; |
1044 | DECLARE_COMPLETION_ONSTACK(complete); | 1061 | DECLARE_COMPLETION_ONSTACK(complete); |
1045 | 1062 | ||
1046 | if (page->bdev == NULL) { | 1063 | if (page->dev->bdev == NULL) { |
1047 | page->io_error = 1; | 1064 | page->io_error = 1; |
1048 | sblock->no_io_error_seen = 0; | 1065 | sblock->no_io_error_seen = 0; |
1049 | continue; | 1066 | continue; |
@@ -1053,7 +1070,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1053 | bio = bio_alloc(GFP_NOFS, 1); | 1070 | bio = bio_alloc(GFP_NOFS, 1); |
1054 | if (!bio) | 1071 | if (!bio) |
1055 | return -EIO; | 1072 | return -EIO; |
1056 | bio->bi_bdev = page->bdev; | 1073 | bio->bi_bdev = page->dev->bdev; |
1057 | bio->bi_sector = page->physical >> 9; | 1074 | bio->bi_sector = page->physical >> 9; |
1058 | bio->bi_end_io = scrub_complete_bio_end_io; | 1075 | bio->bi_end_io = scrub_complete_bio_end_io; |
1059 | bio->bi_private = &complete; | 1076 | bio->bi_private = &complete; |
@@ -1102,11 +1119,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
1102 | h = (struct btrfs_header *)mapped_buffer; | 1119 | h = (struct btrfs_header *)mapped_buffer; |
1103 | 1120 | ||
1104 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || | 1121 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || |
1105 | generation != le64_to_cpu(h->generation) || | ||
1106 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || | 1122 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || |
1107 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1123 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
1108 | BTRFS_UUID_SIZE)) | 1124 | BTRFS_UUID_SIZE)) { |
1109 | sblock->header_error = 1; | 1125 | sblock->header_error = 1; |
1126 | } else if (generation != le64_to_cpu(h->generation)) { | ||
1127 | sblock->header_error = 1; | ||
1128 | sblock->generation_error = 1; | ||
1129 | } | ||
1110 | csum = h->csum; | 1130 | csum = h->csum; |
1111 | } else { | 1131 | } else { |
1112 | if (!have_csum) | 1132 | if (!have_csum) |
@@ -1182,7 +1202,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1182 | bio = bio_alloc(GFP_NOFS, 1); | 1202 | bio = bio_alloc(GFP_NOFS, 1); |
1183 | if (!bio) | 1203 | if (!bio) |
1184 | return -EIO; | 1204 | return -EIO; |
1185 | bio->bi_bdev = page_bad->bdev; | 1205 | bio->bi_bdev = page_bad->dev->bdev; |
1186 | bio->bi_sector = page_bad->physical >> 9; | 1206 | bio->bi_sector = page_bad->physical >> 9; |
1187 | bio->bi_end_io = scrub_complete_bio_end_io; | 1207 | bio->bi_end_io = scrub_complete_bio_end_io; |
1188 | bio->bi_private = &complete; | 1208 | bio->bi_private = &complete; |
@@ -1196,6 +1216,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1196 | 1216 | ||
1197 | /* this will also unplug the queue */ | 1217 | /* this will also unplug the queue */ |
1198 | wait_for_completion(&complete); | 1218 | wait_for_completion(&complete); |
1219 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
1220 | btrfs_dev_stat_inc_and_print(page_bad->dev, | ||
1221 | BTRFS_DEV_STAT_WRITE_ERRS); | ||
1222 | bio_put(bio); | ||
1223 | return -EIO; | ||
1224 | } | ||
1199 | bio_put(bio); | 1225 | bio_put(bio); |
1200 | } | 1226 | } |
1201 | 1227 | ||
@@ -1352,7 +1378,8 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1352 | u64 mapped_size; | 1378 | u64 mapped_size; |
1353 | void *p; | 1379 | void *p; |
1354 | u32 crc = ~(u32)0; | 1380 | u32 crc = ~(u32)0; |
1355 | int fail = 0; | 1381 | int fail_gen = 0; |
1382 | int fail_cor = 0; | ||
1356 | u64 len; | 1383 | u64 len; |
1357 | int index; | 1384 | int index; |
1358 | 1385 | ||
@@ -1363,13 +1390,13 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1363 | memcpy(on_disk_csum, s->csum, sdev->csum_size); | 1390 | memcpy(on_disk_csum, s->csum, sdev->csum_size); |
1364 | 1391 | ||
1365 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) | 1392 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) |
1366 | ++fail; | 1393 | ++fail_cor; |
1367 | 1394 | ||
1368 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) | 1395 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) |
1369 | ++fail; | 1396 | ++fail_gen; |
1370 | 1397 | ||
1371 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1398 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) |
1372 | ++fail; | 1399 | ++fail_cor; |
1373 | 1400 | ||
1374 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; | 1401 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; |
1375 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1402 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
@@ -1394,9 +1421,9 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1394 | 1421 | ||
1395 | btrfs_csum_final(crc, calculated_csum); | 1422 | btrfs_csum_final(crc, calculated_csum); |
1396 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) | 1423 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) |
1397 | ++fail; | 1424 | ++fail_cor; |
1398 | 1425 | ||
1399 | if (fail) { | 1426 | if (fail_cor + fail_gen) { |
1400 | /* | 1427 | /* |
1401 | * if we find an error in a super block, we just report it. | 1428 | * if we find an error in a super block, we just report it. |
1402 | * They will get written with the next transaction commit | 1429 | * They will get written with the next transaction commit |
@@ -1405,9 +1432,15 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1405 | spin_lock(&sdev->stat_lock); | 1432 | spin_lock(&sdev->stat_lock); |
1406 | ++sdev->stat.super_errors; | 1433 | ++sdev->stat.super_errors; |
1407 | spin_unlock(&sdev->stat_lock); | 1434 | spin_unlock(&sdev->stat_lock); |
1435 | if (fail_cor) | ||
1436 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
1437 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | ||
1438 | else | ||
1439 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
1440 | BTRFS_DEV_STAT_GENERATION_ERRS); | ||
1408 | } | 1441 | } |
1409 | 1442 | ||
1410 | return fail; | 1443 | return fail_cor + fail_gen; |
1411 | } | 1444 | } |
1412 | 1445 | ||
1413 | static void scrub_block_get(struct scrub_block *sblock) | 1446 | static void scrub_block_get(struct scrub_block *sblock) |
@@ -1551,7 +1584,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | |||
1551 | return -ENOMEM; | 1584 | return -ENOMEM; |
1552 | } | 1585 | } |
1553 | spage->sblock = sblock; | 1586 | spage->sblock = sblock; |
1554 | spage->bdev = sdev->dev->bdev; | 1587 | spage->dev = sdev->dev; |
1555 | spage->flags = flags; | 1588 | spage->flags = flags; |
1556 | spage->generation = gen; | 1589 | spage->generation = gen; |
1557 | spage->logical = logical; | 1590 | spage->logical = logical; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c5f8fca4195f..96eb9fef7bd2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -188,7 +188,8 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) | |||
188 | va_start(args, fmt); | 188 | va_start(args, fmt); |
189 | 189 | ||
190 | if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { | 190 | if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') { |
191 | strncpy(lvl, fmt, 3); | 191 | memcpy(lvl, fmt, 3); |
192 | lvl[3] = '\0'; | ||
192 | fmt += 3; | 193 | fmt += 3; |
193 | type = logtypes[fmt[1] - '0']; | 194 | type = logtypes[fmt[1] - '0']; |
194 | } else | 195 | } else |
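The strncpy() to memcpy() change above is about termination: strncpy() copies exactly 3 bytes here and, because the format string is longer than 3 characters, never writes a NUL, so the level buffer could be left unterminated. A standalone userspace sketch of the fixed pattern (the buffer name and format string are illustrative, not taken from the patch):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char lvl[4];
	const char *fmt = "<6>example message";	/* illustrative format string */

	/* strncpy(lvl, fmt, 3) would copy exactly 3 bytes and, since the
	 * source is longer than 3 characters, never write a terminating NUL.
	 * The patched code copies the 3 bytes and terminates by hand: */
	memcpy(lvl, fmt, 3);
	lvl[3] = '\0';

	printf("%s\n", lvl);	/* prints "<6>" */
	return 0;
}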
@@ -435,11 +436,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
435 | case Opt_thread_pool: | 436 | case Opt_thread_pool: |
436 | intarg = 0; | 437 | intarg = 0; |
437 | match_int(&args[0], &intarg); | 438 | match_int(&args[0], &intarg); |
438 | if (intarg) { | 439 | if (intarg) |
439 | info->thread_pool_size = intarg; | 440 | info->thread_pool_size = intarg; |
440 | printk(KERN_INFO "btrfs: thread pool %d\n", | ||
441 | info->thread_pool_size); | ||
442 | } | ||
443 | break; | 441 | break; |
444 | case Opt_max_inline: | 442 | case Opt_max_inline: |
445 | num = match_strdup(&args[0]); | 443 | num = match_strdup(&args[0]); |
@@ -769,7 +767,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
769 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 767 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
770 | sb->s_flags |= MS_POSIXACL; | 768 | sb->s_flags |= MS_POSIXACL; |
771 | #endif | 769 | #endif |
772 | 770 | sb->s_flags |= MS_I_VERSION; | |
773 | err = open_ctree(sb, fs_devices, (char *)data); | 771 | err = open_ctree(sb, fs_devices, (char *)data); |
774 | if (err) { | 772 | if (err) { |
775 | printk("btrfs: open_ctree failed\n"); | 773 | printk("btrfs: open_ctree failed\n"); |
@@ -925,63 +923,48 @@ static inline int is_subvolume_inode(struct inode *inode) | |||
925 | */ | 923 | */ |
926 | static char *setup_root_args(char *args) | 924 | static char *setup_root_args(char *args) |
927 | { | 925 | { |
928 | unsigned copied = 0; | 926 | unsigned len = strlen(args) + 2 + 1; |
929 | unsigned len = strlen(args) + 2; | 927 | char *src, *dst, *buf; |
930 | char *pos; | ||
931 | char *ret; | ||
932 | 928 | ||
933 | /* | 929 | /* |
934 | * We need the same args as before, but minus | 930 | * We need the same args as before, but with this substitution: |
935 | * | 931 | * s!subvol=[^,]+!subvolid=0! |
936 | * subvol=a | ||
937 | * | ||
938 | * and add | ||
939 | * | ||
940 | * subvolid=0 | ||
941 | * | 932 | * |
942 | * which is a difference of 2 characters, so we allocate strlen(args) + | 933 | * Since the replacement string is up to 2 bytes longer than the |
943 | * 2 characters. | 934 | * original, allocate strlen(args) + 2 + 1 bytes. |
944 | */ | 935 | */ |
945 | ret = kzalloc(len * sizeof(char), GFP_NOFS); | ||
946 | if (!ret) | ||
947 | return NULL; | ||
948 | pos = strstr(args, "subvol="); | ||
949 | 936 | ||
937 | src = strstr(args, "subvol="); | ||
950 | /* This shouldn't happen, but just in case.. */ | 938 | /* This shouldn't happen, but just in case.. */ |
951 | if (!pos) { | 939 | if (!src) |
952 | kfree(ret); | 940 | return NULL; |
941 | |||
942 | buf = dst = kmalloc(len, GFP_NOFS); | ||
943 | if (!buf) | ||
953 | return NULL; | 944 | return NULL; |
954 | } | ||
955 | 945 | ||
956 | /* | 946 | /* |
957 | * The subvol=<> arg is not at the front of the string, copy everybody | 947 | * If the subvol= arg is not at the start of the string, |
958 | * up to that into ret. | 948 | * copy whatever precedes it into buf. |
959 | */ | 949 | */ |
960 | if (pos != args) { | 950 | if (src != args) { |
961 | *pos = '\0'; | 951 | *src++ = '\0'; |
962 | strcpy(ret, args); | 952 | strcpy(buf, args); |
963 | copied += strlen(args); | 953 | dst += strlen(args); |
964 | pos++; | ||
965 | } | 954 | } |
966 | 955 | ||
967 | strncpy(ret + copied, "subvolid=0", len - copied); | 956 | strcpy(dst, "subvolid=0"); |
968 | 957 | dst += strlen("subvolid=0"); | |
969 | /* Length of subvolid=0 */ | ||
970 | copied += 10; | ||
971 | 958 | ||
972 | /* | 959 | /* |
973 | * If there is no , after the subvol= option then we know there's no | 960 | * If there is a "," after the original subvol=... string, |
974 | * other options and we can just return. | 961 | * copy that suffix into our buffer. Otherwise, we're done. |
975 | */ | 962 | */ |
976 | pos = strchr(pos, ','); | 963 | src = strchr(src, ','); |
977 | if (!pos) | 964 | if (src) |
978 | return ret; | 965 | strcpy(dst, src); |
979 | 966 | ||
980 | /* Copy the rest of the arguments into our buffer */ | 967 | return buf; |
981 | strncpy(ret + copied, pos, len - copied); | ||
982 | copied += strlen(pos); | ||
983 | |||
984 | return ret; | ||
985 | } | 968 | } |
986 | 969 | ||
987 | static struct dentry *mount_subvol(const char *subvol_name, int flags, | 970 | static struct dentry *mount_subvol(const char *subvol_name, int flags, |
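For reference, the rewritten setup_root_args() performs the substitution s!subvol=[^,]+!subvolid=0! instead of the old copy-and-count logic. A standalone userspace sketch of the same string rewrite, using malloc()/strstr() rather than the kernel helpers; the function name is illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Userspace sketch of the s!subvol=[^,]+!subvolid=0! rewrite done by
 * setup_root_args() above; uses malloc() instead of kmalloc(). */
static char *rewrite_subvol_args(const char *args)
{
	/* "subvolid=0" is up to 2 bytes longer than "subvol=<name>" */
	size_t len = strlen(args) + 2 + 1;
	const char *src = strstr(args, "subvol=");
	char *buf, *dst;

	if (!src)
		return NULL;

	buf = dst = malloc(len);
	if (!buf)
		return NULL;

	/* copy whatever precedes "subvol=" (including a trailing comma) */
	memcpy(dst, args, src - args);
	dst += src - args;

	strcpy(dst, "subvolid=0");
	dst += strlen("subvolid=0");

	/* keep any options that follow the subvol=... value */
	src = strchr(src, ',');
	if (src)
		strcpy(dst, src);

	return buf;
}

int main(void)
{
	char *s = rewrite_subvol_args("device=/dev/sda,subvol=myvol,compress=lzo");

	puts(s ? s : "(no subvol= option)");
	/* prints: device=/dev/sda,subvolid=0,compress=lzo */
	free(s);
	return 0;
}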
@@ -1118,6 +1101,40 @@ error_fs_info: | |||
1118 | return ERR_PTR(error); | 1101 | return ERR_PTR(error); |
1119 | } | 1102 | } |
1120 | 1103 | ||
1104 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
1105 | { | ||
1106 | spin_lock_irq(&workers->lock); | ||
1107 | workers->max_workers = new_limit; | ||
1108 | spin_unlock_irq(&workers->lock); | ||
1109 | } | ||
1110 | |||
1111 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | ||
1112 | int new_pool_size, int old_pool_size) | ||
1113 | { | ||
1114 | if (new_pool_size == old_pool_size) | ||
1115 | return; | ||
1116 | |||
1117 | fs_info->thread_pool_size = new_pool_size; | ||
1118 | |||
1119 | printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n", | ||
1120 | old_pool_size, new_pool_size); | ||
1121 | |||
1122 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | ||
1123 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | ||
1124 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | ||
1125 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | ||
1126 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | ||
1127 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | ||
1128 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | ||
1129 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | ||
1130 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | ||
1131 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | ||
1132 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | ||
1133 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | ||
1134 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | ||
1135 | btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); | ||
1136 | } | ||
1137 | |||
1121 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 1138 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
1122 | { | 1139 | { |
1123 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | 1140 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); |
@@ -1137,6 +1154,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1137 | goto restore; | 1154 | goto restore; |
1138 | } | 1155 | } |
1139 | 1156 | ||
1157 | btrfs_resize_thread_pool(fs_info, | ||
1158 | fs_info->thread_pool_size, old_thread_pool_size); | ||
1159 | |||
1140 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 1160 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
1141 | return 0; | 1161 | return 0; |
1142 | 1162 | ||
@@ -1180,7 +1200,8 @@ restore: | |||
1180 | fs_info->compress_type = old_compress_type; | 1200 | fs_info->compress_type = old_compress_type; |
1181 | fs_info->max_inline = old_max_inline; | 1201 | fs_info->max_inline = old_max_inline; |
1182 | fs_info->alloc_start = old_alloc_start; | 1202 | fs_info->alloc_start = old_alloc_start; |
1183 | fs_info->thread_pool_size = old_thread_pool_size; | 1203 | btrfs_resize_thread_pool(fs_info, |
1204 | old_thread_pool_size, fs_info->thread_pool_size); | ||
1184 | fs_info->metadata_ratio = old_metadata_ratio; | 1205 | fs_info->metadata_ratio = old_metadata_ratio; |
1185 | return ret; | 1206 | return ret; |
1186 | } | 1207 | } |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 36422254ef67..1791c6e3d834 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include "locking.h" | 28 | #include "locking.h" |
29 | #include "tree-log.h" | 29 | #include "tree-log.h" |
30 | #include "inode-map.h" | 30 | #include "inode-map.h" |
31 | #include "volumes.h" | ||
31 | 32 | ||
32 | #define BTRFS_ROOT_TRANS_TAG 0 | 33 | #define BTRFS_ROOT_TRANS_TAG 0 |
33 | 34 | ||
@@ -55,48 +56,49 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
55 | static noinline int join_transaction(struct btrfs_root *root, int nofail) | 56 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
56 | { | 57 | { |
57 | struct btrfs_transaction *cur_trans; | 58 | struct btrfs_transaction *cur_trans; |
59 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
58 | 60 | ||
59 | spin_lock(&root->fs_info->trans_lock); | 61 | spin_lock(&fs_info->trans_lock); |
60 | loop: | 62 | loop: |
61 | /* The file system has been taken offline. No new transactions. */ | 63 | /* The file system has been taken offline. No new transactions. */ |
62 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 64 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
63 | spin_unlock(&root->fs_info->trans_lock); | 65 | spin_unlock(&fs_info->trans_lock); |
64 | return -EROFS; | 66 | return -EROFS; |
65 | } | 67 | } |
66 | 68 | ||
67 | if (root->fs_info->trans_no_join) { | 69 | if (fs_info->trans_no_join) { |
68 | if (!nofail) { | 70 | if (!nofail) { |
69 | spin_unlock(&root->fs_info->trans_lock); | 71 | spin_unlock(&fs_info->trans_lock); |
70 | return -EBUSY; | 72 | return -EBUSY; |
71 | } | 73 | } |
72 | } | 74 | } |
73 | 75 | ||
74 | cur_trans = root->fs_info->running_transaction; | 76 | cur_trans = fs_info->running_transaction; |
75 | if (cur_trans) { | 77 | if (cur_trans) { |
76 | if (cur_trans->aborted) { | 78 | if (cur_trans->aborted) { |
77 | spin_unlock(&root->fs_info->trans_lock); | 79 | spin_unlock(&fs_info->trans_lock); |
78 | return cur_trans->aborted; | 80 | return cur_trans->aborted; |
79 | } | 81 | } |
80 | atomic_inc(&cur_trans->use_count); | 82 | atomic_inc(&cur_trans->use_count); |
81 | atomic_inc(&cur_trans->num_writers); | 83 | atomic_inc(&cur_trans->num_writers); |
82 | cur_trans->num_joined++; | 84 | cur_trans->num_joined++; |
83 | spin_unlock(&root->fs_info->trans_lock); | 85 | spin_unlock(&fs_info->trans_lock); |
84 | return 0; | 86 | return 0; |
85 | } | 87 | } |
86 | spin_unlock(&root->fs_info->trans_lock); | 88 | spin_unlock(&fs_info->trans_lock); |
87 | 89 | ||
88 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | 90 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
89 | if (!cur_trans) | 91 | if (!cur_trans) |
90 | return -ENOMEM; | 92 | return -ENOMEM; |
91 | 93 | ||
92 | spin_lock(&root->fs_info->trans_lock); | 94 | spin_lock(&fs_info->trans_lock); |
93 | if (root->fs_info->running_transaction) { | 95 | if (fs_info->running_transaction) { |
94 | /* | 96 | /* |
95 | * someone started a transaction after we unlocked. Make sure | 97 | * someone started a transaction after we unlocked. Make sure |
96 | * to redo the trans_no_join checks above | 98 | * to redo the trans_no_join checks above |
97 | */ | 99 | */ |
98 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 100 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
99 | cur_trans = root->fs_info->running_transaction; | 101 | cur_trans = fs_info->running_transaction; |
100 | goto loop; | 102 | goto loop; |
101 | } | 103 | } |
102 | 104 | ||
@@ -121,20 +123,38 @@ loop: | |||
121 | cur_trans->delayed_refs.flushing = 0; | 123 | cur_trans->delayed_refs.flushing = 0; |
122 | cur_trans->delayed_refs.run_delayed_start = 0; | 124 | cur_trans->delayed_refs.run_delayed_start = 0; |
123 | cur_trans->delayed_refs.seq = 1; | 125 | cur_trans->delayed_refs.seq = 1; |
126 | |||
127 | /* | ||
128 | * although the tree mod log is per file system and not per transaction, | ||
129 | * the log must never go across transaction boundaries. | ||
130 | */ | ||
131 | smp_mb(); | ||
132 | if (!list_empty(&fs_info->tree_mod_seq_list)) { | ||
133 | printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when " | ||
134 | "creating a fresh transaction\n"); | ||
135 | WARN_ON(1); | ||
136 | } | ||
137 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) { | ||
138 | printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when " | ||
139 | "creating a fresh transaction\n"); | ||
140 | WARN_ON(1); | ||
141 | } | ||
142 | atomic_set(&fs_info->tree_mod_seq, 0); | ||
143 | |||
124 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); | 144 | init_waitqueue_head(&cur_trans->delayed_refs.seq_wait); |
125 | spin_lock_init(&cur_trans->commit_lock); | 145 | spin_lock_init(&cur_trans->commit_lock); |
126 | spin_lock_init(&cur_trans->delayed_refs.lock); | 146 | spin_lock_init(&cur_trans->delayed_refs.lock); |
127 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); | 147 | INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head); |
128 | 148 | ||
129 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 149 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
130 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 150 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
131 | extent_io_tree_init(&cur_trans->dirty_pages, | 151 | extent_io_tree_init(&cur_trans->dirty_pages, |
132 | root->fs_info->btree_inode->i_mapping); | 152 | fs_info->btree_inode->i_mapping); |
133 | root->fs_info->generation++; | 153 | fs_info->generation++; |
134 | cur_trans->transid = root->fs_info->generation; | 154 | cur_trans->transid = fs_info->generation; |
135 | root->fs_info->running_transaction = cur_trans; | 155 | fs_info->running_transaction = cur_trans; |
136 | cur_trans->aborted = 0; | 156 | cur_trans->aborted = 0; |
137 | spin_unlock(&root->fs_info->trans_lock); | 157 | spin_unlock(&fs_info->trans_lock); |
138 | 158 | ||
139 | return 0; | 159 | return 0; |
140 | } | 160 | } |
@@ -758,6 +778,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
758 | if (ret) | 778 | if (ret) |
759 | return ret; | 779 | return ret; |
760 | 780 | ||
781 | ret = btrfs_run_dev_stats(trans, root->fs_info); | ||
782 | BUG_ON(ret); | ||
783 | |||
761 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 784 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
762 | next = fs_info->dirty_cowonly_roots.next; | 785 | next = fs_info->dirty_cowonly_roots.next; |
763 | list_del_init(next); | 786 | list_del_init(next); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index eb1ae908582c..2017d0ff511c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1628 | int i; | 1628 | int i; |
1629 | int ret; | 1629 | int ret; |
1630 | 1630 | ||
1631 | btrfs_read_buffer(eb, gen); | 1631 | ret = btrfs_read_buffer(eb, gen); |
1632 | if (ret) | ||
1633 | return ret; | ||
1632 | 1634 | ||
1633 | level = btrfs_header_level(eb); | 1635 | level = btrfs_header_level(eb); |
1634 | 1636 | ||
@@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1749 | 1751 | ||
1750 | path->slots[*level]++; | 1752 | path->slots[*level]++; |
1751 | if (wc->free) { | 1753 | if (wc->free) { |
1752 | btrfs_read_buffer(next, ptr_gen); | 1754 | ret = btrfs_read_buffer(next, ptr_gen); |
1755 | if (ret) { | ||
1756 | free_extent_buffer(next); | ||
1757 | return ret; | ||
1758 | } | ||
1753 | 1759 | ||
1754 | btrfs_tree_lock(next); | 1760 | btrfs_tree_lock(next); |
1755 | btrfs_set_lock_blocking(next); | 1761 | btrfs_set_lock_blocking(next); |
@@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1766 | free_extent_buffer(next); | 1772 | free_extent_buffer(next); |
1767 | continue; | 1773 | continue; |
1768 | } | 1774 | } |
1769 | btrfs_read_buffer(next, ptr_gen); | 1775 | ret = btrfs_read_buffer(next, ptr_gen); |
1776 | if (ret) { | ||
1777 | free_extent_buffer(next); | ||
1778 | return ret; | ||
1779 | } | ||
1770 | 1780 | ||
1771 | WARN_ON(*level <= 0); | 1781 | WARN_ON(*level <= 0); |
1772 | if (path->nodes[*level-1]) | 1782 | if (path->nodes[*level-1]) |
@@ -2657,6 +2667,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2657 | btrfs_release_path(path); | 2667 | btrfs_release_path(path); |
2658 | } | 2668 | } |
2659 | btrfs_release_path(path); | 2669 | btrfs_release_path(path); |
2670 | if (ret > 0) | ||
2671 | ret = 0; | ||
2660 | return ret; | 2672 | return ret; |
2661 | } | 2673 | } |
2662 | 2674 | ||
@@ -3028,21 +3040,6 @@ out: | |||
3028 | return ret; | 3040 | return ret; |
3029 | } | 3041 | } |
3030 | 3042 | ||
3031 | static int inode_in_log(struct btrfs_trans_handle *trans, | ||
3032 | struct inode *inode) | ||
3033 | { | ||
3034 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3035 | int ret = 0; | ||
3036 | |||
3037 | mutex_lock(&root->log_mutex); | ||
3038 | if (BTRFS_I(inode)->logged_trans == trans->transid && | ||
3039 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
3040 | ret = 1; | ||
3041 | mutex_unlock(&root->log_mutex); | ||
3042 | return ret; | ||
3043 | } | ||
3044 | |||
3045 | |||
3046 | /* | 3043 | /* |
3047 | * helper function around btrfs_log_inode to make sure newly created | 3044 | * helper function around btrfs_log_inode to make sure newly created |
3048 | * parent directories also end up in the log. A minimal inode and backref | 3045 | * parent directories also end up in the log. A minimal inode and backref |
@@ -3083,7 +3080,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3083 | if (ret) | 3080 | if (ret) |
3084 | goto end_no_trans; | 3081 | goto end_no_trans; |
3085 | 3082 | ||
3086 | if (inode_in_log(trans, inode)) { | 3083 | if (btrfs_inode_in_log(inode, trans->transid)) { |
3087 | ret = BTRFS_NO_LOG_SYNC; | 3084 | ret = BTRFS_NO_LOG_SYNC; |
3088 | goto end_no_trans; | 3085 | goto end_no_trans; |
3089 | } | 3086 | } |
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 12f5147bd2b1..ab942f46b3dd 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c | |||
@@ -23,9 +23,9 @@ | |||
23 | * | 23 | * |
24 | * ulist = ulist_alloc(); | 24 | * ulist = ulist_alloc(); |
25 | * ulist_add(ulist, root); | 25 | * ulist_add(ulist, root); |
26 | * elem = NULL; | 26 | * ULIST_ITER_INIT(&uiter); |
27 | * | 27 | * |
28 | * while ((elem = ulist_next(ulist, elem)) { | 28 | * while ((elem = ulist_next(ulist, &uiter)) { |
29 | * for (all child nodes n in elem) | 29 | * for (all child nodes n in elem) |
30 | * ulist_add(ulist, n); | 30 | * ulist_add(ulist, n); |
31 | * do something useful with the node; | 31 | * do something useful with the node; |
@@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit); | |||
95 | * | 95 | * |
96 | * The allocated ulist will be returned in an initialized state. | 96 | * The allocated ulist will be returned in an initialized state. |
97 | */ | 97 | */ |
98 | struct ulist *ulist_alloc(unsigned long gfp_mask) | 98 | struct ulist *ulist_alloc(gfp_t gfp_mask) |
99 | { | 99 | { |
100 | struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); | 100 | struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); |
101 | 101 | ||
@@ -144,13 +144,22 @@ EXPORT_SYMBOL(ulist_free); | |||
144 | * unaltered. | 144 | * unaltered. |
145 | */ | 145 | */ |
146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 146 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
147 | unsigned long gfp_mask) | 147 | gfp_t gfp_mask) |
148 | { | ||
149 | return ulist_add_merge(ulist, val, aux, NULL, gfp_mask); | ||
150 | } | ||
151 | |||
152 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, | ||
153 | unsigned long *old_aux, gfp_t gfp_mask) | ||
148 | { | 154 | { |
149 | int i; | 155 | int i; |
150 | 156 | ||
151 | for (i = 0; i < ulist->nnodes; ++i) { | 157 | for (i = 0; i < ulist->nnodes; ++i) { |
152 | if (ulist->nodes[i].val == val) | 158 | if (ulist->nodes[i].val == val) { |
159 | if (old_aux) | ||
160 | *old_aux = ulist->nodes[i].aux; | ||
153 | return 0; | 161 | return 0; |
162 | } | ||
154 | } | 163 | } |
155 | 164 | ||
156 | if (ulist->nnodes >= ulist->nodes_alloced) { | 165 | if (ulist->nnodes >= ulist->nodes_alloced) { |
@@ -188,33 +197,26 @@ EXPORT_SYMBOL(ulist_add); | |||
188 | /** | 197 | /** |
189 | * ulist_next - iterate ulist | 198 | * ulist_next - iterate ulist |
190 | * @ulist: ulist to iterate | 199 | * @ulist: ulist to iterate |
191 | * @prev: previously returned element or %NULL to start iteration | 200 | * @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator) |
192 | * | 201 | * |
193 | * Note: locking must be provided by the caller. In case of rwlocks only read | 202 | * Note: locking must be provided by the caller. In case of rwlocks only read |
194 | * locking is needed | 203 | * locking is needed |
195 | * | 204 | * |
196 | * This function is used to iterate an ulist. The iteration is started with | 205 | * This function is used to iterate an ulist. |
197 | * @prev = %NULL. It returns the next element from the ulist or %NULL when the | 206 | * It returns the next element from the ulist or %NULL when the |
198 | * end is reached. No guarantee is made with respect to the order in which | 207 | * end is reached. No guarantee is made with respect to the order in which |
199 | * the elements are returned. They might neither be returned in order of | 208 | * the elements are returned. They might neither be returned in order of |
200 | * addition nor in ascending order. | 209 | * addition nor in ascending order. |
201 | * It is allowed to call ulist_add during an enumeration. Newly added items | 210 | * It is allowed to call ulist_add during an enumeration. Newly added items |
202 | * are guaranteed to show up in the running enumeration. | 211 | * are guaranteed to show up in the running enumeration. |
203 | */ | 212 | */ |
204 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev) | 213 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) |
205 | { | 214 | { |
206 | int next; | ||
207 | |||
208 | if (ulist->nnodes == 0) | 215 | if (ulist->nnodes == 0) |
209 | return NULL; | 216 | return NULL; |
210 | 217 | if (uiter->i < 0 || uiter->i >= ulist->nnodes) | |
211 | if (!prev) | ||
212 | return &ulist->nodes[0]; | ||
213 | |||
214 | next = (prev - ulist->nodes) + 1; | ||
215 | if (next < 0 || next >= ulist->nnodes) | ||
216 | return NULL; | 218 | return NULL; |
217 | 219 | ||
218 | return &ulist->nodes[next]; | 220 | return &ulist->nodes[uiter->i++]; |
219 | } | 221 | } |
220 | EXPORT_SYMBOL(ulist_next); | 222 | EXPORT_SYMBOL(ulist_next); |
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 2e25dec58ec0..21bdc8ec8130 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h | |||
@@ -24,6 +24,10 @@ | |||
24 | */ | 24 | */ |
25 | #define ULIST_SIZE 16 | 25 | #define ULIST_SIZE 16 |
26 | 26 | ||
27 | struct ulist_iterator { | ||
28 | int i; | ||
29 | }; | ||
30 | |||
27 | /* | 31 | /* |
28 | * element of the list | 32 | * element of the list |
29 | */ | 33 | */ |
@@ -59,10 +63,15 @@ struct ulist { | |||
59 | void ulist_init(struct ulist *ulist); | 63 | void ulist_init(struct ulist *ulist); |
60 | void ulist_fini(struct ulist *ulist); | 64 | void ulist_fini(struct ulist *ulist); |
61 | void ulist_reinit(struct ulist *ulist); | 65 | void ulist_reinit(struct ulist *ulist); |
62 | struct ulist *ulist_alloc(unsigned long gfp_mask); | 66 | struct ulist *ulist_alloc(gfp_t gfp_mask); |
63 | void ulist_free(struct ulist *ulist); | 67 | void ulist_free(struct ulist *ulist); |
64 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, | 68 | int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, |
65 | unsigned long gfp_mask); | 69 | gfp_t gfp_mask); |
66 | struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); | 70 | int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux, |
71 | unsigned long *old_aux, gfp_t gfp_mask); | ||
72 | struct ulist_node *ulist_next(struct ulist *ulist, | ||
73 | struct ulist_iterator *uiter); | ||
74 | |||
75 | #define ULIST_ITER_INIT(uiter) ((uiter)->i = 0) | ||
67 | 76 | ||
68 | #endif | 77 | #endif |
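The ulist changes above replace the "previously returned element" cursor with an explicit iterator, as the updated doc-comment example in ulist.c shows. A minimal kernel-style sketch of the new calling convention; only the ulist calls and the val/aux fields are from this patch, the surrounding function and the value added are hypothetical:

#include "ulist.h"

static void ulist_walk_example(void)
{
	struct ulist *ul;
	struct ulist_iterator uiter;
	struct ulist_node *node;

	ul = ulist_alloc(GFP_NOFS);
	if (!ul)
		return;

	ulist_add(ul, 256, 0, GFP_NOFS);

	/* the iterator replaces the old "previously returned element" cursor */
	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(ul, &uiter))) {
		/* ulist_add() during the walk is allowed; new items show up */
		pr_debug("val %llu aux %lu\n",
			 (unsigned long long)node->val, node->aux);
	}

	ulist_free(ul);
}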
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1411b99555a4..7782020996fe 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/random.h> | 23 | #include <linux/random.h> |
24 | #include <linux/iocontext.h> | 24 | #include <linux/iocontext.h> |
25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
26 | #include <linux/ratelimit.h> | ||
26 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
27 | #include <asm/div64.h> | 28 | #include <asm/div64.h> |
28 | #include "compat.h" | 29 | #include "compat.h" |
@@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
39 | struct btrfs_root *root, | 40 | struct btrfs_root *root, |
40 | struct btrfs_device *device); | 41 | struct btrfs_device *device); |
41 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); | 42 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root); |
43 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev); | ||
44 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); | ||
42 | 45 | ||
43 | static DEFINE_MUTEX(uuid_mutex); | 46 | static DEFINE_MUTEX(uuid_mutex); |
44 | static LIST_HEAD(fs_uuids); | 47 | static LIST_HEAD(fs_uuids); |
@@ -361,6 +364,7 @@ static noinline int device_list_add(const char *path, | |||
361 | return -ENOMEM; | 364 | return -ENOMEM; |
362 | } | 365 | } |
363 | device->devid = devid; | 366 | device->devid = devid; |
367 | device->dev_stats_valid = 0; | ||
364 | device->work.func = pending_bios_fn; | 368 | device->work.func = pending_bios_fn; |
365 | memcpy(device->uuid, disk_super->dev_item.uuid, | 369 | memcpy(device->uuid, disk_super->dev_item.uuid, |
366 | BTRFS_UUID_SIZE); | 370 | BTRFS_UUID_SIZE); |
@@ -1633,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1633 | int ret = 0; | 1637 | int ret = 0; |
1634 | 1638 | ||
1635 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) | 1639 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
1636 | return -EINVAL; | 1640 | return -EROFS; |
1637 | 1641 | ||
1638 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, | 1642 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, |
1639 | root->fs_info->bdev_holder); | 1643 | root->fs_info->bdev_holder); |
@@ -4001,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
4001 | return 0; | 4005 | return 0; |
4002 | } | 4006 | } |
4003 | 4007 | ||
4008 | static void *merge_stripe_index_into_bio_private(void *bi_private, | ||
4009 | unsigned int stripe_index) | ||
4010 | { | ||
4011 | /* | ||
4012 | * with single, dup, RAID0, RAID1 and RAID10, stripe_index is | ||
4013 | * at most 1. | ||
4014 | * The alternative solution (instead of stealing bits from the | ||
4015 | * pointer) would be to allocate an intermediate structure | ||
4016 | * that contains the old private pointer plus the stripe_index. | ||
4017 | */ | ||
4018 | BUG_ON((((uintptr_t)bi_private) & 3) != 0); | ||
4019 | BUG_ON(stripe_index > 3); | ||
4020 | return (void *)(((uintptr_t)bi_private) | stripe_index); | ||
4021 | } | ||
4022 | |||
4023 | static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) | ||
4024 | { | ||
4025 | return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); | ||
4026 | } | ||
4027 | |||
4028 | static unsigned int extract_stripe_index_from_bio_private(void *bi_private) | ||
4029 | { | ||
4030 | return (unsigned int)((uintptr_t)bi_private) & 3; | ||
4031 | } | ||
4032 | |||
4004 | static void btrfs_end_bio(struct bio *bio, int err) | 4033 | static void btrfs_end_bio(struct bio *bio, int err) |
4005 | { | 4034 | { |
4006 | struct btrfs_bio *bbio = bio->bi_private; | 4035 | struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); |
4007 | int is_orig_bio = 0; | 4036 | int is_orig_bio = 0; |
4008 | 4037 | ||
4009 | if (err) | 4038 | if (err) { |
4010 | atomic_inc(&bbio->error); | 4039 | atomic_inc(&bbio->error); |
4040 | if (err == -EIO || err == -EREMOTEIO) { | ||
4041 | unsigned int stripe_index = | ||
4042 | extract_stripe_index_from_bio_private( | ||
4043 | bio->bi_private); | ||
4044 | struct btrfs_device *dev; | ||
4045 | |||
4046 | BUG_ON(stripe_index >= bbio->num_stripes); | ||
4047 | dev = bbio->stripes[stripe_index].dev; | ||
4048 | if (bio->bi_rw & WRITE) | ||
4049 | btrfs_dev_stat_inc(dev, | ||
4050 | BTRFS_DEV_STAT_WRITE_ERRS); | ||
4051 | else | ||
4052 | btrfs_dev_stat_inc(dev, | ||
4053 | BTRFS_DEV_STAT_READ_ERRS); | ||
4054 | if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) | ||
4055 | btrfs_dev_stat_inc(dev, | ||
4056 | BTRFS_DEV_STAT_FLUSH_ERRS); | ||
4057 | btrfs_dev_stat_print_on_error(dev); | ||
4058 | } | ||
4059 | } | ||
4011 | 4060 | ||
4012 | if (bio == bbio->orig_bio) | 4061 | if (bio == bbio->orig_bio) |
4013 | is_orig_bio = 1; | 4062 | is_orig_bio = 1; |
@@ -4149,6 +4198,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
4149 | bio = first_bio; | 4198 | bio = first_bio; |
4150 | } | 4199 | } |
4151 | bio->bi_private = bbio; | 4200 | bio->bi_private = bbio; |
4201 | bio->bi_private = merge_stripe_index_into_bio_private( | ||
4202 | bio->bi_private, (unsigned int)dev_nr); | ||
4152 | bio->bi_end_io = btrfs_end_bio; | 4203 | bio->bi_end_io = btrfs_end_bio; |
4153 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | 4204 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; |
4154 | dev = bbio->stripes[dev_nr].dev; | 4205 | dev = bbio->stripes[dev_nr].dev; |
@@ -4509,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root) | |||
4509 | return ret; | 4560 | return ret; |
4510 | } | 4561 | } |
4511 | 4562 | ||
4563 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
4564 | u64 logical, int mirror_num) | ||
4565 | { | ||
4566 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
4567 | int ret; | ||
4568 | u64 map_length = 0; | ||
4569 | struct btrfs_bio *bbio = NULL; | ||
4570 | struct btrfs_device *device; | ||
4571 | |||
4572 | BUG_ON(mirror_num == 0); | ||
4573 | ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, | ||
4574 | mirror_num); | ||
4575 | if (ret) { | ||
4576 | BUG_ON(bbio != NULL); | ||
4577 | return NULL; | ||
4578 | } | ||
4579 | BUG_ON(mirror_num != bbio->mirror_num); | ||
4580 | device = bbio->stripes[mirror_num - 1].dev; | ||
4581 | kfree(bbio); | ||
4582 | return device; | ||
4583 | } | ||
4584 | |||
4512 | int btrfs_read_chunk_tree(struct btrfs_root *root) | 4585 | int btrfs_read_chunk_tree(struct btrfs_root *root) |
4513 | { | 4586 | { |
4514 | struct btrfs_path *path; | 4587 | struct btrfs_path *path; |
@@ -4583,3 +4656,230 @@ error: | |||
4583 | btrfs_free_path(path); | 4656 | btrfs_free_path(path); |
4584 | return ret; | 4657 | return ret; |
4585 | } | 4658 | } |
4659 | |||
4660 | static void __btrfs_reset_dev_stats(struct btrfs_device *dev) | ||
4661 | { | ||
4662 | int i; | ||
4663 | |||
4664 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
4665 | btrfs_dev_stat_reset(dev, i); | ||
4666 | } | ||
4667 | |||
4668 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) | ||
4669 | { | ||
4670 | struct btrfs_key key; | ||
4671 | struct btrfs_key found_key; | ||
4672 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
4673 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
4674 | struct extent_buffer *eb; | ||
4675 | int slot; | ||
4676 | int ret = 0; | ||
4677 | struct btrfs_device *device; | ||
4678 | struct btrfs_path *path = NULL; | ||
4679 | int i; | ||
4680 | |||
4681 | path = btrfs_alloc_path(); | ||
4682 | if (!path) { | ||
4683 | ret = -ENOMEM; | ||
4684 | goto out; | ||
4685 | } | ||
4686 | |||
4687 | mutex_lock(&fs_devices->device_list_mutex); | ||
4688 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
4689 | int item_size; | ||
4690 | struct btrfs_dev_stats_item *ptr; | ||
4691 | |||
4692 | key.objectid = 0; | ||
4693 | key.type = BTRFS_DEV_STATS_KEY; | ||
4694 | key.offset = device->devid; | ||
4695 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); | ||
4696 | if (ret) { | ||
4697 | printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", | ||
4698 | device->name, (unsigned long long)device->devid); | ||
4699 | __btrfs_reset_dev_stats(device); | ||
4700 | device->dev_stats_valid = 1; | ||
4701 | btrfs_release_path(path); | ||
4702 | continue; | ||
4703 | } | ||
4704 | slot = path->slots[0]; | ||
4705 | eb = path->nodes[0]; | ||
4706 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
4707 | item_size = btrfs_item_size_nr(eb, slot); | ||
4708 | |||
4709 | ptr = btrfs_item_ptr(eb, slot, | ||
4710 | struct btrfs_dev_stats_item); | ||
4711 | |||
4712 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { | ||
4713 | if (item_size >= (1 + i) * sizeof(__le64)) | ||
4714 | btrfs_dev_stat_set(device, i, | ||
4715 | btrfs_dev_stats_value(eb, ptr, i)); | ||
4716 | else | ||
4717 | btrfs_dev_stat_reset(device, i); | ||
4718 | } | ||
4719 | |||
4720 | device->dev_stats_valid = 1; | ||
4721 | btrfs_dev_stat_print_on_load(device); | ||
4722 | btrfs_release_path(path); | ||
4723 | } | ||
4724 | mutex_unlock(&fs_devices->device_list_mutex); | ||
4725 | |||
4726 | out: | ||
4727 | btrfs_free_path(path); | ||
4728 | return ret < 0 ? ret : 0; | ||
4729 | } | ||
4730 | |||
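btrfs_init_dev_stats() above only trusts as many on-disk counter slots as the found item actually contains and resets the rest, so an item written by an older, shorter format still loads cleanly. A small sketch of that forward-compatible sizing pattern; the names are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define NR_COUNTERS 5   /* how many counters the running code knows about */

    /* Copy what the on-disk record provides, zero whatever it lacks. */
    static void load_counters(uint64_t dst[NR_COUNTERS],
                              const uint64_t *disk, size_t disk_size)
    {
        for (int i = 0; i < NR_COUNTERS; i++) {
            if (disk_size >= (size_t)(i + 1) * sizeof(uint64_t))
                dst[i] = disk[i];
            else
                dst[i] = 0;   /* field absent in the older format */
        }
    }

    int main(void)
    {
        uint64_t old_item[3] = { 7, 1, 2 };   /* written by an older version */
        uint64_t counters[NR_COUNTERS];

        load_counters(counters, old_item, sizeof(old_item));
        for (int i = 0; i < NR_COUNTERS; i++)
            printf("counter[%d] = %llu\n", i,
                   (unsigned long long)counters[i]);
        return 0;
    }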
4731 | static int update_dev_stat_item(struct btrfs_trans_handle *trans, | ||
4732 | struct btrfs_root *dev_root, | ||
4733 | struct btrfs_device *device) | ||
4734 | { | ||
4735 | struct btrfs_path *path; | ||
4736 | struct btrfs_key key; | ||
4737 | struct extent_buffer *eb; | ||
4738 | struct btrfs_dev_stats_item *ptr; | ||
4739 | int ret; | ||
4740 | int i; | ||
4741 | |||
4742 | key.objectid = 0; | ||
4743 | key.type = BTRFS_DEV_STATS_KEY; | ||
4744 | key.offset = device->devid; | ||
4745 | |||
4746 | path = btrfs_alloc_path(); | ||
4747 | BUG_ON(!path); | ||
4748 | ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); | ||
4749 | if (ret < 0) { | ||
4750 | printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", | ||
4751 | ret, device->name); | ||
4752 | goto out; | ||
4753 | } | ||
4754 | |||
4755 | if (ret == 0 && | ||
4756 | btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { | ||
4757 | /* need to delete old one and insert a new one */ | ||
4758 | ret = btrfs_del_item(trans, dev_root, path); | ||
4759 | if (ret != 0) { | ||
4760 | printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", | ||
4761 | device->name, ret); | ||
4762 | goto out; | ||
4763 | } | ||
4764 | ret = 1; | ||
4765 | } | ||
4766 | |||
4767 | if (ret == 1) { | ||
4768 | /* need to insert a new item */ | ||
4769 | btrfs_release_path(path); | ||
4770 | ret = btrfs_insert_empty_item(trans, dev_root, path, | ||
4771 | &key, sizeof(*ptr)); | ||
4772 | if (ret < 0) { | ||
4773 | printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", | ||
4774 | device->name, ret); | ||
4775 | goto out; | ||
4776 | } | ||
4777 | } | ||
4778 | |||
4779 | eb = path->nodes[0]; | ||
4780 | ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item); | ||
4781 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
4782 | btrfs_set_dev_stats_value(eb, ptr, i, | ||
4783 | btrfs_dev_stat_read(device, i)); | ||
4784 | btrfs_mark_buffer_dirty(eb); | ||
4785 | |||
4786 | out: | ||
4787 | btrfs_free_path(path); | ||
4788 | return ret; | ||
4789 | } | ||
4790 | |||
4791 | /* | ||
4792 | * called from commit_transaction. Writes all changed device stats to disk. | ||
4793 | */ | ||
4794 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | ||
4795 | struct btrfs_fs_info *fs_info) | ||
4796 | { | ||
4797 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
4798 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
4799 | struct btrfs_device *device; | ||
4800 | int ret = 0; | ||
4801 | |||
4802 | mutex_lock(&fs_devices->device_list_mutex); | ||
4803 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | ||
4804 | if (!device->dev_stats_valid || !device->dev_stats_dirty) | ||
4805 | continue; | ||
4806 | |||
4807 | ret = update_dev_stat_item(trans, dev_root, device); | ||
4808 | if (!ret) | ||
4809 | device->dev_stats_dirty = 0; | ||
4810 | } | ||
4811 | mutex_unlock(&fs_devices->device_list_mutex); | ||
4812 | |||
4813 | return ret; | ||
4814 | } | ||
4815 | |||
4816 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index) | ||
4817 | { | ||
4818 | btrfs_dev_stat_inc(dev, index); | ||
4819 | btrfs_dev_stat_print_on_error(dev); | ||
4820 | } | ||
4821 | |||
4822 | void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) | ||
4823 | { | ||
4824 | if (!dev->dev_stats_valid) | ||
4825 | return; | ||
4826 | printk_ratelimited(KERN_ERR | ||
4827 | "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | ||
4828 | dev->name, | ||
4829 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | ||
4830 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), | ||
4831 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), | ||
4832 | btrfs_dev_stat_read(dev, | ||
4833 | BTRFS_DEV_STAT_CORRUPTION_ERRS), | ||
4834 | btrfs_dev_stat_read(dev, | ||
4835 | BTRFS_DEV_STAT_GENERATION_ERRS)); | ||
4836 | } | ||
4837 | |||
4838 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) | ||
4839 | { | ||
4840 | printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", | ||
4841 | dev->name, | ||
4842 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), | ||
4843 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), | ||
4844 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), | ||
4845 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS), | ||
4846 | btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS)); | ||
4847 | } | ||
4848 | |||
4849 | int btrfs_get_dev_stats(struct btrfs_root *root, | ||
4850 | struct btrfs_ioctl_get_dev_stats *stats, | ||
4851 | int reset_after_read) | ||
4852 | { | ||
4853 | struct btrfs_device *dev; | ||
4854 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
4855 | int i; | ||
4856 | |||
4857 | mutex_lock(&fs_devices->device_list_mutex); | ||
4858 | dev = btrfs_find_device(root, stats->devid, NULL, NULL); | ||
4859 | mutex_unlock(&fs_devices->device_list_mutex); | ||
4860 | |||
4861 | if (!dev) { | ||
4862 | printk(KERN_WARNING | ||
4863 | "btrfs: get dev_stats failed, device not found\n"); | ||
4864 | return -ENODEV; | ||
4865 | } else if (!dev->dev_stats_valid) { | ||
4866 | printk(KERN_WARNING | ||
4867 | "btrfs: get dev_stats failed, not yet valid\n"); | ||
4868 | return -ENODEV; | ||
4869 | } else if (reset_after_read) { | ||
4870 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { | ||
4871 | if (stats->nr_items > i) | ||
4872 | stats->values[i] = | ||
4873 | btrfs_dev_stat_read_and_reset(dev, i); | ||
4874 | else | ||
4875 | btrfs_dev_stat_reset(dev, i); | ||
4876 | } | ||
4877 | } else { | ||
4878 | for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) | ||
4879 | if (stats->nr_items > i) | ||
4880 | stats->values[i] = btrfs_dev_stat_read(dev, i); | ||
4881 | } | ||
4882 | if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX) | ||
4883 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; | ||
4884 | return 0; | ||
4885 | } | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bb6b03f97aaa..3406a88ca83e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | 23 | #include <linux/sort.h> |
24 | #include "async-thread.h" | 24 | #include "async-thread.h" |
25 | #include "ioctl.h" | ||
25 | 26 | ||
26 | #define BTRFS_STRIPE_LEN (64 * 1024) | 27 | #define BTRFS_STRIPE_LEN (64 * 1024) |
27 | 28 | ||
@@ -106,6 +107,11 @@ struct btrfs_device { | |||
106 | struct completion flush_wait; | 107 | struct completion flush_wait; |
107 | int nobarriers; | 108 | int nobarriers; |
108 | 109 | ||
110 | /* disk I/O failure stats. For detailed description refer to | ||
111 | * enum btrfs_dev_stat_values in ioctl.h */ | ||
112 | int dev_stats_valid; | ||
113 | int dev_stats_dirty; /* counters need to be written to disk */ | ||
114 | atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; | ||
109 | }; | 115 | }; |
110 | 116 | ||
111 | struct btrfs_fs_devices { | 117 | struct btrfs_fs_devices { |
@@ -281,4 +287,50 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); | |||
281 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 287 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
282 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | 288 | int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, |
283 | u64 *start, u64 *max_avail); | 289 | u64 *start, u64 *max_avail); |
290 | struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, | ||
291 | u64 logical, int mirror_num); | ||
292 | void btrfs_dev_stat_print_on_error(struct btrfs_device *device); | ||
293 | void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); | ||
294 | int btrfs_get_dev_stats(struct btrfs_root *root, | ||
295 | struct btrfs_ioctl_get_dev_stats *stats, | ||
296 | int reset_after_read); | ||
297 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | ||
298 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | ||
299 | struct btrfs_fs_info *fs_info); | ||
300 | |||
301 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | ||
302 | int index) | ||
303 | { | ||
304 | atomic_inc(dev->dev_stat_values + index); | ||
305 | dev->dev_stats_dirty = 1; | ||
306 | } | ||
307 | |||
308 | static inline int btrfs_dev_stat_read(struct btrfs_device *dev, | ||
309 | int index) | ||
310 | { | ||
311 | return atomic_read(dev->dev_stat_values + index); | ||
312 | } | ||
313 | |||
314 | static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, | ||
315 | int index) | ||
316 | { | ||
317 | int ret; | ||
318 | |||
319 | ret = atomic_xchg(dev->dev_stat_values + index, 0); | ||
320 | dev->dev_stats_dirty = 1; | ||
321 | return ret; | ||
322 | } | ||
323 | |||
324 | static inline void btrfs_dev_stat_set(struct btrfs_device *dev, | ||
325 | int index, unsigned long val) | ||
326 | { | ||
327 | atomic_set(dev->dev_stat_values + index, val); | ||
328 | dev->dev_stats_dirty = 1; | ||
329 | } | ||
330 | |||
331 | static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, | ||
332 | int index) | ||
333 | { | ||
334 | btrfs_dev_stat_set(dev, index, 0); | ||
335 | } | ||
284 | #endif | 336 | #endif |
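The inline helpers added to volumes.h pair atomic counters with a plain dirty flag for the writeback path, and read-and-reset goes through an exchange so no increment can slip in between the read and the clearing. A user-space sketch of that pattern with C11 atomics; the kernel code uses atomic_t and atomic_xchg() instead:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Cut-down model of the per-device stats: atomic values plus a flag
     * telling the commit path that something needs to be written out. */
    struct dev_stats {
        atomic_uint values[5];
        int dirty;
    };

    static void stat_inc(struct dev_stats *s, int index)
    {
        atomic_fetch_add(&s->values[index], 1);
        s->dirty = 1;
    }

    static unsigned int stat_read_and_reset(struct dev_stats *s, int index)
    {
        /* the exchange returns the old value and leaves zero behind */
        unsigned int old = atomic_exchange(&s->values[index], 0);

        s->dirty = 1;
        return old;
    }

    int main(void)
    {
        static struct dev_stats s;   /* zero-initialized */
        unsigned int old;

        stat_inc(&s, 0);
        stat_inc(&s, 0);
        old = stat_read_and_reset(&s, 0);
        printf("read+reset: %u, after: %u\n", old, atomic_load(&s.values[0]));
        return 0;
    }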
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index e7a5659087e6..3f4e2d69e83a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
196 | if (ret) | 196 | if (ret) |
197 | goto out; | 197 | goto out; |
198 | 198 | ||
199 | inode_inc_iversion(inode); | ||
199 | inode->i_ctime = CURRENT_TIME; | 200 | inode->i_ctime = CURRENT_TIME; |
200 | ret = btrfs_update_inode(trans, root, inode); | 201 | ret = btrfs_update_inode(trans, root, inode); |
201 | BUG_ON(ret); | 202 | BUG_ON(ret); |
diff --git a/fs/buffer.c b/fs/buffer.c index ad5938ca357c..838a9cf246bd 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3152,7 +3152,7 @@ SYSCALL_DEFINE2(bdflush, int, func, long, data) | |||
3152 | /* | 3152 | /* |
3153 | * Buffer-head allocation | 3153 | * Buffer-head allocation |
3154 | */ | 3154 | */ |
3155 | static struct kmem_cache *bh_cachep; | 3155 | static struct kmem_cache *bh_cachep __read_mostly; |
3156 | 3156 | ||
3157 | /* | 3157 | /* |
3158 | * Once the number of bh's in the machine exceeds this level, we start | 3158 | * Once the number of bh's in the machine exceeds this level, we start |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index fbb2a643ef10..8e1b60e557b6 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -40,38 +40,49 @@ struct ceph_nfs_confh { | |||
40 | u32 parent_name_hash; | 40 | u32 parent_name_hash; |
41 | } __attribute__ ((packed)); | 41 | } __attribute__ ((packed)); |
42 | 42 | ||
43 | static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | 43 | /* |
44 | int connectable) | 44 | * The presence of @parent_inode here tells us whether NFS wants a |
45 | * connectable file handle. However, we want to make a connectable | ||
46 | * file handle unconditionally so that the MDS gets as much of a hint | ||
47 | * as possible. That means we only use @parent_inode to indicate | ||
48 | * whether nfsd wants a connectable fh, and whether we should indicate | ||
49 | * failure from a too-small @max_len. | ||
50 | */ | ||
51 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | ||
52 | struct inode *parent_inode) | ||
45 | { | 53 | { |
46 | int type; | 54 | int type; |
47 | struct ceph_nfs_fh *fh = (void *)rawfh; | 55 | struct ceph_nfs_fh *fh = (void *)rawfh; |
48 | struct ceph_nfs_confh *cfh = (void *)rawfh; | 56 | struct ceph_nfs_confh *cfh = (void *)rawfh; |
49 | struct dentry *parent; | ||
50 | struct inode *inode = dentry->d_inode; | ||
51 | int connected_handle_length = sizeof(*cfh)/4; | 57 | int connected_handle_length = sizeof(*cfh)/4; |
52 | int handle_length = sizeof(*fh)/4; | 58 | int handle_length = sizeof(*fh)/4; |
59 | struct dentry *dentry = d_find_alias(inode); | ||
60 | struct dentry *parent; | ||
53 | 61 | ||
54 | /* don't re-export snaps */ | 62 | /* don't re-export snaps */ |
55 | if (ceph_snap(inode) != CEPH_NOSNAP) | 63 | if (ceph_snap(inode) != CEPH_NOSNAP) |
56 | return -EINVAL; | 64 | return -EINVAL; |
57 | 65 | ||
58 | spin_lock(&dentry->d_lock); | 66 | /* if we found an alias, generate a connectable fh */ |
59 | parent = dentry->d_parent; | 67 | if (*max_len >= connected_handle_length && dentry) { |
60 | if (*max_len >= connected_handle_length) { | ||
61 | dout("encode_fh %p connectable\n", dentry); | 68 | dout("encode_fh %p connectable\n", dentry); |
62 | cfh->ino = ceph_ino(dentry->d_inode); | 69 | spin_lock(&dentry->d_lock); |
70 | parent = dentry->d_parent; | ||
71 | cfh->ino = ceph_ino(inode); | ||
63 | cfh->parent_ino = ceph_ino(parent->d_inode); | 72 | cfh->parent_ino = ceph_ino(parent->d_inode); |
64 | cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, | 73 | cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, |
65 | dentry); | 74 | dentry); |
66 | *max_len = connected_handle_length; | 75 | *max_len = connected_handle_length; |
67 | type = 2; | 76 | type = 2; |
77 | spin_unlock(&dentry->d_lock); | ||
68 | } else if (*max_len >= handle_length) { | 78 | } else if (*max_len >= handle_length) { |
69 | if (connectable) { | 79 | if (parent_inode) { |
80 | /* nfsd wants connectable */ | ||
70 | *max_len = connected_handle_length; | 81 | *max_len = connected_handle_length; |
71 | type = 255; | 82 | type = 255; |
72 | } else { | 83 | } else { |
73 | dout("encode_fh %p\n", dentry); | 84 | dout("encode_fh %p\n", dentry); |
74 | fh->ino = ceph_ino(dentry->d_inode); | 85 | fh->ino = ceph_ino(inode); |
75 | *max_len = handle_length; | 86 | *max_len = handle_length; |
76 | type = 1; | 87 | type = 1; |
77 | } | 88 | } |
@@ -79,7 +90,6 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
79 | *max_len = handle_length; | 90 | *max_len = handle_length; |
80 | type = 255; | 91 | type = 255; |
81 | } | 92 | } |
82 | spin_unlock(&dentry->d_lock); | ||
83 | return type; | 93 | return type; |
84 | } | 94 | } |
85 | 95 | ||
diff --git a/fs/compat.c b/fs/compat.c index 3adf3d4c2cd9..6161255fac45 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -871,12 +871,12 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, | |||
871 | { | 871 | { |
872 | int error; | 872 | int error; |
873 | struct file *file; | 873 | struct file *file; |
874 | int fput_needed; | ||
874 | struct compat_readdir_callback buf; | 875 | struct compat_readdir_callback buf; |
875 | 876 | ||
876 | error = -EBADF; | 877 | file = fget_light(fd, &fput_needed); |
877 | file = fget(fd); | ||
878 | if (!file) | 878 | if (!file) |
879 | goto out; | 879 | return -EBADF; |
880 | 880 | ||
881 | buf.result = 0; | 881 | buf.result = 0; |
882 | buf.dirent = dirent; | 882 | buf.dirent = dirent; |
@@ -885,8 +885,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, | |||
885 | if (buf.result) | 885 | if (buf.result) |
886 | error = buf.result; | 886 | error = buf.result; |
887 | 887 | ||
888 | fput(file); | 888 | fput_light(file, fput_needed); |
889 | out: | ||
890 | return error; | 889 | return error; |
891 | } | 890 | } |
892 | 891 | ||
@@ -953,16 +952,15 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
953 | struct file * file; | 952 | struct file * file; |
954 | struct compat_linux_dirent __user * lastdirent; | 953 | struct compat_linux_dirent __user * lastdirent; |
955 | struct compat_getdents_callback buf; | 954 | struct compat_getdents_callback buf; |
955 | int fput_needed; | ||
956 | int error; | 956 | int error; |
957 | 957 | ||
958 | error = -EFAULT; | ||
959 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 958 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
960 | goto out; | 959 | return -EFAULT; |
961 | 960 | ||
962 | error = -EBADF; | 961 | file = fget_light(fd, &fput_needed); |
963 | file = fget(fd); | ||
964 | if (!file) | 962 | if (!file) |
965 | goto out; | 963 | return -EBADF; |
966 | 964 | ||
967 | buf.current_dir = dirent; | 965 | buf.current_dir = dirent; |
968 | buf.previous = NULL; | 966 | buf.previous = NULL; |
@@ -979,8 +977,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
979 | else | 977 | else |
980 | error = count - buf.count; | 978 | error = count - buf.count; |
981 | } | 979 | } |
982 | fput(file); | 980 | fput_light(file, fput_needed); |
983 | out: | ||
984 | return error; | 981 | return error; |
985 | } | 982 | } |
986 | 983 | ||
@@ -1041,16 +1038,15 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1041 | struct file * file; | 1038 | struct file * file; |
1042 | struct linux_dirent64 __user * lastdirent; | 1039 | struct linux_dirent64 __user * lastdirent; |
1043 | struct compat_getdents_callback64 buf; | 1040 | struct compat_getdents_callback64 buf; |
1041 | int fput_needed; | ||
1044 | int error; | 1042 | int error; |
1045 | 1043 | ||
1046 | error = -EFAULT; | ||
1047 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 1044 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
1048 | goto out; | 1045 | return -EFAULT; |
1049 | 1046 | ||
1050 | error = -EBADF; | 1047 | file = fget_light(fd, &fput_needed); |
1051 | file = fget(fd); | ||
1052 | if (!file) | 1048 | if (!file) |
1053 | goto out; | 1049 | return -EBADF; |
1054 | 1050 | ||
1055 | buf.current_dir = dirent; | 1051 | buf.current_dir = dirent; |
1056 | buf.previous = NULL; | 1052 | buf.previous = NULL; |
@@ -1068,8 +1064,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1068 | else | 1064 | else |
1069 | error = count - buf.count; | 1065 | error = count - buf.count; |
1070 | } | 1066 | } |
1071 | fput(file); | 1067 | fput_light(file, fput_needed); |
1072 | out: | ||
1073 | return error; | 1068 | return error; |
1074 | } | 1069 | } |
1075 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ | 1070 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ |
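The compat readdir paths above switch from fget()/fput() to fget_light()/fput_light(): the light variant may skip reference counting when the task's file table is not shared, and reports through fput_needed whether the caller owes a matching put. A toy model of that contract, not the kernel implementation:

    #include <stdio.h>

    struct file {
        int refcount;
        const char *name;
    };

    /* Pretend the file table is private unless this is set. */
    static int table_is_shared;

    static struct file *toy_fget_light(struct file *f, int *put_needed)
    {
        if (table_is_shared) {
            f->refcount++;     /* slow path: take a real reference */
            *put_needed = 1;
        } else {
            *put_needed = 0;   /* fast path: nothing to drop later */
        }
        return f;
    }

    static void toy_fput_light(struct file *f, int put_needed)
    {
        if (put_needed)
            f->refcount--;
    }

    int main(void)
    {
        struct file f = { 1, "example" };
        int put_needed;
        struct file *g = toy_fget_light(&f, &put_needed);

        printf("using %s, put_needed=%d\n", g->name, put_needed);
        toy_fput_light(g, put_needed);
        printf("refcount back to %d\n", f.refcount);
        return 0;
    }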
diff --git a/fs/dcache.c b/fs/dcache.c index 4435d8b32904..85c9e2bff8e6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -683,8 +683,6 @@ EXPORT_SYMBOL(dget_parent); | |||
683 | /** | 683 | /** |
684 | * d_find_alias - grab a hashed alias of inode | 684 | * d_find_alias - grab a hashed alias of inode |
685 | * @inode: inode in question | 685 | * @inode: inode in question |
686 | * @want_discon: flag, used by d_splice_alias, to request | ||
687 | * that only a DISCONNECTED alias be returned. | ||
688 | * | 686 | * |
689 | * If inode has a hashed alias, or is a directory and has any alias, | 687 | * If inode has a hashed alias, or is a directory and has any alias, |
690 | * acquire the reference to alias and return it. Otherwise return NULL. | 688 | * acquire the reference to alias and return it. Otherwise return NULL. |
@@ -693,10 +691,9 @@ EXPORT_SYMBOL(dget_parent); | |||
693 | * of a filesystem. | 691 | * of a filesystem. |
694 | * | 692 | * |
695 | * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer | 693 | * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer |
696 | * any other hashed alias over that one unless @want_discon is set, | 694 | * any other hashed alias over that. |
697 | * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. | ||
698 | */ | 695 | */ |
699 | static struct dentry *__d_find_alias(struct inode *inode, int want_discon) | 696 | static struct dentry *__d_find_alias(struct inode *inode) |
700 | { | 697 | { |
701 | struct dentry *alias, *discon_alias; | 698 | struct dentry *alias, *discon_alias; |
702 | 699 | ||
@@ -708,7 +705,7 @@ again: | |||
708 | if (IS_ROOT(alias) && | 705 | if (IS_ROOT(alias) && |
709 | (alias->d_flags & DCACHE_DISCONNECTED)) { | 706 | (alias->d_flags & DCACHE_DISCONNECTED)) { |
710 | discon_alias = alias; | 707 | discon_alias = alias; |
711 | } else if (!want_discon) { | 708 | } else { |
712 | __dget_dlock(alias); | 709 | __dget_dlock(alias); |
713 | spin_unlock(&alias->d_lock); | 710 | spin_unlock(&alias->d_lock); |
714 | return alias; | 711 | return alias; |
@@ -739,7 +736,7 @@ struct dentry *d_find_alias(struct inode *inode) | |||
739 | 736 | ||
740 | if (!list_empty(&inode->i_dentry)) { | 737 | if (!list_empty(&inode->i_dentry)) { |
741 | spin_lock(&inode->i_lock); | 738 | spin_lock(&inode->i_lock); |
742 | de = __d_find_alias(inode, 0); | 739 | de = __d_find_alias(inode); |
743 | spin_unlock(&inode->i_lock); | 740 | spin_unlock(&inode->i_lock); |
744 | } | 741 | } |
745 | return de; | 742 | return de; |
@@ -1650,9 +1647,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
1650 | 1647 | ||
1651 | if (inode && S_ISDIR(inode->i_mode)) { | 1648 | if (inode && S_ISDIR(inode->i_mode)) { |
1652 | spin_lock(&inode->i_lock); | 1649 | spin_lock(&inode->i_lock); |
1653 | new = __d_find_alias(inode, 1); | 1650 | new = __d_find_any_alias(inode); |
1654 | if (new) { | 1651 | if (new) { |
1655 | BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); | ||
1656 | spin_unlock(&inode->i_lock); | 1652 | spin_unlock(&inode->i_lock); |
1657 | security_d_instantiate(new, inode); | 1653 | security_d_instantiate(new, inode); |
1658 | d_move(new, dentry); | 1654 | d_move(new, dentry); |
@@ -2482,7 +2478,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) | |||
2482 | struct dentry *alias; | 2478 | struct dentry *alias; |
2483 | 2479 | ||
2484 | /* Does an aliased dentry already exist? */ | 2480 | /* Does an aliased dentry already exist? */ |
2485 | alias = __d_find_alias(inode, 0); | 2481 | alias = __d_find_alias(inode); |
2486 | if (alias) { | 2482 | if (alias) { |
2487 | actual = alias; | 2483 | actual = alias; |
2488 | write_seqlock(&rename_lock); | 2484 | write_seqlock(&rename_lock); |
@@ -2575,7 +2571,7 @@ static int prepend_path(const struct path *path, | |||
2575 | bool slash = false; | 2571 | bool slash = false; |
2576 | int error = 0; | 2572 | int error = 0; |
2577 | 2573 | ||
2578 | br_read_lock(vfsmount_lock); | 2574 | br_read_lock(&vfsmount_lock); |
2579 | while (dentry != root->dentry || vfsmnt != root->mnt) { | 2575 | while (dentry != root->dentry || vfsmnt != root->mnt) { |
2580 | struct dentry * parent; | 2576 | struct dentry * parent; |
2581 | 2577 | ||
@@ -2606,7 +2602,7 @@ static int prepend_path(const struct path *path, | |||
2606 | error = prepend(buffer, buflen, "/", 1); | 2602 | error = prepend(buffer, buflen, "/", 1); |
2607 | 2603 | ||
2608 | out: | 2604 | out: |
2609 | br_read_unlock(vfsmount_lock); | 2605 | br_read_unlock(&vfsmount_lock); |
2610 | return error; | 2606 | return error; |
2611 | 2607 | ||
2612 | global_root: | 2608 | global_root: |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ab35b113003b..a07441a0a878 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -660,11 +660,10 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, | |||
660 | { | 660 | { |
661 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); | 661 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); |
662 | char *lower_buf; | 662 | char *lower_buf; |
663 | size_t lower_bufsiz = PATH_MAX; | ||
664 | mm_segment_t old_fs; | 663 | mm_segment_t old_fs; |
665 | int rc; | 664 | int rc; |
666 | 665 | ||
667 | lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); | 666 | lower_buf = kmalloc(PATH_MAX, GFP_KERNEL); |
668 | if (!lower_buf) { | 667 | if (!lower_buf) { |
669 | rc = -ENOMEM; | 668 | rc = -ENOMEM; |
670 | goto out; | 669 | goto out; |
@@ -673,58 +672,29 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, | |||
673 | set_fs(get_ds()); | 672 | set_fs(get_ds()); |
674 | rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, | 673 | rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, |
675 | (char __user *)lower_buf, | 674 | (char __user *)lower_buf, |
676 | lower_bufsiz); | 675 | PATH_MAX); |
677 | set_fs(old_fs); | 676 | set_fs(old_fs); |
678 | if (rc < 0) | 677 | if (rc < 0) |
679 | goto out; | 678 | goto out; |
680 | lower_bufsiz = rc; | ||
681 | rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, | 679 | rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, |
682 | lower_buf, lower_bufsiz); | 680 | lower_buf, rc); |
683 | out: | 681 | out: |
684 | kfree(lower_buf); | 682 | kfree(lower_buf); |
685 | return rc; | 683 | return rc; |
686 | } | 684 | } |
687 | 685 | ||
688 | static int | 686 | static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) |
689 | ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | ||
690 | { | 687 | { |
691 | char *kbuf; | 688 | char *buf; |
692 | size_t kbufsiz, copied; | 689 | size_t len = PATH_MAX; |
693 | int rc; | 690 | int rc; |
694 | 691 | ||
695 | rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz); | 692 | rc = ecryptfs_readlink_lower(dentry, &buf, &len); |
696 | if (rc) | 693 | if (rc) |
697 | goto out; | 694 | goto out; |
698 | copied = min_t(size_t, bufsiz, kbufsiz); | ||
699 | rc = copy_to_user(buf, kbuf, copied) ? -EFAULT : copied; | ||
700 | kfree(kbuf); | ||
701 | fsstack_copy_attr_atime(dentry->d_inode, | 695 | fsstack_copy_attr_atime(dentry->d_inode, |
702 | ecryptfs_dentry_to_lower(dentry)->d_inode); | 696 | ecryptfs_dentry_to_lower(dentry)->d_inode); |
703 | out: | 697 | buf[len] = '\0'; |
704 | return rc; | ||
705 | } | ||
706 | |||
707 | static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
708 | { | ||
709 | char *buf; | ||
710 | int len = PAGE_SIZE, rc; | ||
711 | mm_segment_t old_fs; | ||
712 | |||
713 | /* Released in ecryptfs_put_link(); only release here on error */ | ||
714 | buf = kmalloc(len, GFP_KERNEL); | ||
715 | if (!buf) { | ||
716 | buf = ERR_PTR(-ENOMEM); | ||
717 | goto out; | ||
718 | } | ||
719 | old_fs = get_fs(); | ||
720 | set_fs(get_ds()); | ||
721 | rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); | ||
722 | set_fs(old_fs); | ||
723 | if (rc < 0) { | ||
724 | kfree(buf); | ||
725 | buf = ERR_PTR(rc); | ||
726 | } else | ||
727 | buf[rc] = '\0'; | ||
728 | out: | 698 | out: |
729 | nd_set_link(nd, buf); | 699 | nd_set_link(nd, buf); |
730 | return NULL; | 700 | return NULL; |
@@ -1153,7 +1123,7 @@ out: | |||
1153 | } | 1123 | } |
1154 | 1124 | ||
1155 | const struct inode_operations ecryptfs_symlink_iops = { | 1125 | const struct inode_operations ecryptfs_symlink_iops = { |
1156 | .readlink = ecryptfs_readlink, | 1126 | .readlink = generic_readlink, |
1157 | .follow_link = ecryptfs_follow_link, | 1127 | .follow_link = ecryptfs_follow_link, |
1158 | .put_link = ecryptfs_put_link, | 1128 | .put_link = ecryptfs_put_link, |
1159 | .permission = ecryptfs_permission, | 1129 | .permission = ecryptfs_permission, |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -280,10 +280,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) | |||
280 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); | 280 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
281 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 281 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
282 | 282 | ||
283 | err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); | ||
284 | if (err) | ||
285 | goto err; | ||
286 | |||
287 | err = insert_vm_struct(mm, vma); | 283 | err = insert_vm_struct(mm, vma); |
288 | if (err) | 284 | if (err) |
289 | goto err; | 285 | goto err; |
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b05acb796135..b0201ca6e9c6 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -304,24 +304,23 @@ out: | |||
304 | 304 | ||
305 | /** | 305 | /** |
306 | * export_encode_fh - default export_operations->encode_fh function | 306 | * export_encode_fh - default export_operations->encode_fh function |
307 | * @dentry: the dentry to encode | 307 | * @inode: the object to encode |
308 | * @fh: where to store the file handle fragment | 308 | * @fh: where to store the file handle fragment |
309 | * @max_len: maximum length to store there | 309 | * @max_len: maximum length to store there |
310 | * @connectable: whether to store parent information | 310 | * @parent: parent directory inode, if wanted |
311 | * | 311 | * |
312 | * This default encode_fh function assumes that the 32-bit inode number | 312 | * This default encode_fh function assumes that the 32-bit inode number |
313 | * is suitable for locating an inode, and that the generation number | 313 | * is suitable for locating an inode, and that the generation number |
314 | * can be used to check that it is still valid. It places them in the | 314 | * can be used to check that it is still valid. It places them in the |
315 | * filehandle fragment where export_decode_fh expects to find them. | 315 | * filehandle fragment where export_decode_fh expects to find them. |
316 | */ | 316 | */ |
317 | static int export_encode_fh(struct dentry *dentry, struct fid *fid, | 317 | static int export_encode_fh(struct inode *inode, struct fid *fid, |
318 | int *max_len, int connectable) | 318 | int *max_len, struct inode *parent) |
319 | { | 319 | { |
320 | struct inode * inode = dentry->d_inode; | ||
321 | int len = *max_len; | 320 | int len = *max_len; |
322 | int type = FILEID_INO32_GEN; | 321 | int type = FILEID_INO32_GEN; |
323 | 322 | ||
324 | if (connectable && (len < 4)) { | 323 | if (parent && (len < 4)) { |
325 | *max_len = 4; | 324 | *max_len = 4; |
326 | return 255; | 325 | return 255; |
327 | } else if (len < 2) { | 326 | } else if (len < 2) { |
@@ -332,14 +331,9 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid, | |||
332 | len = 2; | 331 | len = 2; |
333 | fid->i32.ino = inode->i_ino; | 332 | fid->i32.ino = inode->i_ino; |
334 | fid->i32.gen = inode->i_generation; | 333 | fid->i32.gen = inode->i_generation; |
335 | if (connectable && !S_ISDIR(inode->i_mode)) { | 334 | if (parent) { |
336 | struct inode *parent; | ||
337 | |||
338 | spin_lock(&dentry->d_lock); | ||
339 | parent = dentry->d_parent->d_inode; | ||
340 | fid->i32.parent_ino = parent->i_ino; | 335 | fid->i32.parent_ino = parent->i_ino; |
341 | fid->i32.parent_gen = parent->i_generation; | 336 | fid->i32.parent_gen = parent->i_generation; |
342 | spin_unlock(&dentry->d_lock); | ||
343 | len = 4; | 337 | len = 4; |
344 | type = FILEID_INO32_GEN_PARENT; | 338 | type = FILEID_INO32_GEN_PARENT; |
345 | } | 339 | } |
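The reworked default encoder stores the inode number and generation in a two-word handle, adds the parent's pair for the four-word connectable form, and signals a too-small buffer by returning 255 with the required length written back through max_len. A standalone sketch mirroring that shape; the types and constants are stand-ins, not the exportfs ones:

    #include <stdint.h>
    #include <stdio.h>

    /* Roughly the layout of the i32 form of struct fid: four 32-bit words. */
    struct toy_fid {
        uint32_t ino;
        uint32_t gen;
        uint32_t parent_ino;
        uint32_t parent_gen;
    };

    enum { FID_INO32_GEN = 1, FID_INO32_GEN_PARENT = 2 };

    /* *max_len is in 32-bit words; on success it is set to the number of
     * words used.  Passing a parent requests the connectable form. */
    static int toy_encode_fh(uint32_t ino, uint32_t gen,
                             const uint32_t parent[2],   /* {ino, gen} or NULL */
                             struct toy_fid *fid, int *max_len)
    {
        if (parent && *max_len < 4) {
            *max_len = 4;
            return 255;   /* buffer too small for what was asked */
        }
        if (*max_len < 2) {
            *max_len = 2;
            return 255;
        }

        fid->ino = ino;
        fid->gen = gen;
        *max_len = 2;
        if (parent) {
            fid->parent_ino = parent[0];
            fid->parent_gen = parent[1];
            *max_len = 4;
            return FID_INO32_GEN_PARENT;
        }
        return FID_INO32_GEN;
    }

    int main(void)
    {
        struct toy_fid fid;
        uint32_t parent[2] = { 2, 1 };
        int len = 4;
        int type = toy_encode_fh(1234, 7, parent, &fid, &len);

        printf("type=%d len=%d ino=%u parent_ino=%u\n",
               type, len, fid.ino, fid.parent_ino);
        return 0;
    }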
@@ -352,11 +346,22 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, | |||
352 | { | 346 | { |
353 | const struct export_operations *nop = dentry->d_sb->s_export_op; | 347 | const struct export_operations *nop = dentry->d_sb->s_export_op; |
354 | int error; | 348 | int error; |
349 | struct dentry *p = NULL; | ||
350 | struct inode *inode = dentry->d_inode, *parent = NULL; | ||
355 | 351 | ||
352 | if (connectable && !S_ISDIR(inode->i_mode)) { | ||
353 | p = dget_parent(dentry); | ||
354 | /* | ||
355 | * note that while p might've ceased to be our parent already, | ||
356 | * it's still pinned by us and still positive. | ||
357 | */ | ||
358 | parent = p->d_inode; | ||
359 | } | ||
356 | if (nop->encode_fh) | 360 | if (nop->encode_fh) |
357 | error = nop->encode_fh(dentry, fid->raw, max_len, connectable); | 361 | error = nop->encode_fh(inode, fid->raw, max_len, parent); |
358 | else | 362 | else |
359 | error = export_encode_fh(dentry, fid, max_len, connectable); | 363 | error = export_encode_fh(inode, fid, max_len, parent); |
364 | dput(p); | ||
360 | 365 | ||
361 | return error; | 366 | return error; |
362 | } | 367 | } |
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9ed1bb1f319f..c22f17021b6e 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig | |||
@@ -2,6 +2,8 @@ config EXT4_FS | |||
2 | tristate "The Extended 4 (ext4) filesystem" | 2 | tristate "The Extended 4 (ext4) filesystem" |
3 | select JBD2 | 3 | select JBD2 |
4 | select CRC16 | 4 | select CRC16 |
5 | select CRYPTO | ||
6 | select CRYPTO_CRC32C | ||
5 | help | 7 | help |
6 | This is the next generation of the ext3 filesystem. | 8 | This is the next generation of the ext3 filesystem. |
7 | 9 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index c45c41129a35..99b6324290db 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -168,12 +168,14 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
168 | 168 | ||
169 | /* If checksum is bad mark all blocks used to prevent allocation | 169 | /* If checksum is bad mark all blocks used to prevent allocation |
170 | * essentially implementing a per-group read-only flag. */ | 170 | * essentially implementing a per-group read-only flag. */ |
171 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 171 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
172 | ext4_error(sb, "Checksum bad for group %u", block_group); | 172 | ext4_error(sb, "Checksum bad for group %u", block_group); |
173 | ext4_free_group_clusters_set(sb, gdp, 0); | 173 | ext4_free_group_clusters_set(sb, gdp, 0); |
174 | ext4_free_inodes_set(sb, gdp, 0); | 174 | ext4_free_inodes_set(sb, gdp, 0); |
175 | ext4_itable_unused_set(sb, gdp, 0); | 175 | ext4_itable_unused_set(sb, gdp, 0); |
176 | memset(bh->b_data, 0xff, sb->s_blocksize); | 176 | memset(bh->b_data, 0xff, sb->s_blocksize); |
177 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, | ||
178 | EXT4_BLOCKS_PER_GROUP(sb) / 8); | ||
177 | return; | 179 | return; |
178 | } | 180 | } |
179 | memset(bh->b_data, 0, sb->s_blocksize); | 181 | memset(bh->b_data, 0, sb->s_blocksize); |
@@ -210,6 +212,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
210 | */ | 212 | */ |
211 | ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), | 213 | ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), |
212 | sb->s_blocksize * 8, bh->b_data); | 214 | sb->s_blocksize * 8, bh->b_data); |
215 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, | ||
216 | EXT4_BLOCKS_PER_GROUP(sb) / 8); | ||
217 | ext4_group_desc_csum_set(sb, block_group, gdp); | ||
213 | } | 218 | } |
214 | 219 | ||
215 | /* Return the number of free blocks in a block group. It is used when | 220 | /* Return the number of free blocks in a block group. It is used when |
@@ -276,9 +281,9 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, | |||
276 | } | 281 | } |
277 | 282 | ||
278 | static int ext4_valid_block_bitmap(struct super_block *sb, | 283 | static int ext4_valid_block_bitmap(struct super_block *sb, |
279 | struct ext4_group_desc *desc, | 284 | struct ext4_group_desc *desc, |
280 | unsigned int block_group, | 285 | unsigned int block_group, |
281 | struct buffer_head *bh) | 286 | struct buffer_head *bh) |
282 | { | 287 | { |
283 | ext4_grpblk_t offset; | 288 | ext4_grpblk_t offset; |
284 | ext4_grpblk_t next_zero_bit; | 289 | ext4_grpblk_t next_zero_bit; |
@@ -325,6 +330,23 @@ err_out: | |||
325 | block_group, bitmap_blk); | 330 | block_group, bitmap_blk); |
326 | return 0; | 331 | return 0; |
327 | } | 332 | } |
333 | |||
334 | void ext4_validate_block_bitmap(struct super_block *sb, | ||
335 | struct ext4_group_desc *desc, | ||
336 | unsigned int block_group, | ||
337 | struct buffer_head *bh) | ||
338 | { | ||
339 | if (buffer_verified(bh)) | ||
340 | return; | ||
341 | |||
342 | ext4_lock_group(sb, block_group); | ||
343 | if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && | ||
344 | ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, | ||
345 | EXT4_BLOCKS_PER_GROUP(sb) / 8)) | ||
346 | set_buffer_verified(bh); | ||
347 | ext4_unlock_group(sb, block_group); | ||
348 | } | ||
349 | |||
328 | /** | 350 | /** |
329 | * ext4_read_block_bitmap() | 351 | * ext4_read_block_bitmap() |
330 | * @sb: super block | 352 | * @sb: super block |
@@ -355,12 +377,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) | |||
355 | } | 377 | } |
356 | 378 | ||
357 | if (bitmap_uptodate(bh)) | 379 | if (bitmap_uptodate(bh)) |
358 | return bh; | 380 | goto verify; |
359 | 381 | ||
360 | lock_buffer(bh); | 382 | lock_buffer(bh); |
361 | if (bitmap_uptodate(bh)) { | 383 | if (bitmap_uptodate(bh)) { |
362 | unlock_buffer(bh); | 384 | unlock_buffer(bh); |
363 | return bh; | 385 | goto verify; |
364 | } | 386 | } |
365 | ext4_lock_group(sb, block_group); | 387 | ext4_lock_group(sb, block_group); |
366 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 388 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
@@ -379,7 +401,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) | |||
379 | */ | 401 | */ |
380 | set_bitmap_uptodate(bh); | 402 | set_bitmap_uptodate(bh); |
381 | unlock_buffer(bh); | 403 | unlock_buffer(bh); |
382 | return bh; | 404 | goto verify; |
383 | } | 405 | } |
384 | /* | 406 | /* |
385 | * submit the buffer_head for reading | 407 | * submit the buffer_head for reading |
@@ -390,6 +412,9 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) | |||
390 | get_bh(bh); | 412 | get_bh(bh); |
391 | submit_bh(READ, bh); | 413 | submit_bh(READ, bh); |
392 | return bh; | 414 | return bh; |
415 | verify: | ||
416 | ext4_validate_block_bitmap(sb, desc, block_group, bh); | ||
417 | return bh; | ||
393 | } | 418 | } |
394 | 419 | ||
395 | /* Returns 0 on success, 1 on error */ | 420 | /* Returns 0 on success, 1 on error */ |
@@ -412,7 +437,7 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, | |||
412 | } | 437 | } |
413 | clear_buffer_new(bh); | 438 | clear_buffer_new(bh); |
414 | /* Panic or remount fs read-only if block bitmap is invalid */ | 439 | /* Panic or remount fs read-only if block bitmap is invalid */ |
415 | ext4_valid_block_bitmap(sb, desc, block_group, bh); | 440 | ext4_validate_block_bitmap(sb, desc, block_group, bh); |
416 | return 0; | 441 | return 0; |
417 | } | 442 | } |
418 | 443 | ||
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index fa3af81ac565..b319721da26a 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
@@ -29,3 +29,86 @@ unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars) | |||
29 | 29 | ||
30 | #endif /* EXT4FS_DEBUG */ | 30 | #endif /* EXT4FS_DEBUG */ |
31 | 31 | ||
32 | int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | ||
33 | struct ext4_group_desc *gdp, | ||
34 | struct buffer_head *bh, int sz) | ||
35 | { | ||
36 | __u32 hi; | ||
37 | __u32 provided, calculated; | ||
38 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
39 | |||
40 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
41 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
42 | return 1; | ||
43 | |||
44 | provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); | ||
45 | calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | ||
46 | if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) { | ||
47 | hi = le16_to_cpu(gdp->bg_inode_bitmap_csum_hi); | ||
48 | provided |= (hi << 16); | ||
49 | } else | ||
50 | calculated &= 0xFFFF; | ||
51 | |||
52 | return provided == calculated; | ||
53 | } | ||
54 | |||
55 | void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | ||
56 | struct ext4_group_desc *gdp, | ||
57 | struct buffer_head *bh, int sz) | ||
58 | { | ||
59 | __u32 csum; | ||
60 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
61 | |||
62 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
63 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
64 | return; | ||
65 | |||
66 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | ||
67 | gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); | ||
68 | if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) | ||
69 | gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16); | ||
70 | } | ||
71 | |||
72 | int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | ||
73 | struct ext4_group_desc *gdp, | ||
74 | struct buffer_head *bh, int sz) | ||
75 | { | ||
76 | __u32 hi; | ||
77 | __u32 provided, calculated; | ||
78 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
79 | |||
80 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
81 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
82 | return 1; | ||
83 | |||
84 | provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); | ||
85 | calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | ||
86 | if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) { | ||
87 | hi = le16_to_cpu(gdp->bg_block_bitmap_csum_hi); | ||
88 | provided |= (hi << 16); | ||
89 | } else | ||
90 | calculated &= 0xFFFF; | ||
91 | |||
92 | if (provided == calculated) | ||
93 | return 1; | ||
94 | |||
95 | ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group); | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | ||
100 | struct ext4_group_desc *gdp, | ||
101 | struct buffer_head *bh, int sz) | ||
102 | { | ||
103 | __u32 csum; | ||
104 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
105 | |||
106 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
107 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
108 | return; | ||
109 | |||
110 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); | ||
111 | gdp->bg_block_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF); | ||
112 | if (sbi->s_desc_size >= EXT4_BG_BLOCK_BITMAP_CSUM_HI_END) | ||
113 | gdp->bg_block_bitmap_csum_hi = cpu_to_le16(csum >> 16); | ||
114 | } | ||
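The bitmap checksum helpers above split one crc32c across two 16-bit descriptor fields and, when the descriptor is too small to hold the high word, compare only the low half on verification. A compact sketch of that split with a toy descriptor standing in for struct ext4_group_desc:

    #include <stdint.h>
    #include <stdio.h>

    struct toy_desc {
        uint16_t csum_lo;
        uint16_t csum_hi;
    };

    static void store_csum(struct toy_desc *d, uint32_t csum, int has_hi_field)
    {
        d->csum_lo = (uint16_t)(csum & 0xFFFF);
        if (has_hi_field)
            d->csum_hi = (uint16_t)(csum >> 16);
    }

    static int verify_csum(const struct toy_desc *d, uint32_t calculated,
                           int has_hi_field)
    {
        uint32_t provided = d->csum_lo;

        if (has_hi_field)
            provided |= (uint32_t)d->csum_hi << 16;
        else
            calculated &= 0xFFFF;   /* only 16 bits were ever stored */
        return provided == calculated;
    }

    int main(void)
    {
        struct toy_desc d = { 0, 0 };
        uint32_t csum = 0xDEADBEEF;

        store_csum(&d, csum, 0);   /* old, short descriptor */
        printf("short descriptor verifies: %d\n", verify_csum(&d, csum, 0));
        store_csum(&d, csum, 1);   /* descriptor with the high field */
        printf("full descriptor verifies: %d\n", verify_csum(&d, csum, 1));
        return 0;
    }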
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b86786202643..aa39e600d159 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -179,6 +179,18 @@ static int ext4_readdir(struct file *filp, | |||
179 | continue; | 179 | continue; |
180 | } | 180 | } |
181 | 181 | ||
182 | /* Check the checksum */ | ||
183 | if (!buffer_verified(bh) && | ||
184 | !ext4_dirent_csum_verify(inode, | ||
185 | (struct ext4_dir_entry *)bh->b_data)) { | ||
186 | EXT4_ERROR_FILE(filp, 0, "directory fails checksum " | ||
187 | "at offset %llu", | ||
188 | (unsigned long long)filp->f_pos); | ||
189 | filp->f_pos += sb->s_blocksize - offset; | ||
190 | continue; | ||
191 | } | ||
192 | set_buffer_verified(bh); | ||
193 | |||
182 | revalidate: | 194 | revalidate: |
183 | /* If the dir block has changed since the last call to | 195 | /* If the dir block has changed since the last call to |
184 | * readdir(2), then we might be pointing to an invalid | 196 | * readdir(2), then we might be pointing to an invalid |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c21b1de51afb..cfc4e01b3c83 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
30 | #include <linux/blockgroup_lock.h> | 30 | #include <linux/blockgroup_lock.h> |
31 | #include <linux/percpu_counter.h> | 31 | #include <linux/percpu_counter.h> |
32 | #include <crypto/hash.h> | ||
32 | #ifdef __KERNEL__ | 33 | #ifdef __KERNEL__ |
33 | #include <linux/compat.h> | 34 | #include <linux/compat.h> |
34 | #endif | 35 | #endif |
@@ -298,7 +299,9 @@ struct ext4_group_desc | |||
298 | __le16 bg_free_inodes_count_lo;/* Free inodes count */ | 299 | __le16 bg_free_inodes_count_lo;/* Free inodes count */ |
299 | __le16 bg_used_dirs_count_lo; /* Directories count */ | 300 | __le16 bg_used_dirs_count_lo; /* Directories count */ |
300 | __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ | 301 | __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */ |
301 | __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ | 302 | __le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */ |
303 | __le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */ | ||
304 | __le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */ | ||
302 | __le16 bg_itable_unused_lo; /* Unused inodes count */ | 305 | __le16 bg_itable_unused_lo; /* Unused inodes count */ |
303 | __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ | 306 | __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ |
304 | __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ | 307 | __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ |
@@ -308,9 +311,19 @@ struct ext4_group_desc | |||
308 | __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ | 311 | __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */ |
309 | __le16 bg_used_dirs_count_hi; /* Directories count MSB */ | 312 | __le16 bg_used_dirs_count_hi; /* Directories count MSB */ |
310 | __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ | 313 | __le16 bg_itable_unused_hi; /* Unused inodes count MSB */ |
311 | __u32 bg_reserved2[3]; | 314 | __le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */ |
315 | __le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */ | ||
316 | __le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */ | ||
317 | __u32 bg_reserved; | ||
312 | }; | 318 | }; |
313 | 319 | ||
320 | #define EXT4_BG_INODE_BITMAP_CSUM_HI_END \ | ||
321 | (offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \ | ||
322 | sizeof(__le16)) | ||
323 | #define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \ | ||
324 | (offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \ | ||
325 | sizeof(__le16)) | ||
326 | |||
314 | /* | 327 | /* |
315 | * Structure of a flex block group info | 328 | * Structure of a flex block group info |
316 | */ | 329 | */ |
@@ -650,7 +663,8 @@ struct ext4_inode { | |||
650 | __le16 l_i_file_acl_high; | 663 | __le16 l_i_file_acl_high; |
651 | __le16 l_i_uid_high; /* these 2 fields */ | 664 | __le16 l_i_uid_high; /* these 2 fields */ |
652 | __le16 l_i_gid_high; /* were reserved2[0] */ | 665 | __le16 l_i_gid_high; /* were reserved2[0] */ |
653 | __u32 l_i_reserved2; | 666 | __le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */ |
667 | __le16 l_i_reserved; | ||
654 | } linux2; | 668 | } linux2; |
655 | struct { | 669 | struct { |
656 | __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ | 670 | __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ |
@@ -666,7 +680,7 @@ struct ext4_inode { | |||
666 | } masix2; | 680 | } masix2; |
667 | } osd2; /* OS dependent 2 */ | 681 | } osd2; /* OS dependent 2 */ |
668 | __le16 i_extra_isize; | 682 | __le16 i_extra_isize; |
669 | __le16 i_pad1; | 683 | __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */ |
670 | __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ | 684 | __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ |
671 | __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ | 685 | __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ |
672 | __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ | 686 | __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ |
@@ -768,7 +782,7 @@ do { \ | |||
768 | #define i_gid_low i_gid | 782 | #define i_gid_low i_gid |
769 | #define i_uid_high osd2.linux2.l_i_uid_high | 783 | #define i_uid_high osd2.linux2.l_i_uid_high |
770 | #define i_gid_high osd2.linux2.l_i_gid_high | 784 | #define i_gid_high osd2.linux2.l_i_gid_high |
771 | #define i_reserved2 osd2.linux2.l_i_reserved2 | 785 | #define i_checksum_lo osd2.linux2.l_i_checksum_lo |
772 | 786 | ||
773 | #elif defined(__GNU__) | 787 | #elif defined(__GNU__) |
774 | 788 | ||
@@ -908,6 +922,9 @@ struct ext4_inode_info { | |||
908 | */ | 922 | */ |
909 | tid_t i_sync_tid; | 923 | tid_t i_sync_tid; |
910 | tid_t i_datasync_tid; | 924 | tid_t i_datasync_tid; |
925 | |||
926 | /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ | ||
927 | __u32 i_csum_seed; | ||
911 | }; | 928 | }; |
912 | 929 | ||
913 | /* | 930 | /* |
@@ -1001,6 +1018,9 @@ extern void ext4_set_bits(void *bm, int cur, int len); | |||
1001 | #define EXT4_ERRORS_PANIC 3 /* Panic */ | 1018 | #define EXT4_ERRORS_PANIC 3 /* Panic */ |
1002 | #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE | 1019 | #define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE |
1003 | 1020 | ||
1021 | /* Metadata checksum algorithm codes */ | ||
1022 | #define EXT4_CRC32C_CHKSUM 1 | ||
1023 | |||
1004 | /* | 1024 | /* |
1005 | * Structure of the super block | 1025 | * Structure of the super block |
1006 | */ | 1026 | */ |
@@ -1087,7 +1107,7 @@ struct ext4_super_block { | |||
1087 | __le64 s_mmp_block; /* Block for multi-mount protection */ | 1107 | __le64 s_mmp_block; /* Block for multi-mount protection */ |
1088 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 1108 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
1089 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ | 1109 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
1090 | __u8 s_reserved_char_pad; | 1110 | __u8 s_checksum_type; /* metadata checksum algorithm used */ |
1091 | __le16 s_reserved_pad; | 1111 | __le16 s_reserved_pad; |
1092 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ | 1112 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ |
1093 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ | 1113 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ |
@@ -1113,7 +1133,8 @@ struct ext4_super_block { | |||
1113 | __le32 s_usr_quota_inum; /* inode for tracking user quota */ | 1133 | __le32 s_usr_quota_inum; /* inode for tracking user quota */ |
1114 | __le32 s_grp_quota_inum; /* inode for tracking group quota */ | 1134 | __le32 s_grp_quota_inum; /* inode for tracking group quota */ |
1115 | __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ | 1135 | __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ |
1116 | __le32 s_reserved[109]; /* Padding to the end of the block */ | 1136 | __le32 s_reserved[108]; /* Padding to the end of the block */ |
1137 | __le32 s_checksum; /* crc32c(superblock) */ | ||
1117 | }; | 1138 | }; |
1118 | 1139 | ||
1119 | #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) | 1140 | #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) |
@@ -1176,6 +1197,7 @@ struct ext4_sb_info { | |||
1176 | struct proc_dir_entry *s_proc; | 1197 | struct proc_dir_entry *s_proc; |
1177 | struct kobject s_kobj; | 1198 | struct kobject s_kobj; |
1178 | struct completion s_kobj_unregister; | 1199 | struct completion s_kobj_unregister; |
1200 | struct super_block *s_sb; | ||
1179 | 1201 | ||
1180 | /* Journaling */ | 1202 | /* Journaling */ |
1181 | struct journal_s *s_journal; | 1203 | struct journal_s *s_journal; |
@@ -1266,6 +1288,12 @@ struct ext4_sb_info { | |||
1266 | 1288 | ||
1267 | /* record the last minlen when FITRIM is called. */ | 1289 | /* record the last minlen when FITRIM is called. */ |
1268 | atomic_t s_last_trim_minblks; | 1290 | atomic_t s_last_trim_minblks; |
1291 | |||
1292 | /* Reference to checksum algorithm driver via cryptoapi */ | ||
1293 | struct crypto_shash *s_chksum_driver; | ||
1294 | |||
1295 | /* Precomputed FS UUID checksum for seeding other checksums */ | ||
1296 | __u32 s_csum_seed; | ||
1269 | }; | 1297 | }; |
1270 | 1298 | ||
1271 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1299 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1414,6 +1442,12 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1414 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 | 1442 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 |
1415 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 | 1443 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 |
1416 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 | 1444 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 |
1445 | /* | ||
1446 | * METADATA_CSUM also enables group descriptor checksums (GDT_CSUM). When | ||
1447 | * METADATA_CSUM is set, group descriptor checksums use the same algorithm as | ||
1448 | * all other data structures' checksums. However, the METADATA_CSUM and | ||
1449 | * GDT_CSUM bits are mutually exclusive. | ||
1450 | */ | ||
1417 | #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 | 1451 | #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 |
1418 | 1452 | ||
1419 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 | 1453 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 |
@@ -1461,7 +1495,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1461 | EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ | 1495 | EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ |
1462 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ | 1496 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ |
1463 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ | 1497 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\ |
1464 | EXT4_FEATURE_RO_COMPAT_BIGALLOC) | 1498 | EXT4_FEATURE_RO_COMPAT_BIGALLOC |\ |
1499 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) | ||
1465 | 1500 | ||
1466 | /* | 1501 | /* |
1467 | * Default values for user and/or group using reserved blocks | 1502 | * Default values for user and/or group using reserved blocks |
@@ -1527,6 +1562,18 @@ struct ext4_dir_entry_2 { | |||
1527 | }; | 1562 | }; |
1528 | 1563 | ||
1529 | /* | 1564 | /* |
1565 | * This is a bogus directory entry at the end of each leaf block that | ||
1566 | * records checksums. | ||
1567 | */ | ||
1568 | struct ext4_dir_entry_tail { | ||
1569 | __le32 det_reserved_zero1; /* Pretend to be unused */ | ||
1570 | __le16 det_rec_len; /* 12 */ | ||
1571 | __u8 det_reserved_zero2; /* Zero name length */ | ||
1572 | __u8 det_reserved_ft; /* 0xDE, fake file type */ | ||
1573 | __le32 det_checksum; /* crc32c(uuid+inum+dirblock) */ | ||
1574 | }; | ||
1575 | |||
1576 | /* | ||
1530 | * Ext4 directory file types. Only the low 3 bits are used. The | 1577 | * Ext4 directory file types. Only the low 3 bits are used. The |
1531 | * other bits are reserved for now. | 1578 | * other bits are reserved for now. |
1532 | */ | 1579 | */ |
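Because the tail masquerades as an ordinary empty dirent (zero name length, rec_len 12, reserved file type 0xDE), readers that predate the checksum feature simply step over it. Its position is fixed: the last 12 bytes of the leaf block, which is exactly what the EXT4_DIRENT_TAIL() macro added to namei.c later in this patch computes. A standalone restatement of that placement, with the helper name chosen only for illustration:

static inline struct ext4_dir_entry_tail *
dirent_tail_of_block(void *dirent_block, unsigned int blocksize)
{
	/* The checksum dirent occupies the final 12 bytes of the block. */
	return (struct ext4_dir_entry_tail *)
		((char *)dirent_block + blocksize -
		 sizeof(struct ext4_dir_entry_tail));
}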
@@ -1541,6 +1588,8 @@ struct ext4_dir_entry_2 { | |||
1541 | 1588 | ||
1542 | #define EXT4_FT_MAX 8 | 1589 | #define EXT4_FT_MAX 8 |
1543 | 1590 | ||
1591 | #define EXT4_FT_DIR_CSUM 0xDE | ||
1592 | |||
1544 | /* | 1593 | /* |
1545 | * EXT4_DIR_PAD defines the directory entries boundaries | 1594 | * EXT4_DIR_PAD defines the directory entries boundaries |
1546 | * | 1595 | * |
@@ -1609,6 +1658,25 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | |||
1609 | #define DX_HASH_HALF_MD4_UNSIGNED 4 | 1658 | #define DX_HASH_HALF_MD4_UNSIGNED 4 |
1610 | #define DX_HASH_TEA_UNSIGNED 5 | 1659 | #define DX_HASH_TEA_UNSIGNED 5 |
1611 | 1660 | ||
1661 | static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, | ||
1662 | const void *address, unsigned int length) | ||
1663 | { | ||
1664 | struct { | ||
1665 | struct shash_desc shash; | ||
1666 | char ctx[crypto_shash_descsize(sbi->s_chksum_driver)]; | ||
1667 | } desc; | ||
1668 | int err; | ||
1669 | |||
1670 | desc.shash.tfm = sbi->s_chksum_driver; | ||
1671 | desc.shash.flags = 0; | ||
1672 | *(u32 *)desc.ctx = crc; | ||
1673 | |||
1674 | err = crypto_shash_update(&desc.shash, address, length); | ||
1675 | BUG_ON(err); | ||
1676 | |||
1677 | return *(u32 *)desc.ctx; | ||
1678 | } | ||
1679 | |||
1612 | #ifdef __KERNEL__ | 1680 | #ifdef __KERNEL__ |
1613 | 1681 | ||
1614 | /* hash info structure used by the directory hash */ | 1682 | /* hash info structure used by the directory hash */ |
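ext4_chksum() threads a running crc32c through the crypto API: the caller's crc is loaded into the shash context, crypto_shash_update() folds in the buffer, and the updated value is read back out of the context. The s_csum_seed field added above is what callers pass as that initial crc. The superblock code that fills it in is not part of this excerpt, so the first line below is an assumption (upstream seeds crc32c with ~0 over the 16-byte filesystem UUID); the second call mirrors the mmp.c usage that appears later in this patch:

	/* once, at mount: derive the per-filesystem seed from the UUID */
	sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
				       sizeof(es->s_uuid));

	/* every metadata checksum then starts from that seed, e.g. MMP: */
	csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp,
			   offsetof(struct mmp_struct, mmp_checksum));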
@@ -1741,7 +1809,8 @@ struct mmp_struct { | |||
1741 | __le16 mmp_check_interval; | 1809 | __le16 mmp_check_interval; |
1742 | 1810 | ||
1743 | __le16 mmp_pad1; | 1811 | __le16 mmp_pad1; |
1744 | __le32 mmp_pad2[227]; | 1812 | __le32 mmp_pad2[226]; |
1813 | __le32 mmp_checksum; /* crc32c(uuid+mmp_block) */ | ||
1745 | }; | 1814 | }; |
1746 | 1815 | ||
1747 | /* arguments passed to the mmp thread */ | 1816 | /* arguments passed to the mmp thread */ |
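Shrinking mmp_pad2 from 227 to 226 elements frees exactly the four bytes now taken by mmp_checksum, so the on-disk size of struct mmp_struct does not change. Assuming the fields ahead of the padding total 116 bytes (magic 4 + seq 4 + time 8 + nodename 64 + bdevname 32 + check_interval 2 + pad1 2), as in the upstream definition, the layout still sums to 1024 bytes and the checksum covers everything before it:

	/* 116 + 226 * 4 + 4 == 1024, same size as with mmp_pad2[227] */
	BUILD_BUG_ON(sizeof(struct mmp_struct) != 1024);

	/* mmp.c below checksums bytes [0, offsetof(mmp_checksum)) == 1020 */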
@@ -1784,8 +1853,24 @@ struct mmpd_data { | |||
1784 | 1853 | ||
1785 | /* bitmap.c */ | 1854 | /* bitmap.c */ |
1786 | extern unsigned int ext4_count_free(struct buffer_head *, unsigned); | 1855 | extern unsigned int ext4_count_free(struct buffer_head *, unsigned); |
1856 | void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | ||
1857 | struct ext4_group_desc *gdp, | ||
1858 | struct buffer_head *bh, int sz); | ||
1859 | int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | ||
1860 | struct ext4_group_desc *gdp, | ||
1861 | struct buffer_head *bh, int sz); | ||
1862 | void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, | ||
1863 | struct ext4_group_desc *gdp, | ||
1864 | struct buffer_head *bh, int sz); | ||
1865 | int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, | ||
1866 | struct ext4_group_desc *gdp, | ||
1867 | struct buffer_head *bh, int sz); | ||
1787 | 1868 | ||
1788 | /* balloc.c */ | 1869 | /* balloc.c */ |
1870 | extern void ext4_validate_block_bitmap(struct super_block *sb, | ||
1871 | struct ext4_group_desc *desc, | ||
1872 | unsigned int block_group, | ||
1873 | struct buffer_head *bh); | ||
1789 | extern unsigned int ext4_block_group(struct super_block *sb, | 1874 | extern unsigned int ext4_block_group(struct super_block *sb, |
1790 | ext4_fsblk_t blocknr); | 1875 | ext4_fsblk_t blocknr); |
1791 | extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, | 1876 | extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, |
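The new bitmap checksum helpers take the bitmap length in bytes, which is why every caller introduced by this patch passes EXT4_INODES_PER_GROUP(sb) / 8 or EXT4_BLOCKS_PER_GROUP(sb) / 8. For the common case of 4 KiB blocks and the default 32768 blocks per group, that byte count is exactly one block:

	/* 32768 blocks per group / 8 bits per byte = 4096 bytes = 1 block */
	int sz = EXT4_BLOCKS_PER_GROUP(sb) / 8;

	ext4_block_bitmap_csum_set(sb, block_group, gdp, bh, sz);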
@@ -1864,7 +1949,7 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate); | |||
1864 | /* mballoc.c */ | 1949 | /* mballoc.c */ |
1865 | extern long ext4_mb_stats; | 1950 | extern long ext4_mb_stats; |
1866 | extern long ext4_mb_max_to_scan; | 1951 | extern long ext4_mb_max_to_scan; |
1867 | extern int ext4_mb_init(struct super_block *, int); | 1952 | extern int ext4_mb_init(struct super_block *); |
1868 | extern int ext4_mb_release(struct super_block *); | 1953 | extern int ext4_mb_release(struct super_block *); |
1869 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | 1954 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, |
1870 | struct ext4_allocation_request *, int *); | 1955 | struct ext4_allocation_request *, int *); |
@@ -1936,6 +2021,8 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); | |||
1936 | extern int ext4_ext_migrate(struct inode *); | 2021 | extern int ext4_ext_migrate(struct inode *); |
1937 | 2022 | ||
1938 | /* namei.c */ | 2023 | /* namei.c */ |
2024 | extern int ext4_dirent_csum_verify(struct inode *inode, | ||
2025 | struct ext4_dir_entry *dirent); | ||
1939 | extern int ext4_orphan_add(handle_t *, struct inode *); | 2026 | extern int ext4_orphan_add(handle_t *, struct inode *); |
1940 | extern int ext4_orphan_del(handle_t *, struct inode *); | 2027 | extern int ext4_orphan_del(handle_t *, struct inode *); |
1941 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | 2028 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, |
@@ -1950,6 +2037,10 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1950 | extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); | 2037 | extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); |
1951 | 2038 | ||
1952 | /* super.c */ | 2039 | /* super.c */ |
2040 | extern int ext4_superblock_csum_verify(struct super_block *sb, | ||
2041 | struct ext4_super_block *es); | ||
2042 | extern void ext4_superblock_csum_set(struct super_block *sb, | ||
2043 | struct ext4_super_block *es); | ||
1953 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 2044 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
1954 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); | 2045 | extern void *ext4_kvzalloc(size_t size, gfp_t flags); |
1955 | extern void ext4_kvfree(void *ptr); | 2046 | extern void ext4_kvfree(void *ptr); |
@@ -2025,10 +2116,17 @@ extern void ext4_used_dirs_set(struct super_block *sb, | |||
2025 | struct ext4_group_desc *bg, __u32 count); | 2116 | struct ext4_group_desc *bg, __u32 count); |
2026 | extern void ext4_itable_unused_set(struct super_block *sb, | 2117 | extern void ext4_itable_unused_set(struct super_block *sb, |
2027 | struct ext4_group_desc *bg, __u32 count); | 2118 | struct ext4_group_desc *bg, __u32 count); |
2028 | extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, | 2119 | extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group, |
2029 | struct ext4_group_desc *gdp); | ||
2030 | extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, | ||
2031 | struct ext4_group_desc *gdp); | 2120 | struct ext4_group_desc *gdp); |
2121 | extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group, | ||
2122 | struct ext4_group_desc *gdp); | ||
2123 | |||
2124 | static inline int ext4_has_group_desc_csum(struct super_block *sb) | ||
2125 | { | ||
2126 | return EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
2127 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM | | ||
2128 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); | ||
2129 | } | ||
2032 | 2130 | ||
2033 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) | 2131 | static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) |
2034 | { | 2132 | { |
@@ -2225,6 +2323,9 @@ static inline void ext4_unlock_group(struct super_block *sb, | |||
2225 | 2323 | ||
2226 | static inline void ext4_mark_super_dirty(struct super_block *sb) | 2324 | static inline void ext4_mark_super_dirty(struct super_block *sb) |
2227 | { | 2325 | { |
2326 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
2327 | |||
2328 | ext4_superblock_csum_set(sb, es); | ||
2228 | if (EXT4_SB(sb)->s_journal == NULL) | 2329 | if (EXT4_SB(sb)->s_journal == NULL) |
2229 | sb->s_dirt =1; | 2330 | sb->s_dirt =1; |
2230 | } | 2331 | } |
@@ -2314,6 +2415,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, | |||
2314 | 2415 | ||
2315 | /* mmp.c */ | 2416 | /* mmp.c */ |
2316 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); | 2417 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); |
2418 | extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); | ||
2419 | extern int ext4_mmp_csum_verify(struct super_block *sb, | ||
2420 | struct mmp_struct *mmp); | ||
2317 | 2421 | ||
2318 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2422 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ |
2319 | enum ext4_state_bits { | 2423 | enum ext4_state_bits { |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 0f58b86e3a02..cb1b2c919963 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -63,9 +63,22 @@ | |||
63 | * ext4_inode has i_block array (60 bytes total). | 63 | * ext4_inode has i_block array (60 bytes total). |
64 | * The first 12 bytes store ext4_extent_header; | 64 | * The first 12 bytes store ext4_extent_header; |
65 | * the remainder stores an array of ext4_extent. | 65 | * the remainder stores an array of ext4_extent. |
66 | * For non-inode extent blocks, ext4_extent_tail | ||
67 | * follows the array. | ||
66 | */ | 68 | */ |
67 | 69 | ||
68 | /* | 70 | /* |
71 | * This is the extent tail on-disk structure. | ||
72 | * All other extent structures are 12 bytes long. It turns out that | ||
73 | * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which | ||
74 | * covers all valid ext4 block sizes. Therefore, this tail structure can be | ||
75 | * crammed into the end of the block without having to rebalance the tree. | ||
76 | */ | ||
77 | struct ext4_extent_tail { | ||
78 | __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ | ||
79 | }; | ||
80 | |||
81 | /* | ||
69 | * This is the extent on-disk structure. | 82 | * This is the extent on-disk structure. |
70 | * It's used at the bottom of the tree. | 83 | * It's used at the bottom of the tree. |
71 | */ | 84 | */ |
@@ -101,6 +114,17 @@ struct ext4_extent_header { | |||
101 | 114 | ||
102 | #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) | 115 | #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) |
103 | 116 | ||
117 | #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ | ||
118 | (sizeof(struct ext4_extent_header) + \ | ||
119 | (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max))) | ||
120 | |||
121 | static inline struct ext4_extent_tail * | ||
122 | find_ext4_extent_tail(struct ext4_extent_header *eh) | ||
123 | { | ||
124 | return (struct ext4_extent_tail *)(((void *)eh) + | ||
125 | EXT4_EXTENT_TAIL_OFFSET(eh)); | ||
126 | } | ||
127 | |||
104 | /* | 128 | /* |
105 | * Array of ext4_ext_path contains path to some extent. | 129 | * Array of ext4_ext_path contains path to some extent. |
106 | * Creation/lookup routines use it for traversal/splitting/etc. | 130 | * Creation/lookup routines use it for traversal/splitting/etc. |
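A worked instance of the two additions above for a 4096-byte block: 4096 % 12 = 4, so after the 12-byte header and a whole number of 12-byte entries at least four bytes remain for the __le32 tail. With eh_max = 340 (the value ext4_ext_space_block() produces for 4 KiB blocks, cited as background rather than taken from this excerpt), EXT4_EXTENT_TAIL_OFFSET() lands on byte 4092:

	/* sizeof(header) == sizeof(extent) == 12, sizeof(tail) == 4 */
	unsigned int tail_off = 12 + 340 * 12;   /* = 4092 */
	unsigned int spare    = 4096 - tail_off; /* = 4, one __le32 checksum */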
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index aca179017582..90f7c2e84db1 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -138,16 +138,23 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
138 | } | 138 | } |
139 | 139 | ||
140 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | 140 | int __ext4_handle_dirty_super(const char *where, unsigned int line, |
141 | handle_t *handle, struct super_block *sb) | 141 | handle_t *handle, struct super_block *sb, |
142 | int now) | ||
142 | { | 143 | { |
143 | struct buffer_head *bh = EXT4_SB(sb)->s_sbh; | 144 | struct buffer_head *bh = EXT4_SB(sb)->s_sbh; |
144 | int err = 0; | 145 | int err = 0; |
145 | 146 | ||
146 | if (ext4_handle_valid(handle)) { | 147 | if (ext4_handle_valid(handle)) { |
148 | ext4_superblock_csum_set(sb, | ||
149 | (struct ext4_super_block *)bh->b_data); | ||
147 | err = jbd2_journal_dirty_metadata(handle, bh); | 150 | err = jbd2_journal_dirty_metadata(handle, bh); |
148 | if (err) | 151 | if (err) |
149 | ext4_journal_abort_handle(where, line, __func__, | 152 | ext4_journal_abort_handle(where, line, __func__, |
150 | bh, handle, err); | 153 | bh, handle, err); |
154 | } else if (now) { | ||
155 | ext4_superblock_csum_set(sb, | ||
156 | (struct ext4_super_block *)bh->b_data); | ||
157 | mark_buffer_dirty(bh); | ||
151 | } else | 158 | } else |
152 | sb->s_dirt = 1; | 159 | sb->s_dirt = 1; |
153 | return err; | 160 | return err; |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 83b20fcf9400..f440e8f1841f 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -213,7 +213,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
213 | struct buffer_head *bh); | 213 | struct buffer_head *bh); |
214 | 214 | ||
215 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | 215 | int __ext4_handle_dirty_super(const char *where, unsigned int line, |
216 | handle_t *handle, struct super_block *sb); | 216 | handle_t *handle, struct super_block *sb, |
217 | int now); | ||
217 | 218 | ||
218 | #define ext4_journal_get_write_access(handle, bh) \ | 219 | #define ext4_journal_get_write_access(handle, bh) \ |
219 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) | 220 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) |
@@ -225,8 +226,10 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, | |||
225 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ | 226 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ |
226 | __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ | 227 | __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ |
227 | (bh)) | 228 | (bh)) |
229 | #define ext4_handle_dirty_super_now(handle, sb) \ | ||
230 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 1) | ||
228 | #define ext4_handle_dirty_super(handle, sb) \ | 231 | #define ext4_handle_dirty_super(handle, sb) \ |
229 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) | 232 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb), 0) |
230 | 233 | ||
231 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); | 234 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); |
232 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); | 235 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index abcdeab67f52..91341ec6e06a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -52,6 +52,46 @@ | |||
52 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | 52 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ |
53 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | 53 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ |
54 | 54 | ||
55 | static __le32 ext4_extent_block_csum(struct inode *inode, | ||
56 | struct ext4_extent_header *eh) | ||
57 | { | ||
58 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
59 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
60 | __u32 csum; | ||
61 | |||
62 | csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh, | ||
63 | EXT4_EXTENT_TAIL_OFFSET(eh)); | ||
64 | return cpu_to_le32(csum); | ||
65 | } | ||
66 | |||
67 | static int ext4_extent_block_csum_verify(struct inode *inode, | ||
68 | struct ext4_extent_header *eh) | ||
69 | { | ||
70 | struct ext4_extent_tail *et; | ||
71 | |||
72 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
73 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
74 | return 1; | ||
75 | |||
76 | et = find_ext4_extent_tail(eh); | ||
77 | if (et->et_checksum != ext4_extent_block_csum(inode, eh)) | ||
78 | return 0; | ||
79 | return 1; | ||
80 | } | ||
81 | |||
82 | static void ext4_extent_block_csum_set(struct inode *inode, | ||
83 | struct ext4_extent_header *eh) | ||
84 | { | ||
85 | struct ext4_extent_tail *et; | ||
86 | |||
87 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
88 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
89 | return; | ||
90 | |||
91 | et = find_ext4_extent_tail(eh); | ||
92 | et->et_checksum = ext4_extent_block_csum(inode, eh); | ||
93 | } | ||
94 | |||
55 | static int ext4_split_extent(handle_t *handle, | 95 | static int ext4_split_extent(handle_t *handle, |
56 | struct inode *inode, | 96 | struct inode *inode, |
57 | struct ext4_ext_path *path, | 97 | struct ext4_ext_path *path, |
@@ -117,6 +157,7 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, | |||
117 | { | 157 | { |
118 | int err; | 158 | int err; |
119 | if (path->p_bh) { | 159 | if (path->p_bh) { |
160 | ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); | ||
120 | /* path points to block */ | 161 | /* path points to block */ |
121 | err = __ext4_handle_dirty_metadata(where, line, handle, | 162 | err = __ext4_handle_dirty_metadata(where, line, handle, |
122 | inode, path->p_bh); | 163 | inode, path->p_bh); |
@@ -391,6 +432,12 @@ static int __ext4_ext_check(const char *function, unsigned int line, | |||
391 | error_msg = "invalid extent entries"; | 432 | error_msg = "invalid extent entries"; |
392 | goto corrupted; | 433 | goto corrupted; |
393 | } | 434 | } |
435 | /* Verify checksum on non-root extent tree nodes */ | ||
436 | if (ext_depth(inode) != depth && | ||
437 | !ext4_extent_block_csum_verify(inode, eh)) { | ||
438 | error_msg = "extent tree corrupted"; | ||
439 | goto corrupted; | ||
440 | } | ||
394 | return 0; | 441 | return 0; |
395 | 442 | ||
396 | corrupted: | 443 | corrupted: |
@@ -412,6 +459,26 @@ int ext4_ext_check_inode(struct inode *inode) | |||
412 | return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); | 459 | return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode)); |
413 | } | 460 | } |
414 | 461 | ||
462 | static int __ext4_ext_check_block(const char *function, unsigned int line, | ||
463 | struct inode *inode, | ||
464 | struct ext4_extent_header *eh, | ||
465 | int depth, | ||
466 | struct buffer_head *bh) | ||
467 | { | ||
468 | int ret; | ||
469 | |||
470 | if (buffer_verified(bh)) | ||
471 | return 0; | ||
472 | ret = ext4_ext_check(inode, eh, depth); | ||
473 | if (ret) | ||
474 | return ret; | ||
475 | set_buffer_verified(bh); | ||
476 | return ret; | ||
477 | } | ||
478 | |||
479 | #define ext4_ext_check_block(inode, eh, depth, bh) \ | ||
480 | __ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh) | ||
481 | |||
415 | #ifdef EXT_DEBUG | 482 | #ifdef EXT_DEBUG |
416 | static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) | 483 | static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) |
417 | { | 484 | { |
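__ext4_ext_check_block() above is a verify-once wrapper: the structural checks and the new checksum verification run only the first time an extent block's buffer_head is validated, and the buffer_verified bit caches the result so later traversals through ext4_ext_check_block() return immediately. The idiom in isolation, with a deliberately generic verifier so it reads as a sketch rather than patch code:

static int check_block_once(struct buffer_head *bh,
			    int (*verify)(struct buffer_head *bh))
{
	int ret;

	if (buffer_verified(bh))	/* validated on an earlier pass */
		return 0;
	ret = verify(bh);
	if (ret)			/* leave the bit clear on failure */
		return ret;
	set_buffer_verified(bh);
	return 0;
}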
@@ -536,7 +603,7 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
536 | } | 603 | } |
537 | 604 | ||
538 | path->p_idx = l - 1; | 605 | path->p_idx = l - 1; |
539 | ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), | 606 | ext_debug(" -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block), |
540 | ext4_idx_pblock(path->p_idx)); | 607 | ext4_idx_pblock(path->p_idx)); |
541 | 608 | ||
542 | #ifdef CHECK_BINSEARCH | 609 | #ifdef CHECK_BINSEARCH |
@@ -668,8 +735,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
668 | i = depth; | 735 | i = depth; |
669 | /* walk through the tree */ | 736 | /* walk through the tree */ |
670 | while (i) { | 737 | while (i) { |
671 | int need_to_validate = 0; | ||
672 | |||
673 | ext_debug("depth %d: num %d, max %d\n", | 738 | ext_debug("depth %d: num %d, max %d\n", |
674 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); | 739 | ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); |
675 | 740 | ||
@@ -688,8 +753,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
688 | put_bh(bh); | 753 | put_bh(bh); |
689 | goto err; | 754 | goto err; |
690 | } | 755 | } |
691 | /* validate the extent entries */ | ||
692 | need_to_validate = 1; | ||
693 | } | 756 | } |
694 | eh = ext_block_hdr(bh); | 757 | eh = ext_block_hdr(bh); |
695 | ppos++; | 758 | ppos++; |
@@ -703,7 +766,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, | |||
703 | path[ppos].p_hdr = eh; | 766 | path[ppos].p_hdr = eh; |
704 | i--; | 767 | i--; |
705 | 768 | ||
706 | if (need_to_validate && ext4_ext_check(inode, eh, i)) | 769 | if (ext4_ext_check_block(inode, eh, i, bh)) |
707 | goto err; | 770 | goto err; |
708 | } | 771 | } |
709 | 772 | ||
@@ -914,6 +977,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
914 | le16_add_cpu(&neh->eh_entries, m); | 977 | le16_add_cpu(&neh->eh_entries, m); |
915 | } | 978 | } |
916 | 979 | ||
980 | ext4_extent_block_csum_set(inode, neh); | ||
917 | set_buffer_uptodate(bh); | 981 | set_buffer_uptodate(bh); |
918 | unlock_buffer(bh); | 982 | unlock_buffer(bh); |
919 | 983 | ||
@@ -992,6 +1056,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
992 | sizeof(struct ext4_extent_idx) * m); | 1056 | sizeof(struct ext4_extent_idx) * m); |
993 | le16_add_cpu(&neh->eh_entries, m); | 1057 | le16_add_cpu(&neh->eh_entries, m); |
994 | } | 1058 | } |
1059 | ext4_extent_block_csum_set(inode, neh); | ||
995 | set_buffer_uptodate(bh); | 1060 | set_buffer_uptodate(bh); |
996 | unlock_buffer(bh); | 1061 | unlock_buffer(bh); |
997 | 1062 | ||
@@ -1089,6 +1154,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1089 | else | 1154 | else |
1090 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); | 1155 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
1091 | neh->eh_magic = EXT4_EXT_MAGIC; | 1156 | neh->eh_magic = EXT4_EXT_MAGIC; |
1157 | ext4_extent_block_csum_set(inode, neh); | ||
1092 | set_buffer_uptodate(bh); | 1158 | set_buffer_uptodate(bh); |
1093 | unlock_buffer(bh); | 1159 | unlock_buffer(bh); |
1094 | 1160 | ||
@@ -1344,7 +1410,8 @@ got_index: | |||
1344 | return -EIO; | 1410 | return -EIO; |
1345 | eh = ext_block_hdr(bh); | 1411 | eh = ext_block_hdr(bh); |
1346 | /* subtract from p_depth to get proper eh_depth */ | 1412 | /* subtract from p_depth to get proper eh_depth */ |
1347 | if (ext4_ext_check(inode, eh, path->p_depth - depth)) { | 1413 | if (ext4_ext_check_block(inode, eh, |
1414 | path->p_depth - depth, bh)) { | ||
1348 | put_bh(bh); | 1415 | put_bh(bh); |
1349 | return -EIO; | 1416 | return -EIO; |
1350 | } | 1417 | } |
@@ -1357,7 +1424,7 @@ got_index: | |||
1357 | if (bh == NULL) | 1424 | if (bh == NULL) |
1358 | return -EIO; | 1425 | return -EIO; |
1359 | eh = ext_block_hdr(bh); | 1426 | eh = ext_block_hdr(bh); |
1360 | if (ext4_ext_check(inode, eh, path->p_depth - depth)) { | 1427 | if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) { |
1361 | put_bh(bh); | 1428 | put_bh(bh); |
1362 | return -EIO; | 1429 | return -EIO; |
1363 | } | 1430 | } |
@@ -2644,8 +2711,8 @@ cont: | |||
2644 | err = -EIO; | 2711 | err = -EIO; |
2645 | break; | 2712 | break; |
2646 | } | 2713 | } |
2647 | if (ext4_ext_check(inode, ext_block_hdr(bh), | 2714 | if (ext4_ext_check_block(inode, ext_block_hdr(bh), |
2648 | depth - i - 1)) { | 2715 | depth - i - 1, bh)) { |
2649 | err = -EIO; | 2716 | err = -EIO; |
2650 | break; | 2717 | break; |
2651 | } | 2718 | } |
@@ -4722,8 +4789,8 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
4722 | 4789 | ||
4723 | /* Now release the pages */ | 4790 | /* Now release the pages */ |
4724 | if (last_page_offset > first_page_offset) { | 4791 | if (last_page_offset > first_page_offset) { |
4725 | truncate_inode_pages_range(mapping, first_page_offset, | 4792 | truncate_pagecache_range(inode, first_page_offset, |
4726 | last_page_offset-1); | 4793 | last_page_offset - 1); |
4727 | } | 4794 | } |
4728 | 4795 | ||
4729 | /* finish any pending end_io work */ | 4796 | /* finish any pending end_io work */ |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index cb70f1812a70..8c7642a00054 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -95,7 +95,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
95 | { | 95 | { |
96 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 96 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
97 | int unaligned_aio = 0; | 97 | int unaligned_aio = 0; |
98 | int ret; | 98 | ssize_t ret; |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * If we have encountered a bitmap-format file, the size limit | 101 | * If we have encountered a bitmap-format file, the size limit |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 9f9acac6c43f..d48e8b14928c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -70,24 +70,27 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
70 | ext4_group_t block_group, | 70 | ext4_group_t block_group, |
71 | struct ext4_group_desc *gdp) | 71 | struct ext4_group_desc *gdp) |
72 | { | 72 | { |
73 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
74 | |||
75 | J_ASSERT_BH(bh, buffer_locked(bh)); | 73 | J_ASSERT_BH(bh, buffer_locked(bh)); |
76 | 74 | ||
77 | /* If checksum is bad mark all blocks and inodes use to prevent | 75 | /* If checksum is bad mark all blocks and inodes use to prevent |
78 | * allocation, essentially implementing a per-group read-only flag. */ | 76 | * allocation, essentially implementing a per-group read-only flag. */ |
79 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 77 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
80 | ext4_error(sb, "Checksum bad for group %u", block_group); | 78 | ext4_error(sb, "Checksum bad for group %u", block_group); |
81 | ext4_free_group_clusters_set(sb, gdp, 0); | 79 | ext4_free_group_clusters_set(sb, gdp, 0); |
82 | ext4_free_inodes_set(sb, gdp, 0); | 80 | ext4_free_inodes_set(sb, gdp, 0); |
83 | ext4_itable_unused_set(sb, gdp, 0); | 81 | ext4_itable_unused_set(sb, gdp, 0); |
84 | memset(bh->b_data, 0xff, sb->s_blocksize); | 82 | memset(bh->b_data, 0xff, sb->s_blocksize); |
83 | ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, | ||
84 | EXT4_INODES_PER_GROUP(sb) / 8); | ||
85 | return 0; | 85 | return 0; |
86 | } | 86 | } |
87 | 87 | ||
88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | 88 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); |
89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 89 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, |
90 | bh->b_data); | 90 | bh->b_data); |
91 | ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, | ||
92 | EXT4_INODES_PER_GROUP(sb) / 8); | ||
93 | ext4_group_desc_csum_set(sb, block_group, gdp); | ||
91 | 94 | ||
92 | return EXT4_INODES_PER_GROUP(sb); | 95 | return EXT4_INODES_PER_GROUP(sb); |
93 | } | 96 | } |
@@ -128,12 +131,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
128 | return NULL; | 131 | return NULL; |
129 | } | 132 | } |
130 | if (bitmap_uptodate(bh)) | 133 | if (bitmap_uptodate(bh)) |
131 | return bh; | 134 | goto verify; |
132 | 135 | ||
133 | lock_buffer(bh); | 136 | lock_buffer(bh); |
134 | if (bitmap_uptodate(bh)) { | 137 | if (bitmap_uptodate(bh)) { |
135 | unlock_buffer(bh); | 138 | unlock_buffer(bh); |
136 | return bh; | 139 | goto verify; |
137 | } | 140 | } |
138 | 141 | ||
139 | ext4_lock_group(sb, block_group); | 142 | ext4_lock_group(sb, block_group); |
@@ -141,6 +144,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
141 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 144 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
142 | set_bitmap_uptodate(bh); | 145 | set_bitmap_uptodate(bh); |
143 | set_buffer_uptodate(bh); | 146 | set_buffer_uptodate(bh); |
147 | set_buffer_verified(bh); | ||
144 | ext4_unlock_group(sb, block_group); | 148 | ext4_unlock_group(sb, block_group); |
145 | unlock_buffer(bh); | 149 | unlock_buffer(bh); |
146 | return bh; | 150 | return bh; |
@@ -154,7 +158,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
154 | */ | 158 | */ |
155 | set_bitmap_uptodate(bh); | 159 | set_bitmap_uptodate(bh); |
156 | unlock_buffer(bh); | 160 | unlock_buffer(bh); |
157 | return bh; | 161 | goto verify; |
158 | } | 162 | } |
159 | /* | 163 | /* |
160 | * submit the buffer_head for reading | 164 | * submit the buffer_head for reading |
@@ -171,6 +175,20 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
171 | block_group, bitmap_blk); | 175 | block_group, bitmap_blk); |
172 | return NULL; | 176 | return NULL; |
173 | } | 177 | } |
178 | |||
179 | verify: | ||
180 | ext4_lock_group(sb, block_group); | ||
181 | if (!buffer_verified(bh) && | ||
182 | !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, | ||
183 | EXT4_INODES_PER_GROUP(sb) / 8)) { | ||
184 | ext4_unlock_group(sb, block_group); | ||
185 | put_bh(bh); | ||
186 | ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " | ||
187 | "inode_bitmap = %llu", block_group, bitmap_blk); | ||
188 | return NULL; | ||
189 | } | ||
190 | ext4_unlock_group(sb, block_group); | ||
191 | set_buffer_verified(bh); | ||
174 | return bh; | 192 | return bh; |
175 | } | 193 | } |
176 | 194 | ||
@@ -276,7 +294,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
276 | ext4_used_dirs_set(sb, gdp, count); | 294 | ext4_used_dirs_set(sb, gdp, count); |
277 | percpu_counter_dec(&sbi->s_dirs_counter); | 295 | percpu_counter_dec(&sbi->s_dirs_counter); |
278 | } | 296 | } |
279 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 297 | ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, |
298 | EXT4_INODES_PER_GROUP(sb) / 8); | ||
299 | ext4_group_desc_csum_set(sb, block_group, gdp); | ||
280 | ext4_unlock_group(sb, block_group); | 300 | ext4_unlock_group(sb, block_group); |
281 | 301 | ||
282 | percpu_counter_inc(&sbi->s_freeinodes_counter); | 302 | percpu_counter_inc(&sbi->s_freeinodes_counter); |
@@ -488,10 +508,12 @@ fallback_retry: | |||
488 | for (i = 0; i < ngroups; i++) { | 508 | for (i = 0; i < ngroups; i++) { |
489 | grp = (parent_group + i) % ngroups; | 509 | grp = (parent_group + i) % ngroups; |
490 | desc = ext4_get_group_desc(sb, grp, NULL); | 510 | desc = ext4_get_group_desc(sb, grp, NULL); |
491 | grp_free = ext4_free_inodes_count(sb, desc); | 511 | if (desc) { |
492 | if (desc && grp_free && grp_free >= avefreei) { | 512 | grp_free = ext4_free_inodes_count(sb, desc); |
493 | *group = grp; | 513 | if (grp_free && grp_free >= avefreei) { |
494 | return 0; | 514 | *group = grp; |
515 | return 0; | ||
516 | } | ||
495 | } | 517 | } |
496 | } | 518 | } |
497 | 519 | ||
@@ -709,7 +731,7 @@ repeat_in_this_group: | |||
709 | 731 | ||
710 | got: | 732 | got: |
711 | /* We may have to initialize the block bitmap if it isn't already */ | 733 | /* We may have to initialize the block bitmap if it isn't already */ |
712 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && | 734 | if (ext4_has_group_desc_csum(sb) && |
713 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 735 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
714 | struct buffer_head *block_bitmap_bh; | 736 | struct buffer_head *block_bitmap_bh; |
715 | 737 | ||
@@ -731,8 +753,11 @@ got: | |||
731 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 753 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
732 | ext4_free_group_clusters_set(sb, gdp, | 754 | ext4_free_group_clusters_set(sb, gdp, |
733 | ext4_free_clusters_after_init(sb, group, gdp)); | 755 | ext4_free_clusters_after_init(sb, group, gdp)); |
734 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, | 756 | ext4_block_bitmap_csum_set(sb, group, gdp, |
735 | gdp); | 757 | block_bitmap_bh, |
758 | EXT4_BLOCKS_PER_GROUP(sb) / | ||
759 | 8); | ||
760 | ext4_group_desc_csum_set(sb, group, gdp); | ||
736 | } | 761 | } |
737 | ext4_unlock_group(sb, group); | 762 | ext4_unlock_group(sb, group); |
738 | 763 | ||
@@ -751,7 +776,7 @@ got: | |||
751 | goto fail; | 776 | goto fail; |
752 | 777 | ||
753 | /* Update the relevant bg descriptor fields */ | 778 | /* Update the relevant bg descriptor fields */ |
754 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | 779 | if (ext4_has_group_desc_csum(sb)) { |
755 | int free; | 780 | int free; |
756 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 781 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |
757 | 782 | ||
@@ -772,7 +797,10 @@ got: | |||
772 | ext4_itable_unused_set(sb, gdp, | 797 | ext4_itable_unused_set(sb, gdp, |
773 | (EXT4_INODES_PER_GROUP(sb) - ino)); | 798 | (EXT4_INODES_PER_GROUP(sb) - ino)); |
774 | up_read(&grp->alloc_sem); | 799 | up_read(&grp->alloc_sem); |
800 | } else { | ||
801 | ext4_lock_group(sb, group); | ||
775 | } | 802 | } |
803 | |||
776 | ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); | 804 | ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); |
777 | if (S_ISDIR(mode)) { | 805 | if (S_ISDIR(mode)) { |
778 | ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); | 806 | ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); |
@@ -782,10 +810,12 @@ got: | |||
782 | atomic_inc(&sbi->s_flex_groups[f].used_dirs); | 810 | atomic_inc(&sbi->s_flex_groups[f].used_dirs); |
783 | } | 811 | } |
784 | } | 812 | } |
785 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | 813 | if (ext4_has_group_desc_csum(sb)) { |
786 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 814 | ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh, |
787 | ext4_unlock_group(sb, group); | 815 | EXT4_INODES_PER_GROUP(sb) / 8); |
816 | ext4_group_desc_csum_set(sb, group, gdp); | ||
788 | } | 817 | } |
818 | ext4_unlock_group(sb, group); | ||
789 | 819 | ||
790 | BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); | 820 | BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); |
791 | err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); | 821 | err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); |
@@ -850,6 +880,19 @@ got: | |||
850 | inode->i_generation = sbi->s_next_generation++; | 880 | inode->i_generation = sbi->s_next_generation++; |
851 | spin_unlock(&sbi->s_next_gen_lock); | 881 | spin_unlock(&sbi->s_next_gen_lock); |
852 | 882 | ||
883 | /* Precompute checksum seed for inode metadata */ | ||
884 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
885 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
886 | __u32 csum; | ||
887 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
888 | __le32 inum = cpu_to_le32(inode->i_ino); | ||
889 | __le32 gen = cpu_to_le32(inode->i_generation); | ||
890 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, | ||
891 | sizeof(inum)); | ||
892 | ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, | ||
893 | sizeof(gen)); | ||
894 | } | ||
895 | |||
853 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ | 896 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
854 | ext4_set_inode_state(inode, EXT4_STATE_NEW); | 897 | ext4_set_inode_state(inode, EXT4_STATE_NEW); |
855 | 898 | ||
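The seed chaining just added gives every inode a distinct checksum seed: the per-filesystem seed is folded first with the little-endian inode number and then with the generation, so metadata checksummed for one inode (or an earlier incarnation of the same inode number) can never verify against another. The same two folds, restated from the hunk above with the intent of each step spelled out:

	__le32 inum = cpu_to_le32(inode->i_ino);
	__le32 gen  = cpu_to_le32(inode->i_generation);

	/* fold the inode number into the filesystem-wide seed ... */
	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, sizeof(inum));
	/* ... then the generation, yielding this inode's private seed */
	ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen));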
@@ -1140,7 +1183,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | |||
1140 | skip_zeroout: | 1183 | skip_zeroout: |
1141 | ext4_lock_group(sb, group); | 1184 | ext4_lock_group(sb, group); |
1142 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); | 1185 | gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); |
1143 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 1186 | ext4_group_desc_csum_set(sb, group, gdp); |
1144 | ext4_unlock_group(sb, group); | 1187 | ext4_unlock_group(sb, group); |
1145 | 1188 | ||
1146 | BUFFER_TRACE(group_desc_bh, | 1189 | BUFFER_TRACE(group_desc_bh, |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 07eaf565fdcb..02bc8cbe7281 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -47,6 +47,73 @@ | |||
47 | 47 | ||
48 | #define MPAGE_DA_EXTENT_TAIL 0x01 | 48 | #define MPAGE_DA_EXTENT_TAIL 0x01 |
49 | 49 | ||
50 | static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, | ||
51 | struct ext4_inode_info *ei) | ||
52 | { | ||
53 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
54 | __u16 csum_lo; | ||
55 | __u16 csum_hi = 0; | ||
56 | __u32 csum; | ||
57 | |||
58 | csum_lo = raw->i_checksum_lo; | ||
59 | raw->i_checksum_lo = 0; | ||
60 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | ||
61 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { | ||
62 | csum_hi = raw->i_checksum_hi; | ||
63 | raw->i_checksum_hi = 0; | ||
64 | } | ||
65 | |||
66 | csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, | ||
67 | EXT4_INODE_SIZE(inode->i_sb)); | ||
68 | |||
69 | raw->i_checksum_lo = csum_lo; | ||
70 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | ||
71 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) | ||
72 | raw->i_checksum_hi = csum_hi; | ||
73 | |||
74 | return csum; | ||
75 | } | ||
76 | |||
77 | static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, | ||
78 | struct ext4_inode_info *ei) | ||
79 | { | ||
80 | __u32 provided, calculated; | ||
81 | |||
82 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | ||
83 | cpu_to_le32(EXT4_OS_LINUX) || | ||
84 | !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
85 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
86 | return 1; | ||
87 | |||
88 | provided = le16_to_cpu(raw->i_checksum_lo); | ||
89 | calculated = ext4_inode_csum(inode, raw, ei); | ||
90 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | ||
91 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) | ||
92 | provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; | ||
93 | else | ||
94 | calculated &= 0xFFFF; | ||
95 | |||
96 | return provided == calculated; | ||
97 | } | ||
98 | |||
99 | static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, | ||
100 | struct ext4_inode_info *ei) | ||
101 | { | ||
102 | __u32 csum; | ||
103 | |||
104 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | ||
105 | cpu_to_le32(EXT4_OS_LINUX) || | ||
106 | !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
107 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
108 | return; | ||
109 | |||
110 | csum = ext4_inode_csum(inode, raw, ei); | ||
111 | raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF); | ||
112 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && | ||
113 | EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) | ||
114 | raw->i_checksum_hi = cpu_to_le16(csum >> 16); | ||
115 | } | ||
116 | |||
50 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 117 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
51 | loff_t new_size) | 118 | loff_t new_size) |
52 | { | 119 | { |
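The three helpers above store one 32-bit crc32c in two 16-bit on-disk halves: i_checksum_lo is always present, while i_checksum_hi is used only when the inode is larger than the old 128-byte format and i_extra_isize leaves room for it, so small inodes are verified on the low 16 bits alone. Reduced to the essential bit manipulation (has_hi stands in for the EXT4_FITS_IN_INODE() test):

	__u32 csum = ext4_inode_csum(inode, raw, ei);

	/* store */
	raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
	if (has_hi)
		raw->i_checksum_hi = cpu_to_le16(csum >> 16);

	/* verify */
	provided = le16_to_cpu(raw->i_checksum_lo);
	if (has_hi)
		provided |= (__u32)le16_to_cpu(raw->i_checksum_hi) << 16;
	else
		csum &= 0xFFFF;		/* compare low halves only */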
@@ -3517,8 +3584,7 @@ make_io: | |||
3517 | b = table; | 3584 | b = table; |
3518 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; | 3585 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; |
3519 | num = EXT4_INODES_PER_GROUP(sb); | 3586 | num = EXT4_INODES_PER_GROUP(sb); |
3520 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 3587 | if (ext4_has_group_desc_csum(sb)) |
3521 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
3522 | num -= ext4_itable_unused_count(sb, gdp); | 3588 | num -= ext4_itable_unused_count(sb, gdp); |
3523 | table += num / inodes_per_block; | 3589 | table += num / inodes_per_block; |
3524 | if (end > table) | 3590 | if (end > table) |
@@ -3646,6 +3712,39 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3646 | if (ret < 0) | 3712 | if (ret < 0) |
3647 | goto bad_inode; | 3713 | goto bad_inode; |
3648 | raw_inode = ext4_raw_inode(&iloc); | 3714 | raw_inode = ext4_raw_inode(&iloc); |
3715 | |||
3716 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | ||
3717 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | ||
3718 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | ||
3719 | EXT4_INODE_SIZE(inode->i_sb)) { | ||
3720 | EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", | ||
3721 | EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, | ||
3722 | EXT4_INODE_SIZE(inode->i_sb)); | ||
3723 | ret = -EIO; | ||
3724 | goto bad_inode; | ||
3725 | } | ||
3726 | } else | ||
3727 | ei->i_extra_isize = 0; | ||
3728 | |||
3729 | /* Precompute checksum seed for inode metadata */ | ||
3730 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3731 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
3732 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
3733 | __u32 csum; | ||
3734 | __le32 inum = cpu_to_le32(inode->i_ino); | ||
3735 | __le32 gen = raw_inode->i_generation; | ||
3736 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, | ||
3737 | sizeof(inum)); | ||
3738 | ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, | ||
3739 | sizeof(gen)); | ||
3740 | } | ||
3741 | |||
3742 | if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { | ||
3743 | EXT4_ERROR_INODE(inode, "checksum invalid"); | ||
3744 | ret = -EIO; | ||
3745 | goto bad_inode; | ||
3746 | } | ||
3747 | |||
3649 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 3748 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
3650 | i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 3749 | i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
3651 | i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 3750 | i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
@@ -3725,12 +3824,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3725 | } | 3824 | } |
3726 | 3825 | ||
3727 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 3826 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
3728 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | ||
3729 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | ||
3730 | EXT4_INODE_SIZE(inode->i_sb)) { | ||
3731 | ret = -EIO; | ||
3732 | goto bad_inode; | ||
3733 | } | ||
3734 | if (ei->i_extra_isize == 0) { | 3827 | if (ei->i_extra_isize == 0) { |
3735 | /* The extra space is currently unused. Use it. */ | 3828 | /* The extra space is currently unused. Use it. */ |
3736 | ei->i_extra_isize = sizeof(struct ext4_inode) - | 3829 | ei->i_extra_isize = sizeof(struct ext4_inode) - |
@@ -3742,8 +3835,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3742 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) | 3835 | if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) |
3743 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); | 3836 | ext4_set_inode_state(inode, EXT4_STATE_XATTR); |
3744 | } | 3837 | } |
3745 | } else | 3838 | } |
3746 | ei->i_extra_isize = 0; | ||
3747 | 3839 | ||
3748 | EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); | 3840 | EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); |
3749 | EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); | 3841 | EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); |
@@ -3942,7 +4034,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
3942 | EXT4_SET_RO_COMPAT_FEATURE(sb, | 4034 | EXT4_SET_RO_COMPAT_FEATURE(sb, |
3943 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); | 4035 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE); |
3944 | ext4_handle_sync(handle); | 4036 | ext4_handle_sync(handle); |
3945 | err = ext4_handle_dirty_super(handle, sb); | 4037 | err = ext4_handle_dirty_super_now(handle, sb); |
3946 | } | 4038 | } |
3947 | } | 4039 | } |
3948 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | 4040 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); |
@@ -3969,6 +4061,8 @@ static int ext4_do_update_inode(handle_t *handle, | |||
3969 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4061 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); |
3970 | } | 4062 | } |
3971 | 4063 | ||
4064 | ext4_inode_csum_set(inode, raw_inode, ei); | ||
4065 | |||
3972 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4066 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
3973 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); | 4067 | rc = ext4_handle_dirty_metadata(handle, NULL, bh); |
3974 | if (!err) | 4068 | if (!err) |
@@ -4213,7 +4307,8 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4213 | * will return the blocks that include the delayed allocation | 4307 | * will return the blocks that include the delayed allocation |
4214 | * blocks for this file. | 4308 | * blocks for this file. |
4215 | */ | 4309 | */ |
4216 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 4310 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), |
4311 | EXT4_I(inode)->i_reserved_data_blocks); | ||
4217 | 4312 | ||
4218 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 4313 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; |
4219 | return 0; | 4314 | return 0; |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 6eee25591b81..8ad112ae0ade 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
38 | handle_t *handle = NULL; | 38 | handle_t *handle = NULL; |
39 | int err, migrate = 0; | 39 | int err, migrate = 0; |
40 | struct ext4_iloc iloc; | 40 | struct ext4_iloc iloc; |
41 | unsigned int oldflags; | 41 | unsigned int oldflags, mask, i; |
42 | unsigned int jflag; | 42 | unsigned int jflag; |
43 | 43 | ||
44 | if (!inode_owner_or_capable(inode)) | 44 | if (!inode_owner_or_capable(inode)) |
@@ -115,8 +115,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
115 | if (err) | 115 | if (err) |
116 | goto flags_err; | 116 | goto flags_err; |
117 | 117 | ||
118 | flags = flags & EXT4_FL_USER_MODIFIABLE; | 118 | for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { |
119 | flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; | 119 | if (!(mask & EXT4_FL_USER_MODIFIABLE)) |
120 | continue; | ||
121 | if (mask & flags) | ||
122 | ext4_set_inode_flag(inode, i); | ||
123 | else | ||
124 | ext4_clear_inode_flag(inode, i); | ||
125 | } | ||
120 | ei->i_flags = flags; | 126 | ei->i_flags = flags; |
121 | 127 | ||
122 | ext4_set_inode_flags(inode); | 128 | ext4_set_inode_flags(inode); |
@@ -152,6 +158,13 @@ flags_out: | |||
152 | if (!inode_owner_or_capable(inode)) | 158 | if (!inode_owner_or_capable(inode)) |
153 | return -EPERM; | 159 | return -EPERM; |
154 | 160 | ||
161 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
162 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
163 | ext4_warning(sb, "Setting inode version is not " | ||
164 | "supported with metadata_csum enabled."); | ||
165 | return -ENOTTY; | ||
166 | } | ||
167 | |||
155 | err = mnt_want_write_file(filp); | 168 | err = mnt_want_write_file(filp); |
156 | if (err) | 169 | if (err) |
157 | return err; | 170 | return err; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99ab428bcfa0..1cd6994fc446 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -788,7 +788,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
788 | int first_block; | 788 | int first_block; |
789 | struct super_block *sb; | 789 | struct super_block *sb; |
790 | struct buffer_head *bhs; | 790 | struct buffer_head *bhs; |
791 | struct buffer_head **bh; | 791 | struct buffer_head **bh = NULL; |
792 | struct inode *inode; | 792 | struct inode *inode; |
793 | char *data; | 793 | char *data; |
794 | char *bitmap; | 794 | char *bitmap; |
@@ -2375,7 +2375,7 @@ static int ext4_groupinfo_create_slab(size_t size) | |||
2375 | return 0; | 2375 | return 0; |
2376 | } | 2376 | } |
2377 | 2377 | ||
2378 | int ext4_mb_init(struct super_block *sb, int needs_recovery) | 2378 | int ext4_mb_init(struct super_block *sb) |
2379 | { | 2379 | { |
2380 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2380 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2381 | unsigned i, j; | 2381 | unsigned i, j; |
@@ -2517,6 +2517,9 @@ int ext4_mb_release(struct super_block *sb) | |||
2517 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2517 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2518 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); | 2518 | struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); |
2519 | 2519 | ||
2520 | if (sbi->s_proc) | ||
2521 | remove_proc_entry("mb_groups", sbi->s_proc); | ||
2522 | |||
2520 | if (sbi->s_group_info) { | 2523 | if (sbi->s_group_info) { |
2521 | for (i = 0; i < ngroups; i++) { | 2524 | for (i = 0; i < ngroups; i++) { |
2522 | grinfo = ext4_get_group_info(sb, i); | 2525 | grinfo = ext4_get_group_info(sb, i); |
@@ -2564,8 +2567,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2564 | } | 2567 | } |
2565 | 2568 | ||
2566 | free_percpu(sbi->s_locality_groups); | 2569 | free_percpu(sbi->s_locality_groups); |
2567 | if (sbi->s_proc) | ||
2568 | remove_proc_entry("mb_groups", sbi->s_proc); | ||
2569 | 2570 | ||
2570 | return 0; | 2571 | return 0; |
2571 | } | 2572 | } |
@@ -2797,7 +2798,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2797 | } | 2798 | } |
2798 | len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; | 2799 | len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len; |
2799 | ext4_free_group_clusters_set(sb, gdp, len); | 2800 | ext4_free_group_clusters_set(sb, gdp, len); |
2800 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2801 | ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh, |
2802 | EXT4_BLOCKS_PER_GROUP(sb) / 8); | ||
2803 | ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp); | ||
2801 | 2804 | ||
2802 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); | 2805 | ext4_unlock_group(sb, ac->ac_b_ex.fe_group); |
2803 | percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); | 2806 | percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len); |
@@ -3071,13 +3074,9 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) | |||
3071 | static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) | 3074 | static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) |
3072 | { | 3075 | { |
3073 | struct ext4_prealloc_space *pa = ac->ac_pa; | 3076 | struct ext4_prealloc_space *pa = ac->ac_pa; |
3074 | int len; | ||
3075 | |||
3076 | if (pa && pa->pa_type == MB_INODE_PA) { | ||
3077 | len = ac->ac_b_ex.fe_len; | ||
3078 | pa->pa_free += len; | ||
3079 | } | ||
3080 | 3077 | ||
3078 | if (pa && pa->pa_type == MB_INODE_PA) | ||
3079 | pa->pa_free += ac->ac_b_ex.fe_len; | ||
3081 | } | 3080 | } |
3082 | 3081 | ||
3083 | /* | 3082 | /* |
@@ -4636,6 +4635,7 @@ do_more: | |||
4636 | */ | 4635 | */ |
4637 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); | 4636 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); |
4638 | if (!new_entry) { | 4637 | if (!new_entry) { |
4638 | ext4_mb_unload_buddy(&e4b); | ||
4639 | err = -ENOMEM; | 4639 | err = -ENOMEM; |
4640 | goto error_return; | 4640 | goto error_return; |
4641 | } | 4641 | } |
@@ -4659,7 +4659,9 @@ do_more: | |||
4659 | 4659 | ||
4660 | ret = ext4_free_group_clusters(sb, gdp) + count_clusters; | 4660 | ret = ext4_free_group_clusters(sb, gdp) + count_clusters; |
4661 | ext4_free_group_clusters_set(sb, gdp, ret); | 4661 | ext4_free_group_clusters_set(sb, gdp, ret); |
4662 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 4662 | ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh, |
4663 | EXT4_BLOCKS_PER_GROUP(sb) / 8); | ||
4664 | ext4_group_desc_csum_set(sb, block_group, gdp); | ||
4663 | ext4_unlock_group(sb, block_group); | 4665 | ext4_unlock_group(sb, block_group); |
4664 | percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); | 4666 | percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); |
4665 | 4667 | ||
@@ -4803,7 +4805,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, | |||
4803 | mb_free_blocks(NULL, &e4b, bit, count); | 4805 | mb_free_blocks(NULL, &e4b, bit, count); |
4804 | blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); | 4806 | blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); |
4805 | ext4_free_group_clusters_set(sb, desc, blk_free_count); | 4807 | ext4_free_group_clusters_set(sb, desc, blk_free_count); |
4806 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | 4808 | ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh, |
4809 | EXT4_BLOCKS_PER_GROUP(sb) / 8); | ||
4810 | ext4_group_desc_csum_set(sb, block_group, desc); | ||
4807 | ext4_unlock_group(sb, block_group); | 4811 | ext4_unlock_group(sb, block_group); |
4808 | percpu_counter_add(&sbi->s_freeclusters_counter, | 4812 | percpu_counter_add(&sbi->s_freeclusters_counter, |
4809 | EXT4_B2C(sbi, blocks_freed)); | 4813 | EXT4_B2C(sbi, blocks_freed)); |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index ed6548d89165..f99a1311e847 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c | |||
@@ -6,12 +6,45 @@ | |||
6 | 6 | ||
7 | #include "ext4.h" | 7 | #include "ext4.h" |
8 | 8 | ||
9 | /* Checksumming functions */ | ||
10 | static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) | ||
11 | { | ||
12 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
13 | int offset = offsetof(struct mmp_struct, mmp_checksum); | ||
14 | __u32 csum; | ||
15 | |||
16 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); | ||
17 | |||
18 | return cpu_to_le32(csum); | ||
19 | } | ||
20 | |||
21 | int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) | ||
22 | { | ||
23 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
24 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
25 | return 1; | ||
26 | |||
27 | return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); | ||
28 | } | ||
29 | |||
30 | void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) | ||
31 | { | ||
32 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
33 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
34 | return; | ||
35 | |||
36 | mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); | ||
37 | } | ||
38 | |||
9 | /* | 39 | /* |
10 | * Write the MMP block using WRITE_SYNC to try to get the block on-disk | 40 | * Write the MMP block using WRITE_SYNC to try to get the block on-disk |
11 | * faster. | 41 | * faster. |
12 | */ | 42 | */ |
13 | static int write_mmp_block(struct buffer_head *bh) | 43 | static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) |
14 | { | 44 | { |
45 | struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); | ||
46 | |||
47 | ext4_mmp_csum_set(sb, mmp); | ||
15 | mark_buffer_dirty(bh); | 48 | mark_buffer_dirty(bh); |
16 | lock_buffer(bh); | 49 | lock_buffer(bh); |
17 | bh->b_end_io = end_buffer_write_sync; | 50 | bh->b_end_io = end_buffer_write_sync; |
@@ -59,7 +92,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, | |||
59 | } | 92 | } |
60 | 93 | ||
61 | mmp = (struct mmp_struct *)((*bh)->b_data); | 94 | mmp = (struct mmp_struct *)((*bh)->b_data); |
62 | if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) | 95 | if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || |
96 | !ext4_mmp_csum_verify(sb, mmp)) | ||
63 | return -EINVAL; | 97 | return -EINVAL; |
64 | 98 | ||
65 | return 0; | 99 | return 0; |
@@ -120,7 +154,7 @@ static int kmmpd(void *data) | |||
120 | mmp->mmp_time = cpu_to_le64(get_seconds()); | 154 | mmp->mmp_time = cpu_to_le64(get_seconds()); |
121 | last_update_time = jiffies; | 155 | last_update_time = jiffies; |
122 | 156 | ||
123 | retval = write_mmp_block(bh); | 157 | retval = write_mmp_block(sb, bh); |
124 | /* | 158 | /* |
125 | * Don't spew too many error messages. Print one every | 159 | * Don't spew too many error messages. Print one every |
126 | * (s_mmp_update_interval * 60) seconds. | 160 | * (s_mmp_update_interval * 60) seconds. |
@@ -200,7 +234,7 @@ static int kmmpd(void *data) | |||
200 | mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); | 234 | mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); |
201 | mmp->mmp_time = cpu_to_le64(get_seconds()); | 235 | mmp->mmp_time = cpu_to_le64(get_seconds()); |
202 | 236 | ||
203 | retval = write_mmp_block(bh); | 237 | retval = write_mmp_block(sb, bh); |
204 | 238 | ||
205 | failed: | 239 | failed: |
206 | kfree(data); | 240 | kfree(data); |
@@ -299,7 +333,7 @@ skip: | |||
299 | seq = mmp_new_seq(); | 333 | seq = mmp_new_seq(); |
300 | mmp->mmp_seq = cpu_to_le32(seq); | 334 | mmp->mmp_seq = cpu_to_le32(seq); |
301 | 335 | ||
302 | retval = write_mmp_block(bh); | 336 | retval = write_mmp_block(sb, bh); |
303 | if (retval) | 337 | if (retval) |
304 | goto failed; | 338 | goto failed; |
305 | 339 | ||
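
The three MMP helpers added above set the pattern used throughout this series: checksum every byte of the on-disk structure that precedes the checksum field, seed the computation with the per-filesystem seed (sbi->s_csum_seed), and make both the verify and set paths no-ops unless the metadata_csum feature is present. The userspace sketch below mirrors that shape; the struct layout is hypothetical and FNV-1a is only a stand-in for the crc32c that ext4_chksum() provides via the kernel crypto API.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct mmp_like {                 /* hypothetical layout, not ext4's mmp_struct */
	uint32_t magic;
	uint32_t seq;
	uint64_t time;
	uint32_t checksum;        /* covers every byte before this field */
};

/* Stand-in for ext4_chksum(); FNV-1a keeps the sketch self-contained. */
static uint32_t toy_csum(uint32_t seed, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t h = seed;

	while (len--)
		h = (h ^ *p++) * 16777619u;
	return h;
}

/* Checksum only the bytes that precede the checksum field - the same
 * offsetof() trick used by ext4_mmp_csum(). */
static uint32_t mmp_like_csum(uint32_t fs_seed, const struct mmp_like *m)
{
	return toy_csum(fs_seed, m, offsetof(struct mmp_like, checksum));
}

int main(void)
{
	struct mmp_like m;

	memset(&m, 0, sizeof(m));
	m.magic = 0x004D4D50;
	m.seq = 1;
	m.checksum = mmp_like_csum(0x12345678, &m);               /* set    */
	printf("verify: %d\n",
	       m.checksum == mmp_like_csum(0x12345678, &m));      /* verify */
	return 0;
}
```
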
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e2a3f4b0ff78..5845cd97bf8b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -145,6 +145,14 @@ struct dx_map_entry | |||
145 | u16 size; | 145 | u16 size; |
146 | }; | 146 | }; |
147 | 147 | ||
148 | /* | ||
149 | * This goes at the end of each htree block. | ||
150 | */ | ||
151 | struct dx_tail { | ||
152 | u32 dt_reserved; | ||
153 | __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */ | ||
154 | }; | ||
155 | |||
148 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); | 156 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); |
149 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); | 157 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); |
150 | static inline unsigned dx_get_hash(struct dx_entry *entry); | 158 | static inline unsigned dx_get_hash(struct dx_entry *entry); |
@@ -180,6 +188,230 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, | |||
180 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 188 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
181 | struct inode *inode); | 189 | struct inode *inode); |
182 | 190 | ||
191 | /* checksumming functions */ | ||
192 | #define EXT4_DIRENT_TAIL(block, blocksize) \ | ||
193 | ((struct ext4_dir_entry_tail *)(((void *)(block)) + \ | ||
194 | ((blocksize) - \ | ||
195 | sizeof(struct ext4_dir_entry_tail)))) | ||
196 | |||
197 | static void initialize_dirent_tail(struct ext4_dir_entry_tail *t, | ||
198 | unsigned int blocksize) | ||
199 | { | ||
200 | memset(t, 0, sizeof(struct ext4_dir_entry_tail)); | ||
201 | t->det_rec_len = ext4_rec_len_to_disk( | ||
202 | sizeof(struct ext4_dir_entry_tail), blocksize); | ||
203 | t->det_reserved_ft = EXT4_FT_DIR_CSUM; | ||
204 | } | ||
205 | |||
206 | /* Walk through a dirent block to find a checksum "dirent" at the tail */ | ||
207 | static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode, | ||
208 | struct ext4_dir_entry *de) | ||
209 | { | ||
210 | struct ext4_dir_entry_tail *t; | ||
211 | |||
212 | #ifdef PARANOID | ||
213 | struct ext4_dir_entry *d, *top; | ||
214 | |||
215 | d = de; | ||
216 | top = (struct ext4_dir_entry *)(((void *)de) + | ||
217 | (EXT4_BLOCK_SIZE(inode->i_sb) - | ||
218 | sizeof(struct ext4_dir_entry_tail))); | ||
219 | while (d < top && d->rec_len) | ||
220 | d = (struct ext4_dir_entry *)(((void *)d) + | ||
221 | le16_to_cpu(d->rec_len)); | ||
222 | |||
223 | if (d != top) | ||
224 | return NULL; | ||
225 | |||
226 | t = (struct ext4_dir_entry_tail *)d; | ||
227 | #else | ||
228 | t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb)); | ||
229 | #endif | ||
230 | |||
231 | if (t->det_reserved_zero1 || | ||
232 | le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || | ||
233 | t->det_reserved_zero2 || | ||
234 | t->det_reserved_ft != EXT4_FT_DIR_CSUM) | ||
235 | return NULL; | ||
236 | |||
237 | return t; | ||
238 | } | ||
239 | |||
240 | static __le32 ext4_dirent_csum(struct inode *inode, | ||
241 | struct ext4_dir_entry *dirent, int size) | ||
242 | { | ||
243 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
244 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
245 | __u32 csum; | ||
246 | |||
247 | csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); | ||
248 | return cpu_to_le32(csum); | ||
249 | } | ||
250 | |||
251 | int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) | ||
252 | { | ||
253 | struct ext4_dir_entry_tail *t; | ||
254 | |||
255 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
256 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
257 | return 1; | ||
258 | |||
259 | t = get_dirent_tail(inode, dirent); | ||
260 | if (!t) { | ||
261 | EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " | ||
262 | "leaf for checksum. Please run e2fsck -D."); | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | if (t->det_checksum != ext4_dirent_csum(inode, dirent, | ||
267 | (void *)t - (void *)dirent)) | ||
268 | return 0; | ||
269 | |||
270 | return 1; | ||
271 | } | ||
272 | |||
273 | static void ext4_dirent_csum_set(struct inode *inode, | ||
274 | struct ext4_dir_entry *dirent) | ||
275 | { | ||
276 | struct ext4_dir_entry_tail *t; | ||
277 | |||
278 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
279 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
280 | return; | ||
281 | |||
282 | t = get_dirent_tail(inode, dirent); | ||
283 | if (!t) { | ||
284 | EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir " | ||
285 | "leaf for checksum. Please run e2fsck -D."); | ||
286 | return; | ||
287 | } | ||
288 | |||
289 | t->det_checksum = ext4_dirent_csum(inode, dirent, | ||
290 | (void *)t - (void *)dirent); | ||
291 | } | ||
292 | |||
293 | static inline int ext4_handle_dirty_dirent_node(handle_t *handle, | ||
294 | struct inode *inode, | ||
295 | struct buffer_head *bh) | ||
296 | { | ||
297 | ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); | ||
298 | return ext4_handle_dirty_metadata(handle, inode, bh); | ||
299 | } | ||
300 | |||
301 | static struct dx_countlimit *get_dx_countlimit(struct inode *inode, | ||
302 | struct ext4_dir_entry *dirent, | ||
303 | int *offset) | ||
304 | { | ||
305 | struct ext4_dir_entry *dp; | ||
306 | struct dx_root_info *root; | ||
307 | int count_offset; | ||
308 | |||
309 | if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) | ||
310 | count_offset = 8; | ||
311 | else if (le16_to_cpu(dirent->rec_len) == 12) { | ||
312 | dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); | ||
313 | if (le16_to_cpu(dp->rec_len) != | ||
314 | EXT4_BLOCK_SIZE(inode->i_sb) - 12) | ||
315 | return NULL; | ||
316 | root = (struct dx_root_info *)(((void *)dp + 12)); | ||
317 | if (root->reserved_zero || | ||
318 | root->info_length != sizeof(struct dx_root_info)) | ||
319 | return NULL; | ||
320 | count_offset = 32; | ||
321 | } else | ||
322 | return NULL; | ||
323 | |||
324 | if (offset) | ||
325 | *offset = count_offset; | ||
326 | return (struct dx_countlimit *)(((void *)dirent) + count_offset); | ||
327 | } | ||
328 | |||
329 | static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, | ||
330 | int count_offset, int count, struct dx_tail *t) | ||
331 | { | ||
332 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
333 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
334 | __u32 csum, old_csum; | ||
335 | int size; | ||
336 | |||
337 | size = count_offset + (count * sizeof(struct dx_entry)); | ||
338 | old_csum = t->dt_checksum; | ||
339 | t->dt_checksum = 0; | ||
340 | csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); | ||
341 | csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail)); | ||
342 | t->dt_checksum = old_csum; | ||
343 | |||
344 | return cpu_to_le32(csum); | ||
345 | } | ||
346 | |||
347 | static int ext4_dx_csum_verify(struct inode *inode, | ||
348 | struct ext4_dir_entry *dirent) | ||
349 | { | ||
350 | struct dx_countlimit *c; | ||
351 | struct dx_tail *t; | ||
352 | int count_offset, limit, count; | ||
353 | |||
354 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
355 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
356 | return 1; | ||
357 | |||
358 | c = get_dx_countlimit(inode, dirent, &count_offset); | ||
359 | if (!c) { | ||
360 | EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); | ||
361 | return 1; | ||
362 | } | ||
363 | limit = le16_to_cpu(c->limit); | ||
364 | count = le16_to_cpu(c->count); | ||
365 | if (count_offset + (limit * sizeof(struct dx_entry)) > | ||
366 | EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { | ||
367 | EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " | ||
368 | "tree checksum found. Run e2fsck -D."); | ||
369 | return 1; | ||
370 | } | ||
371 | t = (struct dx_tail *)(((struct dx_entry *)c) + limit); | ||
372 | |||
373 | if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset, | ||
374 | count, t)) | ||
375 | return 0; | ||
376 | return 1; | ||
377 | } | ||
378 | |||
379 | static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) | ||
380 | { | ||
381 | struct dx_countlimit *c; | ||
382 | struct dx_tail *t; | ||
383 | int count_offset, limit, count; | ||
384 | |||
385 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
386 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
387 | return; | ||
388 | |||
389 | c = get_dx_countlimit(inode, dirent, &count_offset); | ||
390 | if (!c) { | ||
391 | EXT4_ERROR_INODE(inode, "dir seems corrupt? Run e2fsck -D."); | ||
392 | return; | ||
393 | } | ||
394 | limit = le16_to_cpu(c->limit); | ||
395 | count = le16_to_cpu(c->count); | ||
396 | if (count_offset + (limit * sizeof(struct dx_entry)) > | ||
397 | EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) { | ||
398 | EXT4_ERROR_INODE(inode, "metadata_csum set but no space for " | ||
399 | "tree checksum. Run e2fsck -D."); | ||
400 | return; | ||
401 | } | ||
402 | t = (struct dx_tail *)(((struct dx_entry *)c) + limit); | ||
403 | |||
404 | t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t); | ||
405 | } | ||
406 | |||
407 | static inline int ext4_handle_dirty_dx_node(handle_t *handle, | ||
408 | struct inode *inode, | ||
409 | struct buffer_head *bh) | ||
410 | { | ||
411 | ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data); | ||
412 | return ext4_handle_dirty_metadata(handle, inode, bh); | ||
413 | } | ||
414 | |||
183 | /* | 415 | /* |
184 | * p is at least 6 bytes before the end of page | 416 | * p is at least 6 bytes before the end of page |
185 | */ | 417 | */ |
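
Two conventions are introduced in the block of checksumming helpers above: directory leaf blocks end in a 12-byte ext4_dir_entry_tail that looks like an empty dirent (so kernels without metadata_csum simply skip it), and dx index blocks end in a struct dx_tail whose checksum field lies inside the checksummed region and is therefore saved, zeroed, summed over, and restored. The sketch below illustrates both ideas on a plain in-memory block; the tail layout and the xor-based checksum are stand-ins, not the on-disk formats.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCK_SIZE 1024

struct tail {                     /* illustrative tail, not the on-disk layout */
	uint32_t reserved;        /* reads as unused space to older code      */
	uint32_t checksum;        /* crc32c in ext4; a toy xor stands in here */
};

/* Locate the tail occupying the last bytes of a fixed-size block - the
 * same trick the EXT4_DIRENT_TAIL() macro plays for dirent blocks. */
static struct tail *block_tail(unsigned char *block)
{
	return (struct tail *)(block + BLOCK_SIZE - sizeof(struct tail));
}

/* The dx_tail checksum covers the tail itself, so the stored value is
 * saved, zeroed, summed over, and restored - the ext4_dx_csum() dance. */
static uint32_t block_csum(unsigned char *block)
{
	struct tail *t = block_tail(block);
	uint32_t saved = t->checksum;
	uint32_t csum = 0;
	size_t i;

	t->checksum = 0;
	for (i = 0; i < BLOCK_SIZE; i++)
		csum ^= (uint32_t)block[i] << (8 * (i & 3));
	t->checksum = saved;
	return csum;
}

int main(void)
{
	union {
		unsigned char bytes[BLOCK_SIZE];
		uint32_t align;       /* keep the tail naturally aligned */
	} blk;
	struct tail *t;

	memset(blk.bytes, 0xAA, BLOCK_SIZE);
	t = block_tail(blk.bytes);
	t->reserved = 0;
	t->checksum = block_csum(blk.bytes);                      /* set    */
	printf("verify: %d\n", t->checksum == block_csum(blk.bytes));
	return 0;
}
```
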
@@ -239,12 +471,20 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) | |||
239 | { | 471 | { |
240 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 472 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
241 | EXT4_DIR_REC_LEN(2) - infosize; | 473 | EXT4_DIR_REC_LEN(2) - infosize; |
474 | |||
475 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | ||
476 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
477 | entry_space -= sizeof(struct dx_tail); | ||
242 | return entry_space / sizeof(struct dx_entry); | 478 | return entry_space / sizeof(struct dx_entry); |
243 | } | 479 | } |
244 | 480 | ||
245 | static inline unsigned dx_node_limit(struct inode *dir) | 481 | static inline unsigned dx_node_limit(struct inode *dir) |
246 | { | 482 | { |
247 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 483 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
484 | |||
485 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | ||
486 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
487 | entry_space -= sizeof(struct dx_tail); | ||
248 | return entry_space / sizeof(struct dx_entry); | 488 | return entry_space / sizeof(struct dx_entry); |
249 | } | 489 | } |
250 | 490 | ||
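
The two limit helpers above shrink the usable index space by sizeof(struct dx_tail) when metadata_csum is enabled. For a 4096-byte block, with 8-byte dx_entry slots and the 12/12/8-byte record-length reservations the macros make (as I read EXT4_DIR_REC_LEN and dx_root_info), that works out to 508 vs 507 root entries and 511 vs 510 node entries; the quick check below reproduces the arithmetic, treating the constants as illustrative.

```c
#include <stdio.h>

/* Mirrors EXT4_DIR_REC_LEN(name_len): 8 bytes of header plus the name,
 * rounded up to a multiple of 4. */
static unsigned rec_len(unsigned name_len)
{
	return (name_len + 8 + 3) & ~3u;
}

int main(void)
{
	unsigned blocksize = 4096;
	unsigned dx_entry = 8;        /* two little-endian 32-bit words   */
	unsigned dx_tail = 8;         /* reserved word + checksum word    */
	unsigned infosize = 8;        /* sizeof(struct dx_root_info)      */
	unsigned root_space = blocksize - rec_len(1) - rec_len(2) - infosize;
	unsigned node_space = blocksize - rec_len(0);

	printf("root limit: %u (no csum) vs %u (metadata_csum)\n",
	       root_space / dx_entry, (root_space - dx_tail) / dx_entry);
	printf("node limit: %u (no csum) vs %u (metadata_csum)\n",
	       node_space / dx_entry, (node_space - dx_tail) / dx_entry);
	return 0;
}
```
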
@@ -390,6 +630,15 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
390 | goto fail; | 630 | goto fail; |
391 | } | 631 | } |
392 | 632 | ||
633 | if (!buffer_verified(bh) && | ||
634 | !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) { | ||
635 | ext4_warning(dir->i_sb, "Root failed checksum"); | ||
636 | brelse(bh); | ||
637 | *err = ERR_BAD_DX_DIR; | ||
638 | goto fail; | ||
639 | } | ||
640 | set_buffer_verified(bh); | ||
641 | |||
393 | entries = (struct dx_entry *) (((char *)&root->info) + | 642 | entries = (struct dx_entry *) (((char *)&root->info) + |
394 | root->info.info_length); | 643 | root->info.info_length); |
395 | 644 | ||
@@ -450,6 +699,17 @@ dx_probe(const struct qstr *d_name, struct inode *dir, | |||
450 | if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) | 699 | if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) |
451 | goto fail2; | 700 | goto fail2; |
452 | at = entries = ((struct dx_node *) bh->b_data)->entries; | 701 | at = entries = ((struct dx_node *) bh->b_data)->entries; |
702 | |||
703 | if (!buffer_verified(bh) && | ||
704 | !ext4_dx_csum_verify(dir, | ||
705 | (struct ext4_dir_entry *)bh->b_data)) { | ||
706 | ext4_warning(dir->i_sb, "Node failed checksum"); | ||
707 | brelse(bh); | ||
708 | *err = ERR_BAD_DX_DIR; | ||
709 | goto fail; | ||
710 | } | ||
711 | set_buffer_verified(bh); | ||
712 | |||
453 | if (dx_get_limit(entries) != dx_node_limit (dir)) { | 713 | if (dx_get_limit(entries) != dx_node_limit (dir)) { |
454 | ext4_warning(dir->i_sb, | 714 | ext4_warning(dir->i_sb, |
455 | "dx entry: limit != node limit"); | 715 | "dx entry: limit != node limit"); |
@@ -549,6 +809,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
549 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), | 809 | if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), |
550 | 0, &err))) | 810 | 0, &err))) |
551 | return err; /* Failure */ | 811 | return err; /* Failure */ |
812 | |||
813 | if (!buffer_verified(bh) && | ||
814 | !ext4_dx_csum_verify(dir, | ||
815 | (struct ext4_dir_entry *)bh->b_data)) { | ||
816 | ext4_warning(dir->i_sb, "Node failed checksum"); | ||
817 | return -EIO; | ||
818 | } | ||
819 | set_buffer_verified(bh); | ||
820 | |||
552 | p++; | 821 | p++; |
553 | brelse(p->bh); | 822 | brelse(p->bh); |
554 | p->bh = bh; | 823 | p->bh = bh; |
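
Every read path touched above uses the same guard: verify the checksum only if buffer_verified() is not yet set, then mark the buffer verified, so the comparatively expensive crc32c is paid once per buffer lifetime rather than on every lookup. A minimal sketch of that verify-once pattern, with a plain struct standing in for the buffer_head and its BH state bit:

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a cached disk block; 'verified' plays the role of the
 * buffer_verified() bit on a struct buffer_head. */
struct cached_block {
	const char *data;
	bool verified;
};

/* Pretend checksum check; in ext4 this is ext4_dirent_csum_verify() or
 * ext4_dx_csum_verify() run over bh->b_data. */
static bool csum_ok(const struct cached_block *b)
{
	printf("  (running expensive checksum over %s)\n", b->data);
	return true;
}

/* Verify at most once: later lookups that hit the same cached block skip
 * straight past the check. */
static int use_block(struct cached_block *b)
{
	if (!b->verified && !csum_ok(b))
		return -1;              /* -EIO in the kernel code */
	b->verified = true;
	return 0;
}

int main(void)
{
	struct cached_block b = { "directory block 7", false };

	use_block(&b);   /* first access pays for the checksum */
	use_block(&b);   /* second access is free              */
	return 0;
}
```
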
@@ -577,6 +846,11 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
577 | if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) | 846 | if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) |
578 | return err; | 847 | return err; |
579 | 848 | ||
849 | if (!buffer_verified(bh) && | ||
850 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) | ||
851 | return -EIO; | ||
852 | set_buffer_verified(bh); | ||
853 | |||
580 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 854 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
581 | top = (struct ext4_dir_entry_2 *) ((char *) de + | 855 | top = (struct ext4_dir_entry_2 *) ((char *) de + |
582 | dir->i_sb->s_blocksize - | 856 | dir->i_sb->s_blocksize - |
@@ -936,6 +1210,15 @@ restart: | |||
936 | brelse(bh); | 1210 | brelse(bh); |
937 | goto next; | 1211 | goto next; |
938 | } | 1212 | } |
1213 | if (!buffer_verified(bh) && | ||
1214 | !ext4_dirent_csum_verify(dir, | ||
1215 | (struct ext4_dir_entry *)bh->b_data)) { | ||
1216 | EXT4_ERROR_INODE(dir, "checksumming directory " | ||
1217 | "block %lu", (unsigned long)block); | ||
1218 | brelse(bh); | ||
1219 | goto next; | ||
1220 | } | ||
1221 | set_buffer_verified(bh); | ||
939 | i = search_dirblock(bh, dir, d_name, | 1222 | i = search_dirblock(bh, dir, d_name, |
940 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); | 1223 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); |
941 | if (i == 1) { | 1224 | if (i == 1) { |
@@ -987,6 +1270,16 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
987 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) | 1270 | if (!(bh = ext4_bread(NULL, dir, block, 0, err))) |
988 | goto errout; | 1271 | goto errout; |
989 | 1272 | ||
1273 | if (!buffer_verified(bh) && | ||
1274 | !ext4_dirent_csum_verify(dir, | ||
1275 | (struct ext4_dir_entry *)bh->b_data)) { | ||
1276 | EXT4_ERROR_INODE(dir, "checksumming directory " | ||
1277 | "block %lu", (unsigned long)block); | ||
1278 | brelse(bh); | ||
1279 | *err = -EIO; | ||
1280 | goto errout; | ||
1281 | } | ||
1282 | set_buffer_verified(bh); | ||
990 | retval = search_dirblock(bh, dir, d_name, | 1283 | retval = search_dirblock(bh, dir, d_name, |
991 | block << EXT4_BLOCK_SIZE_BITS(sb), | 1284 | block << EXT4_BLOCK_SIZE_BITS(sb), |
992 | res_dir); | 1285 | res_dir); |
@@ -1037,6 +1330,12 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1037 | EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); | 1330 | EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); |
1038 | return ERR_PTR(-EIO); | 1331 | return ERR_PTR(-EIO); |
1039 | } | 1332 | } |
1333 | if (unlikely(ino == dir->i_ino)) { | ||
1334 | EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir", | ||
1335 | dentry->d_name.len, | ||
1336 | dentry->d_name.name); | ||
1337 | return ERR_PTR(-EIO); | ||
1338 | } | ||
1040 | inode = ext4_iget(dir->i_sb, ino); | 1339 | inode = ext4_iget(dir->i_sb, ino); |
1041 | if (inode == ERR_PTR(-ESTALE)) { | 1340 | if (inode == ERR_PTR(-ESTALE)) { |
1042 | EXT4_ERROR_INODE(dir, | 1341 | EXT4_ERROR_INODE(dir, |
@@ -1156,8 +1455,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1156 | char *data1 = (*bh)->b_data, *data2; | 1455 | char *data1 = (*bh)->b_data, *data2; |
1157 | unsigned split, move, size; | 1456 | unsigned split, move, size; |
1158 | struct ext4_dir_entry_2 *de = NULL, *de2; | 1457 | struct ext4_dir_entry_2 *de = NULL, *de2; |
1458 | struct ext4_dir_entry_tail *t; | ||
1459 | int csum_size = 0; | ||
1159 | int err = 0, i; | 1460 | int err = 0, i; |
1160 | 1461 | ||
1462 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | ||
1463 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1464 | csum_size = sizeof(struct ext4_dir_entry_tail); | ||
1465 | |||
1161 | bh2 = ext4_append (handle, dir, &newblock, &err); | 1466 | bh2 = ext4_append (handle, dir, &newblock, &err); |
1162 | if (!(bh2)) { | 1467 | if (!(bh2)) { |
1163 | brelse(*bh); | 1468 | brelse(*bh); |
@@ -1204,10 +1509,20 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1204 | /* Fancy dance to stay within two buffers */ | 1509 | /* Fancy dance to stay within two buffers */ |
1205 | de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); | 1510 | de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); |
1206 | de = dx_pack_dirents(data1, blocksize); | 1511 | de = dx_pack_dirents(data1, blocksize); |
1207 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, | 1512 | de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - |
1513 | (char *) de, | ||
1208 | blocksize); | 1514 | blocksize); |
1209 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2, | 1515 | de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - |
1516 | (char *) de2, | ||
1210 | blocksize); | 1517 | blocksize); |
1518 | if (csum_size) { | ||
1519 | t = EXT4_DIRENT_TAIL(data2, blocksize); | ||
1520 | initialize_dirent_tail(t, blocksize); | ||
1521 | |||
1522 | t = EXT4_DIRENT_TAIL(data1, blocksize); | ||
1523 | initialize_dirent_tail(t, blocksize); | ||
1524 | } | ||
1525 | |||
1211 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1526 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); |
1212 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); | 1527 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); |
1213 | 1528 | ||
@@ -1218,10 +1533,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
1218 | de = de2; | 1533 | de = de2; |
1219 | } | 1534 | } |
1220 | dx_insert_block(frame, hash2 + continued, newblock); | 1535 | dx_insert_block(frame, hash2 + continued, newblock); |
1221 | err = ext4_handle_dirty_metadata(handle, dir, bh2); | 1536 | err = ext4_handle_dirty_dirent_node(handle, dir, bh2); |
1222 | if (err) | 1537 | if (err) |
1223 | goto journal_error; | 1538 | goto journal_error; |
1224 | err = ext4_handle_dirty_metadata(handle, dir, frame->bh); | 1539 | err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); |
1225 | if (err) | 1540 | if (err) |
1226 | goto journal_error; | 1541 | goto journal_error; |
1227 | brelse(bh2); | 1542 | brelse(bh2); |
@@ -1258,11 +1573,16 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1258 | unsigned short reclen; | 1573 | unsigned short reclen; |
1259 | int nlen, rlen, err; | 1574 | int nlen, rlen, err; |
1260 | char *top; | 1575 | char *top; |
1576 | int csum_size = 0; | ||
1577 | |||
1578 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
1579 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1580 | csum_size = sizeof(struct ext4_dir_entry_tail); | ||
1261 | 1581 | ||
1262 | reclen = EXT4_DIR_REC_LEN(namelen); | 1582 | reclen = EXT4_DIR_REC_LEN(namelen); |
1263 | if (!de) { | 1583 | if (!de) { |
1264 | de = (struct ext4_dir_entry_2 *)bh->b_data; | 1584 | de = (struct ext4_dir_entry_2 *)bh->b_data; |
1265 | top = bh->b_data + blocksize - reclen; | 1585 | top = bh->b_data + (blocksize - csum_size) - reclen; |
1266 | while ((char *) de <= top) { | 1586 | while ((char *) de <= top) { |
1267 | if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) | 1587 | if (ext4_check_dir_entry(dir, NULL, de, bh, offset)) |
1268 | return -EIO; | 1588 | return -EIO; |
@@ -1295,11 +1615,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1295 | de = de1; | 1615 | de = de1; |
1296 | } | 1616 | } |
1297 | de->file_type = EXT4_FT_UNKNOWN; | 1617 | de->file_type = EXT4_FT_UNKNOWN; |
1298 | if (inode) { | 1618 | de->inode = cpu_to_le32(inode->i_ino); |
1299 | de->inode = cpu_to_le32(inode->i_ino); | 1619 | ext4_set_de_type(dir->i_sb, de, inode->i_mode); |
1300 | ext4_set_de_type(dir->i_sb, de, inode->i_mode); | ||
1301 | } else | ||
1302 | de->inode = 0; | ||
1303 | de->name_len = namelen; | 1620 | de->name_len = namelen; |
1304 | memcpy(de->name, name, namelen); | 1621 | memcpy(de->name, name, namelen); |
1305 | /* | 1622 | /* |
@@ -1318,7 +1635,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1318 | dir->i_version++; | 1635 | dir->i_version++; |
1319 | ext4_mark_inode_dirty(handle, dir); | 1636 | ext4_mark_inode_dirty(handle, dir); |
1320 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 1637 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
1321 | err = ext4_handle_dirty_metadata(handle, dir, bh); | 1638 | err = ext4_handle_dirty_dirent_node(handle, dir, bh); |
1322 | if (err) | 1639 | if (err) |
1323 | ext4_std_error(dir->i_sb, err); | 1640 | ext4_std_error(dir->i_sb, err); |
1324 | return 0; | 1641 | return 0; |
@@ -1339,6 +1656,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1339 | struct dx_frame frames[2], *frame; | 1656 | struct dx_frame frames[2], *frame; |
1340 | struct dx_entry *entries; | 1657 | struct dx_entry *entries; |
1341 | struct ext4_dir_entry_2 *de, *de2; | 1658 | struct ext4_dir_entry_2 *de, *de2; |
1659 | struct ext4_dir_entry_tail *t; | ||
1342 | char *data1, *top; | 1660 | char *data1, *top; |
1343 | unsigned len; | 1661 | unsigned len; |
1344 | int retval; | 1662 | int retval; |
@@ -1346,6 +1664,11 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1346 | struct dx_hash_info hinfo; | 1664 | struct dx_hash_info hinfo; |
1347 | ext4_lblk_t block; | 1665 | ext4_lblk_t block; |
1348 | struct fake_dirent *fde; | 1666 | struct fake_dirent *fde; |
1667 | int csum_size = 0; | ||
1668 | |||
1669 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
1670 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1671 | csum_size = sizeof(struct ext4_dir_entry_tail); | ||
1349 | 1672 | ||
1350 | blocksize = dir->i_sb->s_blocksize; | 1673 | blocksize = dir->i_sb->s_blocksize; |
1351 | dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); | 1674 | dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); |
@@ -1366,7 +1689,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1366 | brelse(bh); | 1689 | brelse(bh); |
1367 | return -EIO; | 1690 | return -EIO; |
1368 | } | 1691 | } |
1369 | len = ((char *) root) + blocksize - (char *) de; | 1692 | len = ((char *) root) + (blocksize - csum_size) - (char *) de; |
1370 | 1693 | ||
1371 | /* Allocate new block for the 0th block's dirents */ | 1694 | /* Allocate new block for the 0th block's dirents */ |
1372 | bh2 = ext4_append(handle, dir, &block, &retval); | 1695 | bh2 = ext4_append(handle, dir, &block, &retval); |
@@ -1382,8 +1705,15 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1382 | top = data1 + len; | 1705 | top = data1 + len; |
1383 | while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) | 1706 | while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) |
1384 | de = de2; | 1707 | de = de2; |
1385 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de, | 1708 | de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - |
1709 | (char *) de, | ||
1386 | blocksize); | 1710 | blocksize); |
1711 | |||
1712 | if (csum_size) { | ||
1713 | t = EXT4_DIRENT_TAIL(data1, blocksize); | ||
1714 | initialize_dirent_tail(t, blocksize); | ||
1715 | } | ||
1716 | |||
1387 | /* Initialize the root; the dot dirents already exist */ | 1717 | /* Initialize the root; the dot dirents already exist */ |
1388 | de = (struct ext4_dir_entry_2 *) (&root->dotdot); | 1718 | de = (struct ext4_dir_entry_2 *) (&root->dotdot); |
1389 | de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), | 1719 | de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2), |
@@ -1408,8 +1738,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1408 | frame->bh = bh; | 1738 | frame->bh = bh; |
1409 | bh = bh2; | 1739 | bh = bh2; |
1410 | 1740 | ||
1411 | ext4_handle_dirty_metadata(handle, dir, frame->bh); | 1741 | ext4_handle_dirty_dx_node(handle, dir, frame->bh); |
1412 | ext4_handle_dirty_metadata(handle, dir, bh); | 1742 | ext4_handle_dirty_dirent_node(handle, dir, bh); |
1413 | 1743 | ||
1414 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); | 1744 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); |
1415 | if (!de) { | 1745 | if (!de) { |
@@ -1445,11 +1775,17 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1445 | struct inode *dir = dentry->d_parent->d_inode; | 1775 | struct inode *dir = dentry->d_parent->d_inode; |
1446 | struct buffer_head *bh; | 1776 | struct buffer_head *bh; |
1447 | struct ext4_dir_entry_2 *de; | 1777 | struct ext4_dir_entry_2 *de; |
1778 | struct ext4_dir_entry_tail *t; | ||
1448 | struct super_block *sb; | 1779 | struct super_block *sb; |
1449 | int retval; | 1780 | int retval; |
1450 | int dx_fallback=0; | 1781 | int dx_fallback=0; |
1451 | unsigned blocksize; | 1782 | unsigned blocksize; |
1452 | ext4_lblk_t block, blocks; | 1783 | ext4_lblk_t block, blocks; |
1784 | int csum_size = 0; | ||
1785 | |||
1786 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
1787 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1788 | csum_size = sizeof(struct ext4_dir_entry_tail); | ||
1453 | 1789 | ||
1454 | sb = dir->i_sb; | 1790 | sb = dir->i_sb; |
1455 | blocksize = sb->s_blocksize; | 1791 | blocksize = sb->s_blocksize; |
@@ -1468,6 +1804,11 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1468 | bh = ext4_bread(handle, dir, block, 0, &retval); | 1804 | bh = ext4_bread(handle, dir, block, 0, &retval); |
1469 | if(!bh) | 1805 | if(!bh) |
1470 | return retval; | 1806 | return retval; |
1807 | if (!buffer_verified(bh) && | ||
1808 | !ext4_dirent_csum_verify(dir, | ||
1809 | (struct ext4_dir_entry *)bh->b_data)) | ||
1810 | return -EIO; | ||
1811 | set_buffer_verified(bh); | ||
1471 | retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); | 1812 | retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); |
1472 | if (retval != -ENOSPC) { | 1813 | if (retval != -ENOSPC) { |
1473 | brelse(bh); | 1814 | brelse(bh); |
@@ -1484,7 +1825,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1484 | return retval; | 1825 | return retval; |
1485 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1826 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1486 | de->inode = 0; | 1827 | de->inode = 0; |
1487 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1828 | de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); |
1829 | |||
1830 | if (csum_size) { | ||
1831 | t = EXT4_DIRENT_TAIL(bh->b_data, blocksize); | ||
1832 | initialize_dirent_tail(t, blocksize); | ||
1833 | } | ||
1834 | |||
1488 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1835 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1489 | brelse(bh); | 1836 | brelse(bh); |
1490 | if (retval == 0) | 1837 | if (retval == 0) |
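
Wherever a brand-new directory block is laid out (above in ext4_add_entry(), and likewise in do_split(), make_indexed_dir() and ext4_mkdir()), the spanning dirent now covers blocksize - csum_size bytes and initialize_dirent_tail() claims the final 12 bytes with a fake dirent: inode 0, rec_len 12, file type EXT4_FT_DIR_CSUM, which older kernels treat as an ordinary empty entry. A compact sketch of that layout using simplified structures rather than the real on-disk ones:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCK_SIZE  4096
#define TAIL_SIZE   12            /* sizeof(struct ext4_dir_entry_tail) */
#define FT_DIR_CSUM 0xDE          /* EXT4_FT_DIR_CSUM, as I read ext4.h */

/* Simplified dirent header; the real ext4_dir_entry_2 carries the name
 * bytes inline after these fields. */
struct dirent_hdr {
	uint32_t inode;
	uint16_t rec_len;
	uint8_t  name_len;
	uint8_t  file_type;
};

/* Lay out an empty directory block the way ext4_add_entry() now does it:
 * one dirent spanning blocksize - csum_size bytes, then a "fake" dirent
 * whose 12 bytes exist only to hold the checksum. */
static void init_empty_dir_block(unsigned char *block, unsigned csum_size)
{
	struct dirent_hdr *de = (struct dirent_hdr *)block;
	struct dirent_hdr *tail;

	memset(block, 0, BLOCK_SIZE);
	de->inode = 0;
	de->rec_len = (uint16_t)(BLOCK_SIZE - csum_size);

	if (csum_size) {
		tail = (struct dirent_hdr *)(block + BLOCK_SIZE - csum_size);
		tail->inode = 0;              /* never matches a real file */
		tail->rec_len = (uint16_t)csum_size;
		tail->file_type = FT_DIR_CSUM;
		/* the remaining 4 bytes are where the crc32c is stored    */
	}
}

int main(void)
{
	static uint32_t words[BLOCK_SIZE / 4];    /* keep casts aligned */
	unsigned char *block = (unsigned char *)words;

	init_empty_dir_block(block, TAIL_SIZE);
	printf("spanning rec_len=%u, tail rec_len=%u\n",
	       ((struct dirent_hdr *)block)->rec_len,
	       ((struct dirent_hdr *)(block + BLOCK_SIZE - TAIL_SIZE))->rec_len);
	return 0;
}
```
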
@@ -1516,6 +1863,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1516 | if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) | 1863 | if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) |
1517 | goto cleanup; | 1864 | goto cleanup; |
1518 | 1865 | ||
1866 | if (!buffer_verified(bh) && | ||
1867 | !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) | ||
1868 | goto journal_error; | ||
1869 | set_buffer_verified(bh); | ||
1870 | |||
1519 | BUFFER_TRACE(bh, "get_write_access"); | 1871 | BUFFER_TRACE(bh, "get_write_access"); |
1520 | err = ext4_journal_get_write_access(handle, bh); | 1872 | err = ext4_journal_get_write_access(handle, bh); |
1521 | if (err) | 1873 | if (err) |
@@ -1583,7 +1935,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1583 | dxtrace(dx_show_index("node", frames[1].entries)); | 1935 | dxtrace(dx_show_index("node", frames[1].entries)); |
1584 | dxtrace(dx_show_index("node", | 1936 | dxtrace(dx_show_index("node", |
1585 | ((struct dx_node *) bh2->b_data)->entries)); | 1937 | ((struct dx_node *) bh2->b_data)->entries)); |
1586 | err = ext4_handle_dirty_metadata(handle, dir, bh2); | 1938 | err = ext4_handle_dirty_dx_node(handle, dir, bh2); |
1587 | if (err) | 1939 | if (err) |
1588 | goto journal_error; | 1940 | goto journal_error; |
1589 | brelse (bh2); | 1941 | brelse (bh2); |
@@ -1609,7 +1961,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
1609 | if (err) | 1961 | if (err) |
1610 | goto journal_error; | 1962 | goto journal_error; |
1611 | } | 1963 | } |
1612 | err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh); | 1964 | err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh); |
1613 | if (err) { | 1965 | if (err) { |
1614 | ext4_std_error(inode->i_sb, err); | 1966 | ext4_std_error(inode->i_sb, err); |
1615 | goto cleanup; | 1967 | goto cleanup; |
@@ -1641,12 +1993,17 @@ static int ext4_delete_entry(handle_t *handle, | |||
1641 | { | 1993 | { |
1642 | struct ext4_dir_entry_2 *de, *pde; | 1994 | struct ext4_dir_entry_2 *de, *pde; |
1643 | unsigned int blocksize = dir->i_sb->s_blocksize; | 1995 | unsigned int blocksize = dir->i_sb->s_blocksize; |
1996 | int csum_size = 0; | ||
1644 | int i, err; | 1997 | int i, err; |
1645 | 1998 | ||
1999 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | ||
2000 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
2001 | csum_size = sizeof(struct ext4_dir_entry_tail); | ||
2002 | |||
1646 | i = 0; | 2003 | i = 0; |
1647 | pde = NULL; | 2004 | pde = NULL; |
1648 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 2005 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1649 | while (i < bh->b_size) { | 2006 | while (i < bh->b_size - csum_size) { |
1650 | if (ext4_check_dir_entry(dir, NULL, de, bh, i)) | 2007 | if (ext4_check_dir_entry(dir, NULL, de, bh, i)) |
1651 | return -EIO; | 2008 | return -EIO; |
1652 | if (de == de_del) { | 2009 | if (de == de_del) { |
@@ -1667,7 +2024,7 @@ static int ext4_delete_entry(handle_t *handle, | |||
1667 | de->inode = 0; | 2024 | de->inode = 0; |
1668 | dir->i_version++; | 2025 | dir->i_version++; |
1669 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 2026 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
1670 | err = ext4_handle_dirty_metadata(handle, dir, bh); | 2027 | err = ext4_handle_dirty_dirent_node(handle, dir, bh); |
1671 | if (unlikely(err)) { | 2028 | if (unlikely(err)) { |
1672 | ext4_std_error(dir->i_sb, err); | 2029 | ext4_std_error(dir->i_sb, err); |
1673 | return err; | 2030 | return err; |
@@ -1809,9 +2166,15 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
1809 | struct inode *inode; | 2166 | struct inode *inode; |
1810 | struct buffer_head *dir_block = NULL; | 2167 | struct buffer_head *dir_block = NULL; |
1811 | struct ext4_dir_entry_2 *de; | 2168 | struct ext4_dir_entry_2 *de; |
2169 | struct ext4_dir_entry_tail *t; | ||
1812 | unsigned int blocksize = dir->i_sb->s_blocksize; | 2170 | unsigned int blocksize = dir->i_sb->s_blocksize; |
2171 | int csum_size = 0; | ||
1813 | int err, retries = 0; | 2172 | int err, retries = 0; |
1814 | 2173 | ||
2174 | if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, | ||
2175 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
2176 | csum_size = sizeof(struct ext4_dir_entry_tail); | ||
2177 | |||
1815 | if (EXT4_DIR_LINK_MAX(dir)) | 2178 | if (EXT4_DIR_LINK_MAX(dir)) |
1816 | return -EMLINK; | 2179 | return -EMLINK; |
1817 | 2180 | ||
@@ -1852,16 +2215,24 @@ retry: | |||
1852 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 2215 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1853 | de = ext4_next_entry(de, blocksize); | 2216 | de = ext4_next_entry(de, blocksize); |
1854 | de->inode = cpu_to_le32(dir->i_ino); | 2217 | de->inode = cpu_to_le32(dir->i_ino); |
1855 | de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1), | 2218 | de->rec_len = ext4_rec_len_to_disk(blocksize - |
2219 | (csum_size + EXT4_DIR_REC_LEN(1)), | ||
1856 | blocksize); | 2220 | blocksize); |
1857 | de->name_len = 2; | 2221 | de->name_len = 2; |
1858 | strcpy(de->name, ".."); | 2222 | strcpy(de->name, ".."); |
1859 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 2223 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
1860 | set_nlink(inode, 2); | 2224 | set_nlink(inode, 2); |
2225 | |||
2226 | if (csum_size) { | ||
2227 | t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize); | ||
2228 | initialize_dirent_tail(t, blocksize); | ||
2229 | } | ||
2230 | |||
1861 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); | 2231 | BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); |
1862 | err = ext4_handle_dirty_metadata(handle, inode, dir_block); | 2232 | err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); |
1863 | if (err) | 2233 | if (err) |
1864 | goto out_clear_inode; | 2234 | goto out_clear_inode; |
2235 | set_buffer_verified(dir_block); | ||
1865 | err = ext4_mark_inode_dirty(handle, inode); | 2236 | err = ext4_mark_inode_dirty(handle, inode); |
1866 | if (!err) | 2237 | if (!err) |
1867 | err = ext4_add_entry(handle, dentry, inode); | 2238 | err = ext4_add_entry(handle, dentry, inode); |
@@ -1911,6 +2282,14 @@ static int empty_dir(struct inode *inode) | |||
1911 | inode->i_ino); | 2282 | inode->i_ino); |
1912 | return 1; | 2283 | return 1; |
1913 | } | 2284 | } |
2285 | if (!buffer_verified(bh) && | ||
2286 | !ext4_dirent_csum_verify(inode, | ||
2287 | (struct ext4_dir_entry *)bh->b_data)) { | ||
2288 | EXT4_ERROR_INODE(inode, "checksum error reading directory " | ||
2289 | "lblock 0"); | ||
2290 | return -EIO; | ||
2291 | } | ||
2292 | set_buffer_verified(bh); | ||
1914 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 2293 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1915 | de1 = ext4_next_entry(de, sb->s_blocksize); | 2294 | de1 = ext4_next_entry(de, sb->s_blocksize); |
1916 | if (le32_to_cpu(de->inode) != inode->i_ino || | 2295 | if (le32_to_cpu(de->inode) != inode->i_ino || |
@@ -1942,6 +2321,14 @@ static int empty_dir(struct inode *inode) | |||
1942 | offset += sb->s_blocksize; | 2321 | offset += sb->s_blocksize; |
1943 | continue; | 2322 | continue; |
1944 | } | 2323 | } |
2324 | if (!buffer_verified(bh) && | ||
2325 | !ext4_dirent_csum_verify(inode, | ||
2326 | (struct ext4_dir_entry *)bh->b_data)) { | ||
2327 | EXT4_ERROR_INODE(inode, "checksum error reading " | ||
2328 | "directory lblock %u", (unsigned)(offset >> EXT4_BLOCK_SIZE_BITS(sb))); | ||
2329 | return -EIO; | ||
2330 | } | ||
2331 | set_buffer_verified(bh); | ||
1945 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 2332 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1946 | } | 2333 | } |
1947 | if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) { | 2334 | if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) { |
@@ -2010,7 +2397,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2010 | /* Insert this inode at the head of the on-disk orphan list... */ | 2397 | /* Insert this inode at the head of the on-disk orphan list... */ |
2011 | NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); | 2398 | NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); |
2012 | EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); | 2399 | EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); |
2013 | err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | 2400 | err = ext4_handle_dirty_super_now(handle, sb); |
2014 | rc = ext4_mark_iloc_dirty(handle, inode, &iloc); | 2401 | rc = ext4_mark_iloc_dirty(handle, inode, &iloc); |
2015 | if (!err) | 2402 | if (!err) |
2016 | err = rc; | 2403 | err = rc; |
@@ -2083,7 +2470,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2083 | if (err) | 2470 | if (err) |
2084 | goto out_brelse; | 2471 | goto out_brelse; |
2085 | sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); | 2472 | sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); |
2086 | err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); | 2473 | err = ext4_handle_dirty_super_now(handle, inode->i_sb); |
2087 | } else { | 2474 | } else { |
2088 | struct ext4_iloc iloc2; | 2475 | struct ext4_iloc iloc2; |
2089 | struct inode *i_prev = | 2476 | struct inode *i_prev = |
@@ -2442,6 +2829,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2442 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); | 2829 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); |
2443 | if (!dir_bh) | 2830 | if (!dir_bh) |
2444 | goto end_rename; | 2831 | goto end_rename; |
2832 | if (!buffer_verified(dir_bh) && | ||
2833 | !ext4_dirent_csum_verify(old_inode, | ||
2834 | (struct ext4_dir_entry *)dir_bh->b_data)) | ||
2835 | goto end_rename; | ||
2836 | set_buffer_verified(dir_bh); | ||
2445 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data, | 2837 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data, |
2446 | old_dir->i_sb->s_blocksize)) != old_dir->i_ino) | 2838 | old_dir->i_sb->s_blocksize)) != old_dir->i_ino) |
2447 | goto end_rename; | 2839 | goto end_rename; |
@@ -2472,7 +2864,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2472 | ext4_current_time(new_dir); | 2864 | ext4_current_time(new_dir); |
2473 | ext4_mark_inode_dirty(handle, new_dir); | 2865 | ext4_mark_inode_dirty(handle, new_dir); |
2474 | BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); | 2866 | BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); |
2475 | retval = ext4_handle_dirty_metadata(handle, new_dir, new_bh); | 2867 | retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh); |
2476 | if (unlikely(retval)) { | 2868 | if (unlikely(retval)) { |
2477 | ext4_std_error(new_dir->i_sb, retval); | 2869 | ext4_std_error(new_dir->i_sb, retval); |
2478 | goto end_rename; | 2870 | goto end_rename; |
@@ -2526,7 +2918,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2526 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = | 2918 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = |
2527 | cpu_to_le32(new_dir->i_ino); | 2919 | cpu_to_le32(new_dir->i_ino); |
2528 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | 2920 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); |
2529 | retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh); | 2921 | retval = ext4_handle_dirty_dirent_node(handle, old_inode, |
2922 | dir_bh); | ||
2530 | if (retval) { | 2923 | if (retval) { |
2531 | ext4_std_error(old_dir->i_sb, retval); | 2924 | ext4_std_error(old_dir->i_sb, retval); |
2532 | goto end_rename; | 2925 | goto end_rename; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 59fa0be27251..7ea6cbb44121 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -161,6 +161,8 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) | |||
161 | if (flex_gd == NULL) | 161 | if (flex_gd == NULL) |
162 | goto out3; | 162 | goto out3; |
163 | 163 | ||
164 | if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) | ||
165 | goto out2; | ||
164 | flex_gd->count = flexbg_size; | 166 | flex_gd->count = flexbg_size; |
165 | 167 | ||
166 | flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * | 168 | flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * |
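
The new check in alloc_flex_gd() bounds flexbg_size so the element-count multiplication feeding kmalloc() cannot wrap around. The usual shape of such a guard, sketched in userspace with a stand-in element type:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct group_data {               /* stand-in for ext4_new_group_data */
	uint64_t block_bitmap;
	uint64_t inode_bitmap;
	uint64_t inode_table;
	uint32_t blocks_count;
	uint32_t free_blocks_count;
	uint16_t flags;
};

/* Allocate an array of 'count' elements, refusing counts that would make
 * count * sizeof(element) wrap - the same idea as the flexbg_size check. */
static struct group_data *alloc_groups(unsigned long count)
{
	if (count == 0 || count >= SIZE_MAX / sizeof(struct group_data))
		return NULL;                       /* would overflow */
	return malloc(count * sizeof(struct group_data));
}

int main(void)
{
	struct group_data *g = alloc_groups(16);

	printf("16 groups: %s\n", g ? "ok" : "refused");
	printf("huge count: %s\n",
	       alloc_groups(SIZE_MAX / 2) ? "ok" : "refused");
	free(g);
	return 0;
}
```
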
@@ -796,7 +798,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
796 | ext4_kvfree(o_group_desc); | 798 | ext4_kvfree(o_group_desc); |
797 | 799 | ||
798 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); | 800 | le16_add_cpu(&es->s_reserved_gdt_blocks, -1); |
799 | err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | 801 | err = ext4_handle_dirty_super_now(handle, sb); |
800 | if (err) | 802 | if (err) |
801 | ext4_std_error(sb, err); | 803 | ext4_std_error(sb, err); |
802 | 804 | ||
@@ -968,6 +970,8 @@ static void update_backups(struct super_block *sb, | |||
968 | goto exit_err; | 970 | goto exit_err; |
969 | } | 971 | } |
970 | 972 | ||
973 | ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); | ||
974 | |||
971 | while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { | 975 | while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { |
972 | struct buffer_head *bh; | 976 | struct buffer_head *bh; |
973 | 977 | ||
@@ -1067,6 +1071,54 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, | |||
1067 | return err; | 1071 | return err; |
1068 | } | 1072 | } |
1069 | 1073 | ||
1074 | static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) | ||
1075 | { | ||
1076 | struct buffer_head *bh = sb_getblk(sb, block); | ||
1077 | if (!bh) | ||
1078 | return NULL; | ||
1079 | |||
1080 | if (bitmap_uptodate(bh)) | ||
1081 | return bh; | ||
1082 | |||
1083 | lock_buffer(bh); | ||
1084 | if (bh_submit_read(bh) < 0) { | ||
1085 | unlock_buffer(bh); | ||
1086 | brelse(bh); | ||
1087 | return NULL; | ||
1088 | } | ||
1089 | unlock_buffer(bh); | ||
1090 | |||
1091 | return bh; | ||
1092 | } | ||
1093 | |||
1094 | static int ext4_set_bitmap_checksums(struct super_block *sb, | ||
1095 | ext4_group_t group, | ||
1096 | struct ext4_group_desc *gdp, | ||
1097 | struct ext4_new_group_data *group_data) | ||
1098 | { | ||
1099 | struct buffer_head *bh; | ||
1100 | |||
1101 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
1102 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
1103 | return 0; | ||
1104 | |||
1105 | bh = ext4_get_bitmap(sb, group_data->inode_bitmap); | ||
1106 | if (!bh) | ||
1107 | return -EIO; | ||
1108 | ext4_inode_bitmap_csum_set(sb, group, gdp, bh, | ||
1109 | EXT4_INODES_PER_GROUP(sb) / 8); | ||
1110 | brelse(bh); | ||
1111 | |||
1112 | bh = ext4_get_bitmap(sb, group_data->block_bitmap); | ||
1113 | if (!bh) | ||
1114 | return -EIO; | ||
1115 | ext4_block_bitmap_csum_set(sb, group, gdp, bh, | ||
1116 | EXT4_BLOCKS_PER_GROUP(sb) / 8); | ||
1117 | brelse(bh); | ||
1118 | |||
1119 | return 0; | ||
1120 | } | ||
1121 | |||
1070 | /* | 1122 | /* |
1071 | * ext4_setup_new_descs() will set up the group descriptors of a flex bg | 1123 | * ext4_setup_new_descs() will set up the group descriptors of a flex bg |
1072 | */ | 1124 | */ |
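
ext4_set_bitmap_checksums() reads each new group's freshly written inode and block bitmaps and records their checksums in the new group descriptor before it is published; note that only the first INODES_PER_GROUP/8 and BLOCKS_PER_GROUP/8 bytes are covered, i.e. only the bits that describe real inodes and blocks, not whatever padding fills out the rest of the bitmap block. A small sketch of checksumming just that meaningful prefix (toy checksum, hypothetical group geometry):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BITMAP_BLOCK    4096      /* size of the on-disk bitmap block          */
#define ITEMS_PER_GROUP 2048      /* hypothetical inodes (or blocks) per group */

/* Toy stand-in for ext4_chksum() over the bitmap bytes. */
static uint32_t toy_sum(uint32_t seed, const unsigned char *p, size_t len)
{
	while (len--)
		seed = seed * 31 + *p++;
	return seed;
}

/* Only the first items/8 bytes carry real allocation bits, so that is all
 * the checksum covers - trailing padding in the block never matters. */
static uint32_t bitmap_csum(uint32_t seed, const unsigned char *bitmap,
			    unsigned items_per_group)
{
	return toy_sum(seed, bitmap, items_per_group / 8);
}

int main(void)
{
	unsigned char bitmap[BITMAP_BLOCK];
	uint32_t before, after;

	memset(bitmap, 0, sizeof(bitmap));
	before = bitmap_csum(0x1234, bitmap, ITEMS_PER_GROUP);
	bitmap[BITMAP_BLOCK - 1] = 0xFF;          /* scribble on the padding */
	after  = bitmap_csum(0x1234, bitmap, ITEMS_PER_GROUP);
	printf("padding ignored: %d\n", before == after);
	return 0;
}
```
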
@@ -1093,18 +1145,24 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, | |||
1093 | */ | 1145 | */ |
1094 | gdb_bh = sbi->s_group_desc[gdb_num]; | 1146 | gdb_bh = sbi->s_group_desc[gdb_num]; |
1095 | /* Update group descriptor block for new group */ | 1147 | /* Update group descriptor block for new group */ |
1096 | gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + | 1148 | gdp = (struct ext4_group_desc *)(gdb_bh->b_data + |
1097 | gdb_off * EXT4_DESC_SIZE(sb)); | 1149 | gdb_off * EXT4_DESC_SIZE(sb)); |
1098 | 1150 | ||
1099 | memset(gdp, 0, EXT4_DESC_SIZE(sb)); | 1151 | memset(gdp, 0, EXT4_DESC_SIZE(sb)); |
1100 | ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); | 1152 | ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); |
1101 | ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); | 1153 | ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); |
1154 | err = ext4_set_bitmap_checksums(sb, group, gdp, group_data); | ||
1155 | if (err) { | ||
1156 | ext4_std_error(sb, err); | ||
1157 | break; | ||
1158 | } | ||
1159 | |||
1102 | ext4_inode_table_set(sb, gdp, group_data->inode_table); | 1160 | ext4_inode_table_set(sb, gdp, group_data->inode_table); |
1103 | ext4_free_group_clusters_set(sb, gdp, | 1161 | ext4_free_group_clusters_set(sb, gdp, |
1104 | EXT4_B2C(sbi, group_data->free_blocks_count)); | 1162 | EXT4_B2C(sbi, group_data->free_blocks_count)); |
1105 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | 1163 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); |
1106 | gdp->bg_flags = cpu_to_le16(*bg_flags); | 1164 | gdp->bg_flags = cpu_to_le16(*bg_flags); |
1107 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | 1165 | ext4_group_desc_csum_set(sb, group, gdp); |
1108 | 1166 | ||
1109 | err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); | 1167 | err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); |
1110 | if (unlikely(err)) { | 1168 | if (unlikely(err)) { |
@@ -1343,17 +1401,14 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, | |||
1343 | (1 + ext4_bg_num_gdb(sb, group + i) + | 1401 | (1 + ext4_bg_num_gdb(sb, group + i) + |
1344 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | 1402 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; |
1345 | group_data[i].free_blocks_count = blocks_per_group - overhead; | 1403 | group_data[i].free_blocks_count = blocks_per_group - overhead; |
1346 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1404 | if (ext4_has_group_desc_csum(sb)) |
1347 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
1348 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | | 1405 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | |
1349 | EXT4_BG_INODE_UNINIT; | 1406 | EXT4_BG_INODE_UNINIT; |
1350 | else | 1407 | else |
1351 | flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; | 1408 | flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; |
1352 | } | 1409 | } |
1353 | 1410 | ||
1354 | if (last_group == n_group && | 1411 | if (last_group == n_group && ext4_has_group_desc_csum(sb)) |
1355 | EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
1356 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
1357 | /* We need to initialize block bitmap of last group. */ | 1412 | /* We need to initialize block bitmap of last group. */ |
1358 | flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; | 1413 | flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; |
1359 | 1414 | ||
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 35b5954489ee..eb7aa3e4ef05 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -112,6 +112,48 @@ static struct file_system_type ext3_fs_type = { | |||
112 | #define IS_EXT3_SB(sb) (0) | 112 | #define IS_EXT3_SB(sb) (0) |
113 | #endif | 113 | #endif |
114 | 114 | ||
115 | static int ext4_verify_csum_type(struct super_block *sb, | ||
116 | struct ext4_super_block *es) | ||
117 | { | ||
118 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
119 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
120 | return 1; | ||
121 | |||
122 | return es->s_checksum_type == EXT4_CRC32C_CHKSUM; | ||
123 | } | ||
124 | |||
125 | static __le32 ext4_superblock_csum(struct super_block *sb, | ||
126 | struct ext4_super_block *es) | ||
127 | { | ||
128 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
129 | int offset = offsetof(struct ext4_super_block, s_checksum); | ||
130 | __u32 csum; | ||
131 | |||
132 | csum = ext4_chksum(sbi, ~0, (char *)es, offset); | ||
133 | |||
134 | return cpu_to_le32(csum); | ||
135 | } | ||
136 | |||
137 | int ext4_superblock_csum_verify(struct super_block *sb, | ||
138 | struct ext4_super_block *es) | ||
139 | { | ||
140 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
141 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
142 | return 1; | ||
143 | |||
144 | return es->s_checksum == ext4_superblock_csum(sb, es); | ||
145 | } | ||
146 | |||
147 | void ext4_superblock_csum_set(struct super_block *sb, | ||
148 | struct ext4_super_block *es) | ||
149 | { | ||
150 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
151 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
152 | return; | ||
153 | |||
154 | es->s_checksum = ext4_superblock_csum(sb, es); | ||
155 | } | ||
156 | |||
115 | void *ext4_kvmalloc(size_t size, gfp_t flags) | 157 | void *ext4_kvmalloc(size_t size, gfp_t flags) |
116 | { | 158 | { |
117 | void *ret; | 159 | void *ret; |
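
The superblock helpers added above differ from the other checksum sites in one detail: the checksum is seeded with ~0 rather than with the per-filesystem seed, presumably because the superblock is where that seed (derived from the UUID) is established in the first place. Below is a self-contained sketch using the standard bitwise CRC-32C (Castagnoli) in place of the kernel crypto API; the struct, field offsets, and finalization details are illustrative, not the exact on-disk contract.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitwise CRC-32C (Castagnoli, reflected, polynomial 0x82F63B78). */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	int k;

	while (len--) {
		crc ^= *p++;
		for (k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

struct sb_like {                  /* hypothetical, not ext4's superblock */
	uint8_t  uuid[16];
	uint32_t blocks_count;
	uint32_t inodes_count;
	uint8_t  checksum_type;   /* algorithm tag; ext4 accepts only crc32c */
	uint8_t  pad[3];
	uint32_t checksum;        /* last field; everything before it is summed */
};

static uint32_t sb_csum(const struct sb_like *sb)
{
	/* Seed with ~0: the superblock cannot use a seed stored in itself. */
	return crc32c(~0u, sb, offsetof(struct sb_like, checksum));
}

int main(void)
{
	struct sb_like sb;

	memset(&sb, 0, sizeof(sb));
	memcpy(sb.uuid, "0123456789abcdef", 16);
	sb.blocks_count = 1 << 20;
	sb.checksum_type = 1;
	sb.checksum = sb_csum(&sb);                           /* set    */
	printf("verify: %d\n", sb.checksum == sb_csum(&sb));  /* verify */
	return 0;
}
```
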
@@ -497,6 +539,7 @@ void __ext4_error(struct super_block *sb, const char *function, | |||
497 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", | 539 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", |
498 | sb->s_id, function, line, current->comm, &vaf); | 540 | sb->s_id, function, line, current->comm, &vaf); |
499 | va_end(args); | 541 | va_end(args); |
542 | save_error_info(sb, function, line); | ||
500 | 543 | ||
501 | ext4_handle_error(sb); | 544 | ext4_handle_error(sb); |
502 | } | 545 | } |
@@ -905,6 +948,8 @@ static void ext4_put_super(struct super_block *sb) | |||
905 | unlock_super(sb); | 948 | unlock_super(sb); |
906 | kobject_put(&sbi->s_kobj); | 949 | kobject_put(&sbi->s_kobj); |
907 | wait_for_completion(&sbi->s_kobj_unregister); | 950 | wait_for_completion(&sbi->s_kobj_unregister); |
951 | if (sbi->s_chksum_driver) | ||
952 | crypto_free_shash(sbi->s_chksum_driver); | ||
908 | kfree(sbi->s_blockgroup_lock); | 953 | kfree(sbi->s_blockgroup_lock); |
909 | kfree(sbi); | 954 | kfree(sbi); |
910 | } | 955 | } |
@@ -1922,43 +1967,69 @@ failed: | |||
1922 | return 0; | 1967 | return 0; |
1923 | } | 1968 | } |
1924 | 1969 | ||
1925 | __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, | 1970 | static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, |
1926 | struct ext4_group_desc *gdp) | 1971 | struct ext4_group_desc *gdp) |
1927 | { | 1972 | { |
1973 | int offset; | ||
1928 | __u16 crc = 0; | 1974 | __u16 crc = 0; |
1975 | __le32 le_group = cpu_to_le32(block_group); | ||
1929 | 1976 | ||
1930 | if (sbi->s_es->s_feature_ro_compat & | 1977 | if ((sbi->s_es->s_feature_ro_compat & |
1931 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | 1978 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { |
1932 | int offset = offsetof(struct ext4_group_desc, bg_checksum); | 1979 | /* Use new metadata_csum algorithm */ |
1933 | __le32 le_group = cpu_to_le32(block_group); | 1980 | __u16 old_csum; |
1934 | 1981 | __u32 csum32; | |
1935 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); | 1982 | |
1936 | crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); | 1983 | old_csum = gdp->bg_checksum; |
1937 | crc = crc16(crc, (__u8 *)gdp, offset); | 1984 | gdp->bg_checksum = 0; |
1938 | offset += sizeof(gdp->bg_checksum); /* skip checksum */ | 1985 | csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, |
1939 | /* for checksum of struct ext4_group_desc do the rest...*/ | 1986 | sizeof(le_group)); |
1940 | if ((sbi->s_es->s_feature_incompat & | 1987 | csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, |
1941 | cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && | 1988 | sbi->s_desc_size); |
1942 | offset < le16_to_cpu(sbi->s_es->s_desc_size)) | 1989 | gdp->bg_checksum = old_csum; |
1943 | crc = crc16(crc, (__u8 *)gdp + offset, | 1990 | |
1944 | le16_to_cpu(sbi->s_es->s_desc_size) - | 1991 | crc = csum32 & 0xFFFF; |
1945 | offset); | 1992 | goto out; |
1946 | } | 1993 | } |
1947 | 1994 | ||
1995 | /* old crc16 code */ | ||
1996 | offset = offsetof(struct ext4_group_desc, bg_checksum); | ||
1997 | |||
1998 | crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); | ||
1999 | crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); | ||
2000 | crc = crc16(crc, (__u8 *)gdp, offset); | ||
2001 | offset += sizeof(gdp->bg_checksum); /* skip checksum */ | ||
2002 | /* for checksum of struct ext4_group_desc do the rest...*/ | ||
2003 | if ((sbi->s_es->s_feature_incompat & | ||
2004 | cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && | ||
2005 | offset < le16_to_cpu(sbi->s_es->s_desc_size)) | ||
2006 | crc = crc16(crc, (__u8 *)gdp + offset, | ||
2007 | le16_to_cpu(sbi->s_es->s_desc_size) - | ||
2008 | offset); | ||
2009 | |||
2010 | out: | ||
1948 | return cpu_to_le16(crc); | 2011 | return cpu_to_le16(crc); |
1949 | } | 2012 | } |
1950 | 2013 | ||
1951 | int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, | 2014 | int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group, |
1952 | struct ext4_group_desc *gdp) | 2015 | struct ext4_group_desc *gdp) |
1953 | { | 2016 | { |
1954 | if ((sbi->s_es->s_feature_ro_compat & | 2017 | if (ext4_has_group_desc_csum(sb) && |
1955 | cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && | 2018 | (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), |
1956 | (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) | 2019 | block_group, gdp))) |
1957 | return 0; | 2020 | return 0; |
1958 | 2021 | ||
1959 | return 1; | 2022 | return 1; |
1960 | } | 2023 | } |
1961 | 2024 | ||
2025 | void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, | ||
2026 | struct ext4_group_desc *gdp) | ||
2027 | { | ||
2028 | if (!ext4_has_group_desc_csum(sb)) | ||
2029 | return; | ||
2030 | gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); | ||
2031 | } | ||
2032 | |||
1962 | /* Called at mount-time, super-block is locked */ | 2033 | /* Called at mount-time, super-block is locked */ |
1963 | static int ext4_check_descriptors(struct super_block *sb, | 2034 | static int ext4_check_descriptors(struct super_block *sb, |
1964 | ext4_group_t *first_not_zeroed) | 2035 | ext4_group_t *first_not_zeroed) |
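
ext4_group_desc_csum() now has two branches: the legacy crc16 over uuid, group number, and descriptor (skipping bg_checksum in place), and the metadata_csum path, which zeroes bg_checksum, runs crc32c over the group number and the whole descriptor, restores the stored value, and keeps only the low 16 bits because the on-disk bg_checksum field is still a __le16. A sketch of that 32-to-16-bit folding with the same zero/restore trick; the descriptor layout and checksum function are stand-ins.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct desc_like {                /* stand-in for ext4_group_desc */
	uint32_t block_bitmap;
	uint32_t inode_bitmap;
	uint32_t inode_table;
	uint16_t free_blocks;
	uint16_t free_inodes;
	uint16_t flags;
	uint16_t checksum;        /* on-disk field is only 16 bits wide */
};

/* Stand-in 32-bit checksum (ext4 feeds crc32c through ext4_chksum()). */
static uint32_t csum32(uint32_t seed, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t h = seed;

	while (len--)
		h = (h * 31) + *p++;
	return h;
}

/* metadata_csum flavour: zero the stored checksum, sum the group number
 * and the whole descriptor, restore, and fold the result into 16 bits. */
static uint16_t desc_csum(uint32_t fs_seed, uint32_t group,
			  struct desc_like *gdp)
{
	uint16_t old = gdp->checksum;
	uint32_t csum;

	gdp->checksum = 0;
	csum = csum32(fs_seed, &group, sizeof(group));
	csum = csum32(csum, gdp, sizeof(*gdp));
	gdp->checksum = old;

	return (uint16_t)(csum & 0xFFFF);   /* low 16 bits go on disk */
}

int main(void)
{
	struct desc_like gd;

	memset(&gd, 0, sizeof(gd));
	gd.free_inodes = 8192;
	gd.checksum = desc_csum(0xCAFE, 5, &gd);                   /* set    */
	printf("verify: %d\n", gd.checksum == desc_csum(0xCAFE, 5, &gd));
	return 0;
}
```
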
@@ -2013,7 +2084,7 @@ static int ext4_check_descriptors(struct super_block *sb, | |||
2013 | return 0; | 2084 | return 0; |
2014 | } | 2085 | } |
2015 | ext4_lock_group(sb, i); | 2086 | ext4_lock_group(sb, i); |
2016 | if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { | 2087 | if (!ext4_group_desc_csum_verify(sb, i, gdp)) { |
2017 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " | 2088 | ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " |
2018 | "Checksum for group %u failed (%u!=%u)", | 2089 | "Checksum for group %u failed (%u!=%u)", |
2019 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 2090 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
@@ -2417,6 +2488,23 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, | |||
2417 | return count; | 2488 | return count; |
2418 | } | 2489 | } |
2419 | 2490 | ||
2491 | static ssize_t trigger_test_error(struct ext4_attr *a, | ||
2492 | struct ext4_sb_info *sbi, | ||
2493 | const char *buf, size_t count) | ||
2494 | { | ||
2495 | int len = count; | ||
2496 | |||
2497 | if (!capable(CAP_SYS_ADMIN)) | ||
2498 | return -EPERM; | ||
2499 | |||
2500 | if (len && buf[len-1] == '\n') | ||
2501 | len--; | ||
2502 | |||
2503 | if (len) | ||
2504 | ext4_error(sbi->s_sb, "%.*s", len, buf); | ||
2505 | return count; | ||
2506 | } | ||
2507 | |||
2420 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ | 2508 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ |
2421 | static struct ext4_attr ext4_attr_##_name = { \ | 2509 | static struct ext4_attr ext4_attr_##_name = { \ |
2422 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 2510 | .attr = {.name = __stringify(_name), .mode = _mode }, \ |
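
For completeness: once registered, this write-only attribute lets a privileged test harness inject an ext4 error path on demand, e.g. by writing a short message to /sys/fs/ext4/<device>/trigger_fs_error (exact path inferred from the attribute name; CAP_SYS_ADMIN is required and a trailing newline is trimmed), which exercises the same save_error_info()/ext4_handle_error() machinery as a real corruption report.
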
@@ -2447,6 +2535,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | |||
2447 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2535 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2448 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2536 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2449 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2537 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); |
2538 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | ||
2450 | 2539 | ||
2451 | static struct attribute *ext4_attrs[] = { | 2540 | static struct attribute *ext4_attrs[] = { |
2452 | ATTR_LIST(delayed_allocation_blocks), | 2541 | ATTR_LIST(delayed_allocation_blocks), |
@@ -2461,6 +2550,7 @@ static struct attribute *ext4_attrs[] = { | |||
2461 | ATTR_LIST(mb_stream_req), | 2550 | ATTR_LIST(mb_stream_req), |
2462 | ATTR_LIST(mb_group_prealloc), | 2551 | ATTR_LIST(mb_group_prealloc), |
2463 | ATTR_LIST(max_writeback_mb_bump), | 2552 | ATTR_LIST(max_writeback_mb_bump), |
2553 | ATTR_LIST(trigger_fs_error), | ||
2464 | NULL, | 2554 | NULL, |
2465 | }; | 2555 | }; |
2466 | 2556 | ||
@@ -2957,6 +3047,44 @@ static void ext4_destroy_lazyinit_thread(void) | |||
2957 | kthread_stop(ext4_lazyinit_task); | 3047 | kthread_stop(ext4_lazyinit_task); |
2958 | } | 3048 | } |
2959 | 3049 | ||
3050 | static int set_journal_csum_feature_set(struct super_block *sb) | ||
3051 | { | ||
3052 | int ret = 1; | ||
3053 | int compat, incompat; | ||
3054 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
3055 | |||
3056 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3057 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
3058 | /* journal checksum v2 */ | ||
3059 | compat = 0; | ||
3060 | incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; | ||
3061 | } else { | ||
3062 | /* journal checksum v1 */ | ||
3063 | compat = JBD2_FEATURE_COMPAT_CHECKSUM; | ||
3064 | incompat = 0; | ||
3065 | } | ||
3066 | |||
3067 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | ||
3068 | ret = jbd2_journal_set_features(sbi->s_journal, | ||
3069 | compat, 0, | ||
3070 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | | ||
3071 | incompat); | ||
3072 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | ||
3073 | ret = jbd2_journal_set_features(sbi->s_journal, | ||
3074 | compat, 0, | ||
3075 | incompat); | ||
3076 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | ||
3077 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
3078 | } else { | ||
3079 | jbd2_journal_clear_features(sbi->s_journal, | ||
3080 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
3081 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | | ||
3082 | JBD2_FEATURE_INCOMPAT_CSUM_V2); | ||
3083 | } | ||
3084 | |||
3085 | return ret; | ||
3086 | } | ||
3087 | |||
2960 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 3088 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2961 | { | 3089 | { |
2962 | char *orig_data = kstrdup(data, GFP_KERNEL); | 3090 | char *orig_data = kstrdup(data, GFP_KERNEL); |
@@ -2993,6 +3121,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2993 | goto out_free_orig; | 3121 | goto out_free_orig; |
2994 | } | 3122 | } |
2995 | sb->s_fs_info = sbi; | 3123 | sb->s_fs_info = sbi; |
3124 | sbi->s_sb = sb; | ||
2996 | sbi->s_mount_opt = 0; | 3125 | sbi->s_mount_opt = 0; |
2997 | sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID); | 3126 | sbi->s_resuid = make_kuid(&init_user_ns, EXT4_DEF_RESUID); |
2998 | sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID); | 3127 | sbi->s_resgid = make_kgid(&init_user_ns, EXT4_DEF_RESGID); |
@@ -3032,13 +3161,54 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3032 | * Note: s_es must be initialized as soon as possible because | 3161 | * Note: s_es must be initialized as soon as possible because |
3033 | * some ext4 macro-instructions depend on its value | 3162 | * some ext4 macro-instructions depend on its value |
3034 | */ | 3163 | */ |
3035 | es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); | 3164 | es = (struct ext4_super_block *) (bh->b_data + offset); |
3036 | sbi->s_es = es; | 3165 | sbi->s_es = es; |
3037 | sb->s_magic = le16_to_cpu(es->s_magic); | 3166 | sb->s_magic = le16_to_cpu(es->s_magic); |
3038 | if (sb->s_magic != EXT4_SUPER_MAGIC) | 3167 | if (sb->s_magic != EXT4_SUPER_MAGIC) |
3039 | goto cantfind_ext4; | 3168 | goto cantfind_ext4; |
3040 | sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); | 3169 | sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); |
3041 | 3170 | ||
3171 | /* Warn if metadata_csum and gdt_csum are both set. */ | ||
3172 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3173 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
3174 | EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
3175 | ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " | ||
3176 | "redundant flags; please run fsck."); | ||
3177 | |||
3178 | /* Check for a known checksum algorithm */ | ||
3179 | if (!ext4_verify_csum_type(sb, es)) { | ||
3180 | ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " | ||
3181 | "unknown checksum algorithm."); | ||
3182 | silent = 1; | ||
3183 | goto cantfind_ext4; | ||
3184 | } | ||
3185 | |||
3186 | /* Load the checksum driver */ | ||
3187 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3188 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { | ||
3189 | sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); | ||
3190 | if (IS_ERR(sbi->s_chksum_driver)) { | ||
3191 | ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); | ||
3192 | ret = PTR_ERR(sbi->s_chksum_driver); | ||
3193 | sbi->s_chksum_driver = NULL; | ||
3194 | goto failed_mount; | ||
3195 | } | ||
3196 | } | ||
3197 | |||
3198 | /* Check superblock checksum */ | ||
3199 | if (!ext4_superblock_csum_verify(sb, es)) { | ||
3200 | ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " | ||
3201 | "invalid superblock checksum. Run e2fsck?"); | ||
3202 | silent = 1; | ||
3203 | goto cantfind_ext4; | ||
3204 | } | ||
3205 | |||
3206 | /* Precompute checksum seed for all metadata */ | ||
3207 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
3208 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
3209 | sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, | ||
3210 | sizeof(es->s_uuid)); | ||
3211 | |||
3042 | /* Set defaults before we parse the mount options */ | 3212 | /* Set defaults before we parse the mount options */ |
3043 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); | 3213 | def_mount_opts = le32_to_cpu(es->s_default_mount_opts); |
3044 | set_opt(sb, INIT_INODE_TABLE); | 3214 | set_opt(sb, INIT_INODE_TABLE); |
@@ -3200,7 +3370,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3200 | "Can't read superblock on 2nd try"); | 3370 | "Can't read superblock on 2nd try"); |
3201 | goto failed_mount; | 3371 | goto failed_mount; |
3202 | } | 3372 | } |
3203 | es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); | 3373 | es = (struct ext4_super_block *)(bh->b_data + offset); |
3204 | sbi->s_es = es; | 3374 | sbi->s_es = es; |
3205 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { | 3375 | if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { |
3206 | ext4_msg(sb, KERN_ERR, | 3376 | ext4_msg(sb, KERN_ERR, |
@@ -3392,6 +3562,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3392 | GFP_KERNEL); | 3562 | GFP_KERNEL); |
3393 | if (sbi->s_group_desc == NULL) { | 3563 | if (sbi->s_group_desc == NULL) { |
3394 | ext4_msg(sb, KERN_ERR, "not enough memory"); | 3564 | ext4_msg(sb, KERN_ERR, "not enough memory"); |
3565 | ret = -ENOMEM; | ||
3395 | goto failed_mount; | 3566 | goto failed_mount; |
3396 | } | 3567 | } |
3397 | 3568 | ||
@@ -3449,6 +3620,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3449 | } | 3620 | } |
3450 | if (err) { | 3621 | if (err) { |
3451 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 3622 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
3623 | ret = err; | ||
3452 | goto failed_mount3; | 3624 | goto failed_mount3; |
3453 | } | 3625 | } |
3454 | 3626 | ||
@@ -3506,26 +3678,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3506 | goto no_journal; | 3678 | goto no_journal; |
3507 | } | 3679 | } |
3508 | 3680 | ||
3509 | if (ext4_blocks_count(es) > 0xffffffffULL && | 3681 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && |
3510 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, | 3682 | !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, |
3511 | JBD2_FEATURE_INCOMPAT_64BIT)) { | 3683 | JBD2_FEATURE_INCOMPAT_64BIT)) { |
3512 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); | 3684 | ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); |
3513 | goto failed_mount_wq; | 3685 | goto failed_mount_wq; |
3514 | } | 3686 | } |
3515 | 3687 | ||
3516 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | 3688 | if (!set_journal_csum_feature_set(sb)) { |
3517 | jbd2_journal_set_features(sbi->s_journal, | 3689 | ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " |
3518 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | 3690 | "feature set"); |
3519 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | 3691 | goto failed_mount_wq; |
3520 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | ||
3521 | jbd2_journal_set_features(sbi->s_journal, | ||
3522 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); | ||
3523 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | ||
3524 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
3525 | } else { | ||
3526 | jbd2_journal_clear_features(sbi->s_journal, | ||
3527 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
3528 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
3529 | } | 3692 | } |
3530 | 3693 | ||
3531 | /* We have now updated the journal if required, so we can | 3694 | /* We have now updated the journal if required, so we can |
@@ -3606,7 +3769,8 @@ no_journal: | |||
3606 | goto failed_mount4; | 3769 | goto failed_mount4; |
3607 | } | 3770 | } |
3608 | 3771 | ||
3609 | ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); | 3772 | if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) |
3773 | sb->s_flags |= MS_RDONLY; | ||
3610 | 3774 | ||
3611 | /* determine the minimum size of new large inodes, if present */ | 3775 | /* determine the minimum size of new large inodes, if present */ |
3612 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { | 3776 | if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { |
@@ -3641,7 +3805,7 @@ no_journal: | |||
3641 | } | 3805 | } |
3642 | 3806 | ||
3643 | ext4_ext_init(sb); | 3807 | ext4_ext_init(sb); |
3644 | err = ext4_mb_init(sb, needs_recovery); | 3808 | err = ext4_mb_init(sb); |
3645 | if (err) { | 3809 | if (err) { |
3646 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", | 3810 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", |
3647 | err); | 3811 | err); |
@@ -3724,6 +3888,8 @@ failed_mount2: | |||
3724 | brelse(sbi->s_group_desc[i]); | 3888 | brelse(sbi->s_group_desc[i]); |
3725 | ext4_kvfree(sbi->s_group_desc); | 3889 | ext4_kvfree(sbi->s_group_desc); |
3726 | failed_mount: | 3890 | failed_mount: |
3891 | if (sbi->s_chksum_driver) | ||
3892 | crypto_free_shash(sbi->s_chksum_driver); | ||
3727 | if (sbi->s_proc) { | 3893 | if (sbi->s_proc) { |
3728 | remove_proc_entry("options", sbi->s_proc); | 3894 | remove_proc_entry("options", sbi->s_proc); |
3729 | remove_proc_entry(sb->s_id, ext4_proc_root); | 3895 | remove_proc_entry(sb->s_id, ext4_proc_root); |
@@ -3847,7 +4013,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, | |||
3847 | goto out_bdev; | 4013 | goto out_bdev; |
3848 | } | 4014 | } |
3849 | 4015 | ||
3850 | es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); | 4016 | es = (struct ext4_super_block *) (bh->b_data + offset); |
3851 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || | 4017 | if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || |
3852 | !(le32_to_cpu(es->s_feature_incompat) & | 4018 | !(le32_to_cpu(es->s_feature_incompat) & |
3853 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { | 4019 | EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { |
@@ -4039,6 +4205,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
4039 | &EXT4_SB(sb)->s_freeinodes_counter)); | 4205 | &EXT4_SB(sb)->s_freeinodes_counter)); |
4040 | sb->s_dirt = 0; | 4206 | sb->s_dirt = 0; |
4041 | BUFFER_TRACE(sbh, "marking dirty"); | 4207 | BUFFER_TRACE(sbh, "marking dirty"); |
4208 | ext4_superblock_csum_set(sb, es); | ||
4042 | mark_buffer_dirty(sbh); | 4209 | mark_buffer_dirty(sbh); |
4043 | if (sync) { | 4210 | if (sync) { |
4044 | error = sync_dirty_buffer(sbh); | 4211 | error = sync_dirty_buffer(sbh); |
@@ -4333,7 +4500,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4333 | struct ext4_group_desc *gdp = | 4500 | struct ext4_group_desc *gdp = |
4334 | ext4_get_group_desc(sb, g, NULL); | 4501 | ext4_get_group_desc(sb, g, NULL); |
4335 | 4502 | ||
4336 | if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { | 4503 | if (!ext4_group_desc_csum_verify(sb, g, gdp)) { |
4337 | ext4_msg(sb, KERN_ERR, | 4504 | ext4_msg(sb, KERN_ERR, |
4338 | "ext4_remount: Checksum for group %u failed (%u!=%u)", | 4505 | "ext4_remount: Checksum for group %u failed (%u!=%u)", |
4339 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), | 4506 | g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), |
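The super.c hunks above are the mount and writeback side of ext4 metadata checksumming: when EXT4_FEATURE_RO_COMPAT_METADATA_CSUM is set, mount loads a "crc32c" shash, precomputes a per-filesystem seed over s_uuid, verifies the superblock and group-descriptor checksums, and set_journal_csum_feature_set() picks journal checksum v2 (v1 otherwise); ext4_commit_super() then refreshes the superblock checksum before the buffer is marked dirty. A minimal userspace sketch of the seed-then-checksum pattern, with a bitwise software CRC32C standing in for the kernel's crypto shash (all names below are illustrative):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Plain software CRC32C (Castagnoli polynomial, reflected, no final xor),
 * standing in for the "crc32c" shash that ext4_chksum() drives. */
static uint32_t crc32c_sw(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78u : 0);
	}
	return crc;
}

int main(void)
{
	uint8_t uuid[16] = { 0xde, 0xad, 0xbe, 0xef };	/* stands in for es->s_uuid */
	uint8_t metadata[64] = { 0 };			/* some metadata block */

	/* Done once at mount time: seed = crc32c(~0, uuid). */
	uint32_t seed = crc32c_sw(~0u, uuid, sizeof(uuid));

	/* Every metadata checksum then folds the per-filesystem seed in first. */
	uint32_t csum = crc32c_sw(seed, metadata, sizeof(metadata));

	printf("seed=%08x csum=%08x\n", (unsigned)seed, (unsigned)csum);
	return 0;
}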
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e88748e55c0f..e56c9ed7d6e3 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -122,6 +122,58 @@ const struct xattr_handler *ext4_xattr_handlers[] = { | |||
122 | NULL | 122 | NULL |
123 | }; | 123 | }; |
124 | 124 | ||
125 | static __le32 ext4_xattr_block_csum(struct inode *inode, | ||
126 | sector_t block_nr, | ||
127 | struct ext4_xattr_header *hdr) | ||
128 | { | ||
129 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
130 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
131 | __u32 csum, old; | ||
132 | |||
133 | old = hdr->h_checksum; | ||
134 | hdr->h_checksum = 0; | ||
135 | if (le32_to_cpu(hdr->h_refcount) != 1) { | ||
136 | block_nr = cpu_to_le64(block_nr); | ||
137 | csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&block_nr, | ||
138 | sizeof(block_nr)); | ||
139 | } else | ||
140 | csum = ei->i_csum_seed; | ||
141 | csum = ext4_chksum(sbi, csum, (__u8 *)hdr, | ||
142 | EXT4_BLOCK_SIZE(inode->i_sb)); | ||
143 | hdr->h_checksum = old; | ||
144 | return cpu_to_le32(csum); | ||
145 | } | ||
146 | |||
147 | static int ext4_xattr_block_csum_verify(struct inode *inode, | ||
148 | sector_t block_nr, | ||
149 | struct ext4_xattr_header *hdr) | ||
150 | { | ||
151 | if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
152 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && | ||
153 | (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) | ||
154 | return 0; | ||
155 | return 1; | ||
156 | } | ||
157 | |||
158 | static void ext4_xattr_block_csum_set(struct inode *inode, | ||
159 | sector_t block_nr, | ||
160 | struct ext4_xattr_header *hdr) | ||
161 | { | ||
162 | if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, | ||
163 | EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) | ||
164 | return; | ||
165 | |||
166 | hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); | ||
167 | } | ||
168 | |||
169 | static inline int ext4_handle_dirty_xattr_block(handle_t *handle, | ||
170 | struct inode *inode, | ||
171 | struct buffer_head *bh) | ||
172 | { | ||
173 | ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh)); | ||
174 | return ext4_handle_dirty_metadata(handle, inode, bh); | ||
175 | } | ||
176 | |||
125 | static inline const struct xattr_handler * | 177 | static inline const struct xattr_handler * |
126 | ext4_xattr_handler(int name_index) | 178 | ext4_xattr_handler(int name_index) |
127 | { | 179 | { |
@@ -156,12 +208,22 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) | |||
156 | } | 208 | } |
157 | 209 | ||
158 | static inline int | 210 | static inline int |
159 | ext4_xattr_check_block(struct buffer_head *bh) | 211 | ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) |
160 | { | 212 | { |
213 | int error; | ||
214 | |||
215 | if (buffer_verified(bh)) | ||
216 | return 0; | ||
217 | |||
161 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || | 218 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || |
162 | BHDR(bh)->h_blocks != cpu_to_le32(1)) | 219 | BHDR(bh)->h_blocks != cpu_to_le32(1)) |
163 | return -EIO; | 220 | return -EIO; |
164 | return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); | 221 | if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) |
222 | return -EIO; | ||
223 | error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); | ||
224 | if (!error) | ||
225 | set_buffer_verified(bh); | ||
226 | return error; | ||
165 | } | 227 | } |
166 | 228 | ||
167 | static inline int | 229 | static inline int |
@@ -224,7 +286,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
224 | goto cleanup; | 286 | goto cleanup; |
225 | ea_bdebug(bh, "b_count=%d, refcount=%d", | 287 | ea_bdebug(bh, "b_count=%d, refcount=%d", |
226 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 288 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
227 | if (ext4_xattr_check_block(bh)) { | 289 | if (ext4_xattr_check_block(inode, bh)) { |
228 | bad_block: | 290 | bad_block: |
229 | EXT4_ERROR_INODE(inode, "bad block %llu", | 291 | EXT4_ERROR_INODE(inode, "bad block %llu", |
230 | EXT4_I(inode)->i_file_acl); | 292 | EXT4_I(inode)->i_file_acl); |
@@ -369,7 +431,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
369 | goto cleanup; | 431 | goto cleanup; |
370 | ea_bdebug(bh, "b_count=%d, refcount=%d", | 432 | ea_bdebug(bh, "b_count=%d, refcount=%d", |
371 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 433 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
372 | if (ext4_xattr_check_block(bh)) { | 434 | if (ext4_xattr_check_block(inode, bh)) { |
373 | EXT4_ERROR_INODE(inode, "bad block %llu", | 435 | EXT4_ERROR_INODE(inode, "bad block %llu", |
374 | EXT4_I(inode)->i_file_acl); | 436 | EXT4_I(inode)->i_file_acl); |
375 | error = -EIO; | 437 | error = -EIO; |
@@ -492,7 +554,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | |||
492 | if (ce) | 554 | if (ce) |
493 | mb_cache_entry_release(ce); | 555 | mb_cache_entry_release(ce); |
494 | unlock_buffer(bh); | 556 | unlock_buffer(bh); |
495 | error = ext4_handle_dirty_metadata(handle, inode, bh); | 557 | error = ext4_handle_dirty_xattr_block(handle, inode, bh); |
496 | if (IS_SYNC(inode)) | 558 | if (IS_SYNC(inode)) |
497 | ext4_handle_sync(handle); | 559 | ext4_handle_sync(handle); |
498 | dquot_free_block(inode, 1); | 560 | dquot_free_block(inode, 1); |
@@ -662,7 +724,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, | |||
662 | ea_bdebug(bs->bh, "b_count=%d, refcount=%d", | 724 | ea_bdebug(bs->bh, "b_count=%d, refcount=%d", |
663 | atomic_read(&(bs->bh->b_count)), | 725 | atomic_read(&(bs->bh->b_count)), |
664 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); | 726 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); |
665 | if (ext4_xattr_check_block(bs->bh)) { | 727 | if (ext4_xattr_check_block(inode, bs->bh)) { |
666 | EXT4_ERROR_INODE(inode, "bad block %llu", | 728 | EXT4_ERROR_INODE(inode, "bad block %llu", |
667 | EXT4_I(inode)->i_file_acl); | 729 | EXT4_I(inode)->i_file_acl); |
668 | error = -EIO; | 730 | error = -EIO; |
@@ -725,9 +787,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
725 | if (error == -EIO) | 787 | if (error == -EIO) |
726 | goto bad_block; | 788 | goto bad_block; |
727 | if (!error) | 789 | if (!error) |
728 | error = ext4_handle_dirty_metadata(handle, | 790 | error = ext4_handle_dirty_xattr_block(handle, |
729 | inode, | 791 | inode, |
730 | bs->bh); | 792 | bs->bh); |
731 | if (error) | 793 | if (error) |
732 | goto cleanup; | 794 | goto cleanup; |
733 | goto inserted; | 795 | goto inserted; |
@@ -796,9 +858,9 @@ inserted: | |||
796 | ea_bdebug(new_bh, "reusing; refcount now=%d", | 858 | ea_bdebug(new_bh, "reusing; refcount now=%d", |
797 | le32_to_cpu(BHDR(new_bh)->h_refcount)); | 859 | le32_to_cpu(BHDR(new_bh)->h_refcount)); |
798 | unlock_buffer(new_bh); | 860 | unlock_buffer(new_bh); |
799 | error = ext4_handle_dirty_metadata(handle, | 861 | error = ext4_handle_dirty_xattr_block(handle, |
800 | inode, | 862 | inode, |
801 | new_bh); | 863 | new_bh); |
802 | if (error) | 864 | if (error) |
803 | goto cleanup_dquot; | 865 | goto cleanup_dquot; |
804 | } | 866 | } |
@@ -855,8 +917,8 @@ getblk_failed: | |||
855 | set_buffer_uptodate(new_bh); | 917 | set_buffer_uptodate(new_bh); |
856 | unlock_buffer(new_bh); | 918 | unlock_buffer(new_bh); |
857 | ext4_xattr_cache_insert(new_bh); | 919 | ext4_xattr_cache_insert(new_bh); |
858 | error = ext4_handle_dirty_metadata(handle, | 920 | error = ext4_handle_dirty_xattr_block(handle, |
859 | inode, new_bh); | 921 | inode, new_bh); |
860 | if (error) | 922 | if (error) |
861 | goto cleanup; | 923 | goto cleanup; |
862 | } | 924 | } |
@@ -1193,7 +1255,7 @@ retry: | |||
1193 | error = -EIO; | 1255 | error = -EIO; |
1194 | if (!bh) | 1256 | if (!bh) |
1195 | goto cleanup; | 1257 | goto cleanup; |
1196 | if (ext4_xattr_check_block(bh)) { | 1258 | if (ext4_xattr_check_block(inode, bh)) { |
1197 | EXT4_ERROR_INODE(inode, "bad block %llu", | 1259 | EXT4_ERROR_INODE(inode, "bad block %llu", |
1198 | EXT4_I(inode)->i_file_acl); | 1260 | EXT4_I(inode)->i_file_acl); |
1199 | error = -EIO; | 1261 | error = -EIO; |
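The xattr.c hunks add a checksum to xattr blocks (seeded from the little-endian block number when h_refcount != 1, from the inode's checksum seed otherwise) and funnel every dirtying of such a block through ext4_handle_dirty_xattr_block() so the checksum is recomputed first; ext4_xattr_check_block() also now caches a successful validation via the buffer's verified bit. A standalone sketch of that verify-once pattern (types and the check itself are illustrative):

#include <stdbool.h>
#include <stdio.h>

struct buf {
	bool verified;
	/* ... block contents ... */
};

static int expensive_check(struct buf *b)
{
	/* magic number, checksum, entry walk, etc. would go here */
	(void)b;
	return 0;			/* 0 = OK, -1 = corrupt */
}

static int check_block(struct buf *b)
{
	if (b->verified)
		return 0;		/* already validated earlier */
	if (expensive_check(b))
		return -1;		/* never cache a failure */
	b->verified = true;		/* cache success for future calls */
	return 0;
}

int main(void)
{
	struct buf b = { .verified = false };

	printf("%d %d\n", check_block(&b), check_block(&b));
	return 0;
}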
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 25b7387ff183..91f31ca7d9af 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -27,7 +27,9 @@ struct ext4_xattr_header { | |||
27 | __le32 h_refcount; /* reference count */ | 27 | __le32 h_refcount; /* reference count */ |
28 | __le32 h_blocks; /* number of disk blocks used */ | 28 | __le32 h_blocks; /* number of disk blocks used */ |
29 | __le32 h_hash; /* hash value of all attributes */ | 29 | __le32 h_hash; /* hash value of all attributes */ |
30 | __u32 h_reserved[4]; /* zero right now */ | 30 | __le32 h_checksum; /* crc32c(uuid+id+xattrblock) */ |
31 | /* id = inum if refcount=1, blknum otherwise */ | ||
32 | __u32 h_reserved[3]; /* zero right now */ | ||
31 | }; | 33 | }; |
32 | 34 | ||
33 | struct ext4_xattr_ibody_header { | 35 | struct ext4_xattr_ibody_header { |
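The on-disk change in xattr.h takes one of the four reserved words for h_checksum, so the header keeps its size and existing images stay readable (the reserved words were documented as zero). A quick sketch that makes the size invariant explicit; the field widths here are illustrative, not a byte-exact copy of the ext4 structures:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct xattr_header_old {			/* illustrative layout */
	uint32_t h_magic, h_refcount, h_blocks, h_hash;
	uint32_t h_reserved[4];
};

struct xattr_header_new {
	uint32_t h_magic, h_refcount, h_blocks, h_hash;
	uint32_t h_checksum;			/* crc32c(uuid+id+xattrblock) */
	uint32_t h_reserved[3];
};

int main(void)
{
	/* Carving h_checksum out of h_reserved must not change the size. */
	assert(sizeof(struct xattr_header_old) ==
	       sizeof(struct xattr_header_new));
	printf("header size: %zu bytes\n", sizeof(struct xattr_header_new));
	return 0;
}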
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c2973ea5df9a..a3d81ebf6d86 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -735,10 +735,9 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, | |||
735 | } | 735 | } |
736 | 736 | ||
737 | static int | 737 | static int |
738 | fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) | 738 | fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) |
739 | { | 739 | { |
740 | int len = *lenp; | 740 | int len = *lenp; |
741 | struct inode *inode = de->d_inode; | ||
742 | u32 ipos_h, ipos_m, ipos_l; | 741 | u32 ipos_h, ipos_m, ipos_l; |
743 | 742 | ||
744 | if (len < 5) { | 743 | if (len < 5) { |
@@ -754,9 +753,9 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) | |||
754 | fh[1] = inode->i_generation; | 753 | fh[1] = inode->i_generation; |
755 | fh[2] = ipos_h; | 754 | fh[2] = ipos_h; |
756 | fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; | 755 | fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; |
757 | spin_lock(&de->d_lock); | 756 | fh[4] = ipos_l; |
758 | fh[4] = ipos_l | MSDOS_I(de->d_parent->d_inode)->i_logstart; | 757 | if (parent) |
759 | spin_unlock(&de->d_lock); | 758 | fh[4] |= MSDOS_I(parent)->i_logstart; |
760 | return 3; | 759 | return 3; |
761 | } | 760 | } |
762 | 761 | ||
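fat_encode_fh() is converted to the reworked export_operations ->encode_fh() signature used throughout this series (fuse and gfs2 below get the same treatment): the caller hands over the inode and, when a connectable handle was requested, an already-resolved parent inode, so filesystems no longer walk d_parent under d_lock. A sketch of the shape of the new hook, with an invented two- or four-word handle layout:

#include <stdint.h>
#include <stdio.h>

struct inode_demo { uint32_t ino; uint32_t generation; };

/* Illustrative encode_fh: pack child (and optionally parent) identity.
 * The "255 = buffer too small" convention mirrors the in-kernel usage
 * above, but the handle layout here is made up. */
static int demo_encode_fh(struct inode_demo *inode, uint32_t *fh, int *max_len,
			  struct inode_demo *parent)
{
	int len = parent ? 4 : 2;

	if (*max_len < len) {
		*max_len = len;		/* report the space actually needed */
		return 255;
	}
	fh[0] = inode->ino;
	fh[1] = inode->generation;
	if (parent) {
		fh[2] = parent->ino;
		fh[3] = parent->generation;
	}
	*max_len = len;
	return parent ? 2 : 1;		/* e.g. with/without parent info */
}

int main(void)
{
	uint32_t fh[4];
	int len = 4;
	struct inode_demo child = { 10, 1 }, parent = { 2, 7 };
	int type = demo_encode_fh(&child, fh, &len, &parent);

	printf("type=%d len=%d\n", type, len);
	return 0;
}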
diff --git a/fs/fcntl.c b/fs/fcntl.c index d078b75572a7..81b70e665bf0 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -442,28 +442,24 @@ static int check_fcntl_cmd(unsigned cmd) | |||
442 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 442 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
443 | { | 443 | { |
444 | struct file *filp; | 444 | struct file *filp; |
445 | int fput_needed; | ||
445 | long err = -EBADF; | 446 | long err = -EBADF; |
446 | 447 | ||
447 | filp = fget_raw(fd); | 448 | filp = fget_raw_light(fd, &fput_needed); |
448 | if (!filp) | 449 | if (!filp) |
449 | goto out; | 450 | goto out; |
450 | 451 | ||
451 | if (unlikely(filp->f_mode & FMODE_PATH)) { | 452 | if (unlikely(filp->f_mode & FMODE_PATH)) { |
452 | if (!check_fcntl_cmd(cmd)) { | 453 | if (!check_fcntl_cmd(cmd)) |
453 | fput(filp); | 454 | goto out1; |
454 | goto out; | ||
455 | } | ||
456 | } | 455 | } |
457 | 456 | ||
458 | err = security_file_fcntl(filp, cmd, arg); | 457 | err = security_file_fcntl(filp, cmd, arg); |
459 | if (err) { | 458 | if (!err) |
460 | fput(filp); | 459 | err = do_fcntl(fd, cmd, arg, filp); |
461 | return err; | ||
462 | } | ||
463 | 460 | ||
464 | err = do_fcntl(fd, cmd, arg, filp); | 461 | out1: |
465 | 462 | fput_light(filp, fput_needed); | |
466 | fput(filp); | ||
467 | out: | 463 | out: |
468 | return err; | 464 | return err; |
469 | } | 465 | } |
@@ -473,26 +469,21 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | |||
473 | unsigned long, arg) | 469 | unsigned long, arg) |
474 | { | 470 | { |
475 | struct file * filp; | 471 | struct file * filp; |
476 | long err; | 472 | long err = -EBADF; |
473 | int fput_needed; | ||
477 | 474 | ||
478 | err = -EBADF; | 475 | filp = fget_raw_light(fd, &fput_needed); |
479 | filp = fget_raw(fd); | ||
480 | if (!filp) | 476 | if (!filp) |
481 | goto out; | 477 | goto out; |
482 | 478 | ||
483 | if (unlikely(filp->f_mode & FMODE_PATH)) { | 479 | if (unlikely(filp->f_mode & FMODE_PATH)) { |
484 | if (!check_fcntl_cmd(cmd)) { | 480 | if (!check_fcntl_cmd(cmd)) |
485 | fput(filp); | 481 | goto out1; |
486 | goto out; | ||
487 | } | ||
488 | } | 482 | } |
489 | 483 | ||
490 | err = security_file_fcntl(filp, cmd, arg); | 484 | err = security_file_fcntl(filp, cmd, arg); |
491 | if (err) { | 485 | if (err) |
492 | fput(filp); | 486 | goto out1; |
493 | return err; | ||
494 | } | ||
495 | err = -EBADF; | ||
496 | 487 | ||
497 | switch (cmd) { | 488 | switch (cmd) { |
498 | case F_GETLK64: | 489 | case F_GETLK64: |
@@ -507,7 +498,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | |||
507 | err = do_fcntl(fd, cmd, arg, filp); | 498 | err = do_fcntl(fd, cmd, arg, filp); |
508 | break; | 499 | break; |
509 | } | 500 | } |
510 | fput(filp); | 501 | out1: |
502 | fput_light(filp, fput_needed); | ||
511 | out: | 503 | out: |
512 | return err; | 504 | return err; |
513 | } | 505 | } |
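The fcntl()/fcntl64() paths switch from fget_raw()/fput() to fget_raw_light()/fput_light(): when the descriptor table is not shared the file is used without taking an extra reference, fput_needed records whether the matching put has anything to drop, and the error paths collapse onto a single out1 label. A userspace sketch of that conditional get/put pattern (illustrative types, no real file table):

#include <stdio.h>

struct file_demo { int refcount; };

static struct file_demo table_entry = { .refcount = 1 };

static struct file_demo *fget_light_demo(int fd, int *fput_needed,
					 int table_shared)
{
	(void)fd;
	if (!table_shared) {
		*fput_needed = 0;	/* no reference taken, no put needed */
		return &table_entry;
	}
	table_entry.refcount++;		/* shared table: take a reference */
	*fput_needed = 1;
	return &table_entry;
}

static void fput_light_demo(struct file_demo *f, int fput_needed)
{
	if (fput_needed)
		f->refcount--;		/* drop only what we took */
}

int main(void)
{
	int fput_needed;
	struct file_demo *f = fget_light_demo(3, &fput_needed, 0);

	/* ... use f ... */
	fput_light_demo(f, fput_needed);
	printf("refcount=%d\n", f->refcount);
	return 0;
}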
diff --git a/fs/file_table.c b/fs/file_table.c index 70f2a0fd6aec..a305d9e2d1b2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { | |||
34 | .max_files = NR_FILE | 34 | .max_files = NR_FILE |
35 | }; | 35 | }; |
36 | 36 | ||
37 | DECLARE_LGLOCK(files_lglock); | ||
38 | DEFINE_LGLOCK(files_lglock); | 37 | DEFINE_LGLOCK(files_lglock); |
39 | 38 | ||
40 | /* SLAB cache for file structures */ | 39 | /* SLAB cache for file structures */ |
@@ -421,9 +420,9 @@ static inline void __file_sb_list_add(struct file *file, struct super_block *sb) | |||
421 | */ | 420 | */ |
422 | void file_sb_list_add(struct file *file, struct super_block *sb) | 421 | void file_sb_list_add(struct file *file, struct super_block *sb) |
423 | { | 422 | { |
424 | lg_local_lock(files_lglock); | 423 | lg_local_lock(&files_lglock); |
425 | __file_sb_list_add(file, sb); | 424 | __file_sb_list_add(file, sb); |
426 | lg_local_unlock(files_lglock); | 425 | lg_local_unlock(&files_lglock); |
427 | } | 426 | } |
428 | 427 | ||
429 | /** | 428 | /** |
@@ -436,9 +435,9 @@ void file_sb_list_add(struct file *file, struct super_block *sb) | |||
436 | void file_sb_list_del(struct file *file) | 435 | void file_sb_list_del(struct file *file) |
437 | { | 436 | { |
438 | if (!list_empty(&file->f_u.fu_list)) { | 437 | if (!list_empty(&file->f_u.fu_list)) { |
439 | lg_local_lock_cpu(files_lglock, file_list_cpu(file)); | 438 | lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); |
440 | list_del_init(&file->f_u.fu_list); | 439 | list_del_init(&file->f_u.fu_list); |
441 | lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); | 440 | lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); |
442 | } | 441 | } |
443 | } | 442 | } |
444 | 443 | ||
@@ -485,7 +484,7 @@ void mark_files_ro(struct super_block *sb) | |||
485 | struct file *f; | 484 | struct file *f; |
486 | 485 | ||
487 | retry: | 486 | retry: |
488 | lg_global_lock(files_lglock); | 487 | lg_global_lock(&files_lglock); |
489 | do_file_list_for_each_entry(sb, f) { | 488 | do_file_list_for_each_entry(sb, f) { |
490 | struct vfsmount *mnt; | 489 | struct vfsmount *mnt; |
491 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) | 490 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) |
@@ -502,12 +501,12 @@ retry: | |||
502 | file_release_write(f); | 501 | file_release_write(f); |
503 | mnt = mntget(f->f_path.mnt); | 502 | mnt = mntget(f->f_path.mnt); |
504 | /* This can sleep, so we can't hold the spinlock. */ | 503 | /* This can sleep, so we can't hold the spinlock. */ |
505 | lg_global_unlock(files_lglock); | 504 | lg_global_unlock(&files_lglock); |
506 | mnt_drop_write(mnt); | 505 | mnt_drop_write(mnt); |
507 | mntput(mnt); | 506 | mntput(mnt); |
508 | goto retry; | 507 | goto retry; |
509 | } while_file_list_for_each_entry; | 508 | } while_file_list_for_each_entry; |
510 | lg_global_unlock(files_lglock); | 509 | lg_global_unlock(&files_lglock); |
511 | } | 510 | } |
512 | 511 | ||
513 | void __init files_init(unsigned long mempages) | 512 | void __init files_init(unsigned long mempages) |
@@ -525,6 +524,6 @@ void __init files_init(unsigned long mempages) | |||
525 | n = (mempages * (PAGE_SIZE / 1024)) / 10; | 524 | n = (mempages * (PAGE_SIZE / 1024)) / 10; |
526 | files_stat.max_files = max_t(unsigned long, n, NR_FILE); | 525 | files_stat.max_files = max_t(unsigned long, n, NR_FILE); |
527 | files_defer_init(); | 526 | files_defer_init(); |
528 | lg_lock_init(files_lglock); | 527 | lg_lock_init(&files_lglock, "files_lglock"); |
529 | percpu_counter_init(&nr_files, 0); | 528 | percpu_counter_init(&nr_files, 0); |
530 | } | 529 | } |
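file_table.c is adapted to the reworked lglock API: the lock is now passed by address, lg_lock_init() takes an explicit name, and the separate DECLARE_LGLOCK() is gone. An lglock pairs a cheap per-CPU "local" lock with a "global" operation that takes every per-CPU lock; a rough userspace analogy with one mutex per CPU, not the kernel implementation:

#include <pthread.h>
#include <stdio.h>

#define NR_CPUS_DEMO 4

struct lglock_demo {
	pthread_mutex_t cpu_lock[NR_CPUS_DEMO];
	const char *name;
};

static void lg_lock_init_demo(struct lglock_demo *lg, const char *name)
{
	lg->name = name;
	for (int i = 0; i < NR_CPUS_DEMO; i++)
		pthread_mutex_init(&lg->cpu_lock[i], NULL);
}

/* Fast path: work touching only this CPU's list takes one lock. */
static void lg_local_lock_demo(struct lglock_demo *lg, int cpu)
{
	pthread_mutex_lock(&lg->cpu_lock[cpu]);
}

static void lg_local_unlock_demo(struct lglock_demo *lg, int cpu)
{
	pthread_mutex_unlock(&lg->cpu_lock[cpu]);
}

/* Slow path: a global traversal takes every per-CPU lock. */
static void lg_global_lock_demo(struct lglock_demo *lg)
{
	for (int i = 0; i < NR_CPUS_DEMO; i++)
		pthread_mutex_lock(&lg->cpu_lock[i]);
}

static void lg_global_unlock_demo(struct lglock_demo *lg)
{
	for (int i = NR_CPUS_DEMO - 1; i >= 0; i--)
		pthread_mutex_unlock(&lg->cpu_lock[i]);
}

int main(void)
{
	struct lglock_demo files_lg;

	lg_lock_init_demo(&files_lg, "files_lglock");
	lg_local_lock_demo(&files_lg, 0);
	lg_local_unlock_demo(&files_lg, 0);
	lg_global_lock_demo(&files_lg);
	lg_global_unlock_demo(&files_lg);
	printf("%s ok\n", files_lg.name);
	return 0;
}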
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 504e61b7fd75..9562109d3a87 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -962,7 +962,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
962 | if (err) | 962 | if (err) |
963 | goto out; | 963 | goto out; |
964 | 964 | ||
965 | file_update_time(file); | 965 | err = file_update_time(file); |
966 | if (err) | ||
967 | goto out; | ||
966 | 968 | ||
967 | if (file->f_flags & O_DIRECT) { | 969 | if (file->f_flags & O_DIRECT) { |
968 | written = generic_file_direct_write(iocb, iov, &nr_segs, | 970 | written = generic_file_direct_write(iocb, iov, &nr_segs, |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 56f6dcf30768..42678a33b7bb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -627,12 +627,10 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, | |||
627 | return ERR_PTR(err); | 627 | return ERR_PTR(err); |
628 | } | 628 | } |
629 | 629 | ||
630 | static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | 630 | static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, |
631 | int connectable) | 631 | struct inode *parent) |
632 | { | 632 | { |
633 | struct inode *inode = dentry->d_inode; | 633 | int len = parent ? 6 : 3; |
634 | bool encode_parent = connectable && !S_ISDIR(inode->i_mode); | ||
635 | int len = encode_parent ? 6 : 3; | ||
636 | u64 nodeid; | 634 | u64 nodeid; |
637 | u32 generation; | 635 | u32 generation; |
638 | 636 | ||
@@ -648,14 +646,9 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
648 | fh[1] = (u32)(nodeid & 0xffffffff); | 646 | fh[1] = (u32)(nodeid & 0xffffffff); |
649 | fh[2] = generation; | 647 | fh[2] = generation; |
650 | 648 | ||
651 | if (encode_parent) { | 649 | if (parent) { |
652 | struct inode *parent; | ||
653 | |||
654 | spin_lock(&dentry->d_lock); | ||
655 | parent = dentry->d_parent->d_inode; | ||
656 | nodeid = get_fuse_inode(parent)->nodeid; | 650 | nodeid = get_fuse_inode(parent)->nodeid; |
657 | generation = parent->i_generation; | 651 | generation = parent->i_generation; |
658 | spin_unlock(&dentry->d_lock); | ||
659 | 652 | ||
660 | fh[3] = (u32)(nodeid >> 32); | 653 | fh[3] = (u32)(nodeid >> 32); |
661 | fh[4] = (u32)(nodeid & 0xffffffff); | 654 | fh[4] = (u32)(nodeid & 0xffffffff); |
@@ -663,7 +656,7 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
663 | } | 656 | } |
664 | 657 | ||
665 | *max_len = len; | 658 | *max_len = len; |
666 | return encode_parent ? 0x82 : 0x81; | 659 | return parent ? 0x82 : 0x81; |
667 | } | 660 | } |
668 | 661 | ||
669 | static struct dentry *fuse_fh_to_dentry(struct super_block *sb, | 662 | static struct dentry *fuse_fh_to_dentry(struct super_block *sb, |
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 70ba891654f8..e8ed6d4a6181 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -28,15 +28,14 @@ | |||
28 | #define GFS2_LARGE_FH_SIZE 8 | 28 | #define GFS2_LARGE_FH_SIZE 8 |
29 | #define GFS2_OLD_FH_SIZE 10 | 29 | #define GFS2_OLD_FH_SIZE 10 |
30 | 30 | ||
31 | static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, | 31 | static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, |
32 | int connectable) | 32 | struct inode *parent) |
33 | { | 33 | { |
34 | __be32 *fh = (__force __be32 *)p; | 34 | __be32 *fh = (__force __be32 *)p; |
35 | struct inode *inode = dentry->d_inode; | ||
36 | struct super_block *sb = inode->i_sb; | 35 | struct super_block *sb = inode->i_sb; |
37 | struct gfs2_inode *ip = GFS2_I(inode); | 36 | struct gfs2_inode *ip = GFS2_I(inode); |
38 | 37 | ||
39 | if (connectable && (*len < GFS2_LARGE_FH_SIZE)) { | 38 | if (parent && (*len < GFS2_LARGE_FH_SIZE)) { |
40 | *len = GFS2_LARGE_FH_SIZE; | 39 | *len = GFS2_LARGE_FH_SIZE; |
41 | return 255; | 40 | return 255; |
42 | } else if (*len < GFS2_SMALL_FH_SIZE) { | 41 | } else if (*len < GFS2_SMALL_FH_SIZE) { |
@@ -50,14 +49,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, | |||
50 | fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); | 49 | fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); |
51 | *len = GFS2_SMALL_FH_SIZE; | 50 | *len = GFS2_SMALL_FH_SIZE; |
52 | 51 | ||
53 | if (!connectable || inode == sb->s_root->d_inode) | 52 | if (!parent || inode == sb->s_root->d_inode) |
54 | return *len; | 53 | return *len; |
55 | 54 | ||
56 | spin_lock(&dentry->d_lock); | 55 | ip = GFS2_I(parent); |
57 | inode = dentry->d_parent->d_inode; | ||
58 | ip = GFS2_I(inode); | ||
59 | igrab(inode); | ||
60 | spin_unlock(&dentry->d_lock); | ||
61 | 56 | ||
62 | fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); | 57 | fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); |
63 | fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); | 58 | fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); |
@@ -65,8 +60,6 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, | |||
65 | fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); | 60 | fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); |
66 | *len = GFS2_LARGE_FH_SIZE; | 61 | *len = GFS2_LARGE_FH_SIZE; |
67 | 62 | ||
68 | iput(inode); | ||
69 | |||
70 | return *len; | 63 | return *len; |
71 | } | 64 | } |
72 | 65 | ||
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index 7a5eb2c718c8..cdb84a838068 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c | |||
@@ -16,9 +16,9 @@ | |||
16 | static int chk_if_allocated(struct super_block *s, secno sec, char *msg) | 16 | static int chk_if_allocated(struct super_block *s, secno sec, char *msg) |
17 | { | 17 | { |
18 | struct quad_buffer_head qbh; | 18 | struct quad_buffer_head qbh; |
19 | u32 *bmp; | 19 | __le32 *bmp; |
20 | if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; | 20 | if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; |
21 | if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { | 21 | if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { |
22 | hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); | 22 | hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); |
23 | goto fail1; | 23 | goto fail1; |
24 | } | 24 | } |
@@ -62,7 +62,7 @@ int hpfs_chk_sectors(struct super_block *s, secno start, int len, char *msg) | |||
62 | static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigned forward) | 62 | static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigned forward) |
63 | { | 63 | { |
64 | struct quad_buffer_head qbh; | 64 | struct quad_buffer_head qbh; |
65 | unsigned *bmp; | 65 | __le32 *bmp; |
66 | unsigned bs = near & ~0x3fff; | 66 | unsigned bs = near & ~0x3fff; |
67 | unsigned nr = (near & 0x3fff) & ~(n - 1); | 67 | unsigned nr = (near & 0x3fff) & ~(n - 1); |
68 | /*unsigned mnr;*/ | 68 | /*unsigned mnr;*/ |
@@ -236,7 +236,7 @@ static secno alloc_in_dirband(struct super_block *s, secno near) | |||
236 | int hpfs_alloc_if_possible(struct super_block *s, secno sec) | 236 | int hpfs_alloc_if_possible(struct super_block *s, secno sec) |
237 | { | 237 | { |
238 | struct quad_buffer_head qbh; | 238 | struct quad_buffer_head qbh; |
239 | u32 *bmp; | 239 | __le32 *bmp; |
240 | if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; | 240 | if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; |
241 | if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) { | 241 | if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) { |
242 | bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f))); | 242 | bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f))); |
@@ -254,7 +254,7 @@ int hpfs_alloc_if_possible(struct super_block *s, secno sec) | |||
254 | void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) | 254 | void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) |
255 | { | 255 | { |
256 | struct quad_buffer_head qbh; | 256 | struct quad_buffer_head qbh; |
257 | u32 *bmp; | 257 | __le32 *bmp; |
258 | struct hpfs_sb_info *sbi = hpfs_sb(s); | 258 | struct hpfs_sb_info *sbi = hpfs_sb(s); |
259 | /*printk("2 - ");*/ | 259 | /*printk("2 - ");*/ |
260 | if (!n) return; | 260 | if (!n) return; |
@@ -299,7 +299,7 @@ int hpfs_check_free_dnodes(struct super_block *s, int n) | |||
299 | int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; | 299 | int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; |
300 | int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; | 300 | int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; |
301 | int i, j; | 301 | int i, j; |
302 | u32 *bmp; | 302 | __le32 *bmp; |
303 | struct quad_buffer_head qbh; | 303 | struct quad_buffer_head qbh; |
304 | if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { | 304 | if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { |
305 | for (j = 0; j < 512; j++) { | 305 | for (j = 0; j < 512; j++) { |
@@ -351,7 +351,7 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno) | |||
351 | hpfs_free_sectors(s, dno, 4); | 351 | hpfs_free_sectors(s, dno, 4); |
352 | } else { | 352 | } else { |
353 | struct quad_buffer_head qbh; | 353 | struct quad_buffer_head qbh; |
354 | u32 *bmp; | 354 | __le32 *bmp; |
355 | unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; | 355 | unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; |
356 | if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { | 356 | if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { |
357 | return; | 357 | return; |
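The hpfs/alloc.c changes annotate the on-disk bitmaps as __le32 * and fix chk_if_allocated(), which was converting with cpu_to_le32() where le32_to_cpu() was meant; with the __le32 annotations sparse can flag exactly that kind of byte-order slip. A host-order-independent sketch of testing one bit in a little-endian bitmap (the helper name is made up):

#include <stdint.h>
#include <stdio.h>

static uint32_t le32_to_cpu_demo(uint32_t v)
{
	const uint8_t *b = (const uint8_t *)&v;

	return (uint32_t)b[0] | (uint32_t)b[1] << 8 |
	       (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
}

static int bit_is_set(const uint32_t *bmp_le, unsigned sec)
{
	/* word index sec / 32, bit index sec % 32, i.e. the same
	 * (sec & 0x3fff) >> 5 and (sec & 0x1f) arithmetic as above. */
	return (le32_to_cpu_demo(bmp_le[sec >> 5]) >> (sec & 0x1f)) & 1;
}

int main(void)
{
	uint32_t bmp[1] = { 0 };

	((uint8_t *)bmp)[0] = 0x04;	/* little-endian word with bit 2 set */
	printf("bit2=%d bit3=%d\n", bit_is_set(bmp, 2), bit_is_set(bmp, 3));
	return 0;
}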
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 08b503e8ed29..4bae4a4a60b1 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c | |||
@@ -20,7 +20,7 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode, | |||
20 | int c1, c2 = 0; | 20 | int c1, c2 = 0; |
21 | go_down: | 21 | go_down: |
22 | if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; | 22 | if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; |
23 | if (btree->internal) { | 23 | if (bp_internal(btree)) { |
24 | for (i = 0; i < btree->n_used_nodes; i++) | 24 | for (i = 0; i < btree->n_used_nodes; i++) |
25 | if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) { | 25 | if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) { |
26 | a = le32_to_cpu(btree->u.internal[i].down); | 26 | a = le32_to_cpu(btree->u.internal[i].down); |
@@ -82,7 +82,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
82 | brelse(bh); | 82 | brelse(bh); |
83 | return -1; | 83 | return -1; |
84 | } | 84 | } |
85 | if (btree->internal) { | 85 | if (bp_internal(btree)) { |
86 | a = le32_to_cpu(btree->u.internal[n].down); | 86 | a = le32_to_cpu(btree->u.internal[n].down); |
87 | btree->u.internal[n].file_secno = cpu_to_le32(-1); | 87 | btree->u.internal[n].file_secno = cpu_to_le32(-1); |
88 | mark_buffer_dirty(bh); | 88 | mark_buffer_dirty(bh); |
@@ -129,12 +129,12 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
129 | } | 129 | } |
130 | if (a == node && fnod) { | 130 | if (a == node && fnod) { |
131 | anode->up = cpu_to_le32(node); | 131 | anode->up = cpu_to_le32(node); |
132 | anode->btree.fnode_parent = 1; | 132 | anode->btree.flags |= BP_fnode_parent; |
133 | anode->btree.n_used_nodes = btree->n_used_nodes; | 133 | anode->btree.n_used_nodes = btree->n_used_nodes; |
134 | anode->btree.first_free = btree->first_free; | 134 | anode->btree.first_free = btree->first_free; |
135 | anode->btree.n_free_nodes = 40 - anode->btree.n_used_nodes; | 135 | anode->btree.n_free_nodes = 40 - anode->btree.n_used_nodes; |
136 | memcpy(&anode->u, &btree->u, btree->n_used_nodes * 12); | 136 | memcpy(&anode->u, &btree->u, btree->n_used_nodes * 12); |
137 | btree->internal = 1; | 137 | btree->flags |= BP_internal; |
138 | btree->n_free_nodes = 11; | 138 | btree->n_free_nodes = 11; |
139 | btree->n_used_nodes = 1; | 139 | btree->n_used_nodes = 1; |
140 | btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree); | 140 | btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree); |
@@ -184,7 +184,10 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
184 | hpfs_free_sectors(s, ra, 1); | 184 | hpfs_free_sectors(s, ra, 1); |
185 | if ((anode = hpfs_map_anode(s, na, &bh))) { | 185 | if ((anode = hpfs_map_anode(s, na, &bh))) { |
186 | anode->up = cpu_to_le32(up); | 186 | anode->up = cpu_to_le32(up); |
187 | anode->btree.fnode_parent = up == node && fnod; | 187 | if (up == node && fnod) |
188 | anode->btree.flags |= BP_fnode_parent; | ||
189 | else | ||
190 | anode->btree.flags &= ~BP_fnode_parent; | ||
188 | mark_buffer_dirty(bh); | 191 | mark_buffer_dirty(bh); |
189 | brelse(bh); | 192 | brelse(bh); |
190 | } | 193 | } |
@@ -198,7 +201,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
198 | if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { | 201 | if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { |
199 | anode = new_anode; | 202 | anode = new_anode; |
200 | /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/ | 203 | /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/ |
201 | anode->btree.internal = 1; | 204 | anode->btree.flags |= BP_internal; |
202 | anode->btree.n_used_nodes = 1; | 205 | anode->btree.n_used_nodes = 1; |
203 | anode->btree.n_free_nodes = 59; | 206 | anode->btree.n_free_nodes = 59; |
204 | anode->btree.first_free = cpu_to_le16(16); | 207 | anode->btree.first_free = cpu_to_le16(16); |
@@ -215,7 +218,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
215 | } | 218 | } |
216 | if ((anode = hpfs_map_anode(s, na, &bh))) { | 219 | if ((anode = hpfs_map_anode(s, na, &bh))) { |
217 | anode->up = cpu_to_le32(node); | 220 | anode->up = cpu_to_le32(node); |
218 | if (fnod) anode->btree.fnode_parent = 1; | 221 | if (fnod) |
222 | anode->btree.flags |= BP_fnode_parent; | ||
219 | mark_buffer_dirty(bh); | 223 | mark_buffer_dirty(bh); |
220 | brelse(bh); | 224 | brelse(bh); |
221 | } | 225 | } |
@@ -234,18 +238,19 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi | |||
234 | } | 238 | } |
235 | ranode->up = cpu_to_le32(node); | 239 | ranode->up = cpu_to_le32(node); |
236 | memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free)); | 240 | memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free)); |
237 | if (fnod) ranode->btree.fnode_parent = 1; | 241 | if (fnod) |
238 | ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes; | 242 | ranode->btree.flags |= BP_fnode_parent; |
239 | if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) { | 243 | ranode->btree.n_free_nodes = (bp_internal(&ranode->btree) ? 60 : 40) - ranode->btree.n_used_nodes; |
244 | if (bp_internal(&ranode->btree)) for (n = 0; n < ranode->btree.n_used_nodes; n++) { | ||
240 | struct anode *unode; | 245 | struct anode *unode; |
241 | if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) { | 246 | if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) { |
242 | unode->up = cpu_to_le32(ra); | 247 | unode->up = cpu_to_le32(ra); |
243 | unode->btree.fnode_parent = 0; | 248 | unode->btree.flags &= ~BP_fnode_parent; |
244 | mark_buffer_dirty(bh1); | 249 | mark_buffer_dirty(bh1); |
245 | brelse(bh1); | 250 | brelse(bh1); |
246 | } | 251 | } |
247 | } | 252 | } |
248 | btree->internal = 1; | 253 | btree->flags |= BP_internal; |
249 | btree->n_free_nodes = fnod ? 10 : 58; | 254 | btree->n_free_nodes = fnod ? 10 : 58; |
250 | btree->n_used_nodes = 2; | 255 | btree->n_used_nodes = 2; |
251 | btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree); | 256 | btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree); |
@@ -278,7 +283,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) | |||
278 | int d1, d2; | 283 | int d1, d2; |
279 | go_down: | 284 | go_down: |
280 | d2 = 0; | 285 | d2 = 0; |
281 | while (btree1->internal) { | 286 | while (bp_internal(btree1)) { |
282 | ano = le32_to_cpu(btree1->u.internal[pos].down); | 287 | ano = le32_to_cpu(btree1->u.internal[pos].down); |
283 | if (level) brelse(bh); | 288 | if (level) brelse(bh); |
284 | if (hpfs_sb(s)->sb_chk) | 289 | if (hpfs_sb(s)->sb_chk) |
@@ -412,13 +417,13 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) | |||
412 | btree->n_free_nodes = 8; | 417 | btree->n_free_nodes = 8; |
413 | btree->n_used_nodes = 0; | 418 | btree->n_used_nodes = 0; |
414 | btree->first_free = cpu_to_le16(8); | 419 | btree->first_free = cpu_to_le16(8); |
415 | btree->internal = 0; | 420 | btree->flags &= ~BP_internal; |
416 | mark_buffer_dirty(bh); | 421 | mark_buffer_dirty(bh); |
417 | } else hpfs_free_sectors(s, f, 1); | 422 | } else hpfs_free_sectors(s, f, 1); |
418 | brelse(bh); | 423 | brelse(bh); |
419 | return; | 424 | return; |
420 | } | 425 | } |
421 | while (btree->internal) { | 426 | while (bp_internal(btree)) { |
422 | nodes = btree->n_used_nodes + btree->n_free_nodes; | 427 | nodes = btree->n_used_nodes + btree->n_free_nodes; |
423 | for (i = 0; i < btree->n_used_nodes; i++) | 428 | for (i = 0; i < btree->n_used_nodes; i++) |
424 | if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f; | 429 | if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f; |
@@ -479,13 +484,13 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno) | |||
479 | struct extended_attribute *ea; | 484 | struct extended_attribute *ea; |
480 | struct extended_attribute *ea_end; | 485 | struct extended_attribute *ea_end; |
481 | if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; | 486 | if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; |
482 | if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree); | 487 | if (!fnode_is_dir(fnode)) hpfs_remove_btree(s, &fnode->btree); |
483 | else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); | 488 | else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); |
484 | ea_end = fnode_end_ea(fnode); | 489 | ea_end = fnode_end_ea(fnode); |
485 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) | 490 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) |
486 | if (ea->indirect) | 491 | if (ea_indirect(ea)) |
487 | hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); | 492 | hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); |
488 | hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l)); | 493 | hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l)); |
489 | brelse(bh); | 494 | brelse(bh); |
490 | hpfs_free_sectors(s, fno, 1); | 495 | hpfs_free_sectors(s, fno, 1); |
491 | } | 496 | } |
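Across the hpfs changes, single-bit C bitfields in on-disk structures (btree->internal, fnode_parent, fnode->dirflag, ea->indirect, ea->anode) give way to explicit flag bits with small accessors (bp_internal(), fnode_is_dir(), ea_indirect(), ea_in_anode(), ...), since bitfield layout is compiler and endian dependent and therefore unreliable for an on-disk format. The pattern, with illustrative flag values and struct layout:

#include <stdint.h>
#include <stdio.h>

/* Flag bits live in an explicit byte, so their positions are fixed by these
 * definitions rather than by compiler bitfield layout. Values here are
 * illustrative, not taken from hpfs.h. */
#define BP_FNODE_PARENT	0x20
#define BP_INTERNAL	0x80

struct bplus_header_demo {
	uint8_t flags;
	uint8_t n_free_nodes;
	uint8_t n_used_nodes;
	/* ... */
};

static int bp_internal_demo(const struct bplus_header_demo *b)
{
	return b->flags & BP_INTERNAL;
}

int main(void)
{
	struct bplus_header_demo b = { 0 };

	b.flags |= BP_INTERNAL;		/* node holds internal entries */
	b.flags &= ~BP_FNODE_PARENT;	/* parent is an anode, not an fnode */
	printf("internal=%d\n", !!bp_internal_demo(&b));
	return 0;
}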
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 2fa0089a02a8..b8472f803f4e 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c | |||
@@ -87,7 +87,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
87 | ret = -EIOERROR; | 87 | ret = -EIOERROR; |
88 | goto out; | 88 | goto out; |
89 | } | 89 | } |
90 | if (!fno->dirflag) { | 90 | if (!fnode_is_dir(fno)) { |
91 | e = 1; | 91 | e = 1; |
92 | hpfs_error(inode->i_sb, "not a directory, fnode %08lx", | 92 | hpfs_error(inode->i_sb, "not a directory, fnode %08lx", |
93 | (unsigned long)inode->i_ino); | 93 | (unsigned long)inode->i_ino); |
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 1e0e2ac30fd3..3228c524ebe5 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c | |||
@@ -153,7 +153,7 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno | |||
153 | } | 153 | } |
154 | de->length = cpu_to_le16(36); | 154 | de->length = cpu_to_le16(36); |
155 | de->down = 1; | 155 | de->down = 1; |
156 | *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr); | 156 | *(__le32 *)((char *)de + 32) = cpu_to_le32(ptr); |
157 | } | 157 | } |
158 | } | 158 | } |
159 | 159 | ||
@@ -177,7 +177,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, | |||
177 | memmove((char *)de + d_size, de, (char *)de_end - (char *)de); | 177 | memmove((char *)de + d_size, de, (char *)de_end - (char *)de); |
178 | memset(de, 0, d_size); | 178 | memset(de, 0, d_size); |
179 | if (down_ptr) { | 179 | if (down_ptr) { |
180 | *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); | 180 | *(__le32 *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); |
181 | de->down = 1; | 181 | de->down = 1; |
182 | } | 182 | } |
183 | de->length = cpu_to_le16(d_size); | 183 | de->length = cpu_to_le16(d_size); |
@@ -656,7 +656,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) | |||
656 | del->down = 0; | 656 | del->down = 0; |
657 | d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); | 657 | d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); |
658 | } else if (down) | 658 | } else if (down) |
659 | *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); | 659 | *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); |
660 | } else goto endm; | 660 | } else goto endm; |
661 | if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) { | 661 | if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) { |
662 | printk("HPFS: out of memory for dtree balancing\n"); | 662 | printk("HPFS: out of memory for dtree balancing\n"); |
@@ -672,7 +672,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) | |||
672 | de_prev->down = 1; | 672 | de_prev->down = 1; |
673 | dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); | 673 | dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); |
674 | } | 674 | } |
675 | *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); | 675 | *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); |
676 | hpfs_mark_4buffers_dirty(&qbh); | 676 | hpfs_mark_4buffers_dirty(&qbh); |
677 | hpfs_brelse4(&qbh); | 677 | hpfs_brelse4(&qbh); |
678 | for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); | 678 | for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); |
@@ -1015,7 +1015,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, | |||
1015 | kfree(name2); | 1015 | kfree(name2); |
1016 | return NULL; | 1016 | return NULL; |
1017 | } | 1017 | } |
1018 | if (!upf->dirflag) { | 1018 | if (!fnode_is_dir(upf)) { |
1019 | brelse(bh); | 1019 | brelse(bh); |
1020 | hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up)); | 1020 | hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up)); |
1021 | kfree(name2); | 1021 | kfree(name2); |
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index d8b84d113c89..bcaafcd2666a 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c | |||
@@ -23,15 +23,15 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len) | |||
23 | return; | 23 | return; |
24 | } | 24 | } |
25 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; | 25 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; |
26 | if (ea->indirect) { | 26 | if (ea_indirect(ea)) { |
27 | if (ea_valuelen(ea) != 8) { | 27 | if (ea_valuelen(ea) != 8) { |
28 | hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x", | 28 | hpfs_error(s, "ea_indirect(ea) set while ea->valuelen!=8, %s %08x, pos %08x", |
29 | ano ? "anode" : "sectors", a, pos); | 29 | ano ? "anode" : "sectors", a, pos); |
30 | return; | 30 | return; |
31 | } | 31 | } |
32 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 9, ex+4)) | 32 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 9, ex+4)) |
33 | return; | 33 | return; |
34 | hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); | 34 | hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); |
35 | } | 35 | } |
36 | pos += ea->namelen + ea_valuelen(ea) + 5; | 36 | pos += ea->namelen + ea_valuelen(ea) + 5; |
37 | } | 37 | } |
@@ -81,7 +81,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, | |||
81 | struct extended_attribute *ea_end = fnode_end_ea(fnode); | 81 | struct extended_attribute *ea_end = fnode_end_ea(fnode); |
82 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) | 82 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) |
83 | if (!strcmp(ea->name, key)) { | 83 | if (!strcmp(ea->name, key)) { |
84 | if (ea->indirect) | 84 | if (ea_indirect(ea)) |
85 | goto indirect; | 85 | goto indirect; |
86 | if (ea_valuelen(ea) >= size) | 86 | if (ea_valuelen(ea) >= size) |
87 | return -EINVAL; | 87 | return -EINVAL; |
@@ -91,7 +91,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, | |||
91 | } | 91 | } |
92 | a = le32_to_cpu(fnode->ea_secno); | 92 | a = le32_to_cpu(fnode->ea_secno); |
93 | len = le32_to_cpu(fnode->ea_size_l); | 93 | len = le32_to_cpu(fnode->ea_size_l); |
94 | ano = fnode->ea_anode; | 94 | ano = fnode_in_anode(fnode); |
95 | pos = 0; | 95 | pos = 0; |
96 | while (pos < len) { | 96 | while (pos < len) { |
97 | ea = (struct extended_attribute *)ex; | 97 | ea = (struct extended_attribute *)ex; |
@@ -101,10 +101,10 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, | |||
101 | return -EIO; | 101 | return -EIO; |
102 | } | 102 | } |
103 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return -EIO; | 103 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return -EIO; |
104 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) | 104 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) |
105 | return -EIO; | 105 | return -EIO; |
106 | if (!strcmp(ea->name, key)) { | 106 | if (!strcmp(ea->name, key)) { |
107 | if (ea->indirect) | 107 | if (ea_indirect(ea)) |
108 | goto indirect; | 108 | goto indirect; |
109 | if (ea_valuelen(ea) >= size) | 109 | if (ea_valuelen(ea) >= size) |
110 | return -EINVAL; | 110 | return -EINVAL; |
@@ -119,7 +119,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, | |||
119 | indirect: | 119 | indirect: |
120 | if (ea_len(ea) >= size) | 120 | if (ea_len(ea) >= size) |
121 | return -EINVAL; | 121 | return -EINVAL; |
122 | if (hpfs_ea_read(s, ea_sec(ea), ea->anode, 0, ea_len(ea), buf)) | 122 | if (hpfs_ea_read(s, ea_sec(ea), ea_in_anode(ea), 0, ea_len(ea), buf)) |
123 | return -EIO; | 123 | return -EIO; |
124 | buf[ea_len(ea)] = 0; | 124 | buf[ea_len(ea)] = 0; |
125 | return 0; | 125 | return 0; |
@@ -136,8 +136,8 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si | |||
136 | struct extended_attribute *ea_end = fnode_end_ea(fnode); | 136 | struct extended_attribute *ea_end = fnode_end_ea(fnode); |
137 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) | 137 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) |
138 | if (!strcmp(ea->name, key)) { | 138 | if (!strcmp(ea->name, key)) { |
139 | if (ea->indirect) | 139 | if (ea_indirect(ea)) |
140 | return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); | 140 | return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); |
141 | if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { | 141 | if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { |
142 | printk("HPFS: out of memory for EA\n"); | 142 | printk("HPFS: out of memory for EA\n"); |
143 | return NULL; | 143 | return NULL; |
@@ -148,7 +148,7 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si | |||
148 | } | 148 | } |
149 | a = le32_to_cpu(fnode->ea_secno); | 149 | a = le32_to_cpu(fnode->ea_secno); |
150 | len = le32_to_cpu(fnode->ea_size_l); | 150 | len = le32_to_cpu(fnode->ea_size_l); |
151 | ano = fnode->ea_anode; | 151 | ano = fnode_in_anode(fnode); |
152 | pos = 0; | 152 | pos = 0; |
153 | while (pos < len) { | 153 | while (pos < len) { |
154 | char ex[4 + 255 + 1 + 8]; | 154 | char ex[4 + 255 + 1 + 8]; |
@@ -159,11 +159,11 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si | |||
159 | return NULL; | 159 | return NULL; |
160 | } | 160 | } |
161 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return NULL; | 161 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return NULL; |
162 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) | 162 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) |
163 | return NULL; | 163 | return NULL; |
164 | if (!strcmp(ea->name, key)) { | 164 | if (!strcmp(ea->name, key)) { |
165 | if (ea->indirect) | 165 | if (ea_indirect(ea)) |
166 | return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); | 166 | return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); |
167 | if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { | 167 | if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { |
168 | printk("HPFS: out of memory for EA\n"); | 168 | printk("HPFS: out of memory for EA\n"); |
169 | return NULL; | 169 | return NULL; |
@@ -199,9 +199,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
199 | struct extended_attribute *ea_end = fnode_end_ea(fnode); | 199 | struct extended_attribute *ea_end = fnode_end_ea(fnode); |
200 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) | 200 | for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) |
201 | if (!strcmp(ea->name, key)) { | 201 | if (!strcmp(ea->name, key)) { |
202 | if (ea->indirect) { | 202 | if (ea_indirect(ea)) { |
203 | if (ea_len(ea) == size) | 203 | if (ea_len(ea) == size) |
204 | set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); | 204 | set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); |
205 | } else if (ea_valuelen(ea) == size) { | 205 | } else if (ea_valuelen(ea) == size) { |
206 | memcpy(ea_data(ea), data, size); | 206 | memcpy(ea_data(ea), data, size); |
207 | } | 207 | } |
@@ -209,7 +209,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
209 | } | 209 | } |
210 | a = le32_to_cpu(fnode->ea_secno); | 210 | a = le32_to_cpu(fnode->ea_secno); |
211 | len = le32_to_cpu(fnode->ea_size_l); | 211 | len = le32_to_cpu(fnode->ea_size_l); |
212 | ano = fnode->ea_anode; | 212 | ano = fnode_in_anode(fnode); |
213 | pos = 0; | 213 | pos = 0; |
214 | while (pos < len) { | 214 | while (pos < len) { |
215 | char ex[4 + 255 + 1 + 8]; | 215 | char ex[4 + 255 + 1 + 8]; |
@@ -220,12 +220,12 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
220 | return; | 220 | return; |
221 | } | 221 | } |
222 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; | 222 | if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; |
223 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) | 223 | if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) |
224 | return; | 224 | return; |
225 | if (!strcmp(ea->name, key)) { | 225 | if (!strcmp(ea->name, key)) { |
226 | if (ea->indirect) { | 226 | if (ea_indirect(ea)) { |
227 | if (ea_len(ea) == size) | 227 | if (ea_len(ea) == size) |
228 | set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); | 228 | set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); |
229 | } | 229 | } |
230 | else { | 230 | else { |
231 | if (ea_valuelen(ea) == size) | 231 | if (ea_valuelen(ea) == size) |
@@ -246,7 +246,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
246 | if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) { | 246 | if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) { |
247 | hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", | 247 | hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", |
248 | (unsigned long)inode->i_ino, | 248 | (unsigned long)inode->i_ino, |
249 | le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); | 249 | le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); |
250 | return; | 250 | return; |
251 | } | 251 | } |
252 | if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) && | 252 | if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) && |
@@ -276,7 +276,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
276 | fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s)); | 276 | fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s)); |
277 | fnode->ea_size_s = cpu_to_le16(0); | 277 | fnode->ea_size_s = cpu_to_le16(0); |
278 | fnode->ea_secno = cpu_to_le32(n); | 278 | fnode->ea_secno = cpu_to_le32(n); |
279 | fnode->ea_anode = cpu_to_le32(0); | 279 | fnode->flags &= ~FNODE_anode; |
280 | mark_buffer_dirty(bh); | 280 | mark_buffer_dirty(bh); |
281 | brelse(bh); | 281 | brelse(bh); |
282 | } | 282 | } |
@@ -288,9 +288,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
288 | secno q = hpfs_alloc_sector(s, fno, 1, 0); | 288 | secno q = hpfs_alloc_sector(s, fno, 1, 0); |
289 | if (!q) goto bail; | 289 | if (!q) goto bail; |
290 | fnode->ea_secno = cpu_to_le32(q); | 290 | fnode->ea_secno = cpu_to_le32(q); |
291 | fnode->ea_anode = 0; | 291 | fnode->flags &= ~FNODE_anode; |
292 | len++; | 292 | len++; |
293 | } else if (!fnode->ea_anode) { | 293 | } else if (!fnode_in_anode(fnode)) { |
294 | if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) { | 294 | if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) { |
295 | len++; | 295 | len++; |
296 | } else { | 296 | } else { |
@@ -310,7 +310,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
310 | anode->u.external[0].length = cpu_to_le32(len); | 310 | anode->u.external[0].length = cpu_to_le32(len); |
311 | mark_buffer_dirty(bh); | 311 | mark_buffer_dirty(bh); |
312 | brelse(bh); | 312 | brelse(bh); |
313 | fnode->ea_anode = 1; | 313 | fnode->flags |= FNODE_anode; |
314 | fnode->ea_secno = cpu_to_le32(a_s);*/ | 314 | fnode->ea_secno = cpu_to_le32(a_s);*/ |
315 | secno new_sec; | 315 | secno new_sec; |
316 | int i; | 316 | int i; |
@@ -338,7 +338,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
338 | len = (pos + 511) >> 9; | 338 | len = (pos + 511) >> 9; |
339 | } | 339 | } |
340 | } | 340 | } |
341 | if (fnode->ea_anode) { | 341 | if (fnode_in_anode(fnode)) { |
342 | if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno), | 342 | if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno), |
343 | 0, len) != -1) { | 343 | 0, len) != -1) { |
344 | len++; | 344 | len++; |
@@ -351,16 +351,16 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, | |||
351 | h[1] = strlen(key); | 351 | h[1] = strlen(key); |
352 | h[2] = size & 0xff; | 352 | h[2] = size & 0xff; |
353 | h[3] = size >> 8; | 353 | h[3] = size >> 8; |
354 | if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; | 354 | if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; |
355 | if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; | 355 | if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; |
356 | if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; | 356 | if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; |
357 | fnode->ea_size_l = cpu_to_le32(pos); | 357 | fnode->ea_size_l = cpu_to_le32(pos); |
358 | ret: | 358 | ret: |
359 | hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; | 359 | hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; |
360 | return; | 360 | return; |
361 | bail: | 361 | bail: |
362 | if (le32_to_cpu(fnode->ea_secno)) | 362 | if (le32_to_cpu(fnode->ea_secno)) |
363 | if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); | 363 | if (fnode_in_anode(fnode)) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); |
364 | else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9)); | 364 | else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9)); |
365 | else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0); | 365 | else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0); |
366 | } | 366 | } |
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 8b0650aae328..cce025aff1b1 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h | |||
@@ -51,11 +51,11 @@ struct hpfs_boot_block | |||
51 | u8 n_rootdir_entries[2]; | 51 | u8 n_rootdir_entries[2]; |
52 | u8 n_sectors_s[2]; | 52 | u8 n_sectors_s[2]; |
53 | u8 media_byte; | 53 | u8 media_byte; |
54 | u16 sectors_per_fat; | 54 | __le16 sectors_per_fat; |
55 | u16 sectors_per_track; | 55 | __le16 sectors_per_track; |
56 | u16 heads_per_cyl; | 56 | __le16 heads_per_cyl; |
57 | u32 n_hidden_sectors; | 57 | __le32 n_hidden_sectors; |
58 | u32 n_sectors_l; /* size of partition */ | 58 | __le32 n_sectors_l; /* size of partition */ |
59 | u8 drive_number; | 59 | u8 drive_number; |
60 | u8 mbz; | 60 | u8 mbz; |
61 | u8 sig_28h; /* 28h */ | 61 | u8 sig_28h; /* 28h */ |
@@ -63,7 +63,7 @@ struct hpfs_boot_block | |||
63 | u8 vol_label[11]; | 63 | u8 vol_label[11]; |
64 | u8 sig_hpfs[8]; /* "HPFS " */ | 64 | u8 sig_hpfs[8]; /* "HPFS " */ |
65 | u8 pad[448]; | 65 | u8 pad[448]; |
66 | u16 magic; /* aa55 */ | 66 | __le16 magic; /* aa55 */ |
67 | }; | 67 | }; |
68 | 68 | ||
69 | 69 | ||
@@ -75,28 +75,28 @@ struct hpfs_boot_block | |||
75 | 75 | ||
76 | struct hpfs_super_block | 76 | struct hpfs_super_block |
77 | { | 77 | { |
78 | u32 magic; /* f995 e849 */ | 78 | __le32 magic; /* f995 e849 */ |
79 | u32 magic1; /* fa53 e9c5, more magic? */ | 79 | __le32 magic1; /* fa53 e9c5, more magic? */ |
80 | u8 version; /* version of a filesystem usually 2 */ | 80 | u8 version; /* version of a filesystem usually 2 */ |
81 | u8 funcversion; /* functional version - oldest version | 81 | u8 funcversion; /* functional version - oldest version |
82 | of filesystem that can understand | 82 | of filesystem that can understand |
83 | this disk */ | 83 | this disk */ |
84 | u16 zero; /* 0 */ | 84 | __le16 zero; /* 0 */ |
85 | fnode_secno root; /* fnode of root directory */ | 85 | __le32 root; /* fnode of root directory */ |
86 | secno n_sectors; /* size of filesystem */ | 86 | __le32 n_sectors; /* size of filesystem */ |
87 | u32 n_badblocks; /* number of bad blocks */ | 87 | __le32 n_badblocks; /* number of bad blocks */ |
88 | secno bitmaps; /* pointers to free space bit maps */ | 88 | __le32 bitmaps; /* pointers to free space bit maps */ |
89 | u32 zero1; /* 0 */ | 89 | __le32 zero1; /* 0 */ |
90 | secno badblocks; /* bad block list */ | 90 | __le32 badblocks; /* bad block list */ |
91 | u32 zero3; /* 0 */ | 91 | __le32 zero3; /* 0 */ |
92 | time32_t last_chkdsk; /* date last checked, 0 if never */ | 92 | __le32 last_chkdsk; /* date last checked, 0 if never */ |
93 | time32_t last_optimize; /* date last optimized, 0 if never */ | 93 | __le32 last_optimize; /* date last optimized, 0 if never */ |
94 | secno n_dir_band; /* number of sectors in dir band */ | 94 | __le32 n_dir_band; /* number of sectors in dir band */ |
95 | secno dir_band_start; /* first sector in dir band */ | 95 | __le32 dir_band_start; /* first sector in dir band */ |
96 | secno dir_band_end; /* last sector in dir band */ | 96 | __le32 dir_band_end; /* last sector in dir band */ |
97 | secno dir_band_bitmap; /* free space map, 1 dnode per bit */ | 97 | __le32 dir_band_bitmap; /* free space map, 1 dnode per bit */ |
98 | u8 volume_name[32]; /* not used */ | 98 | u8 volume_name[32]; /* not used */ |
99 | secno user_id_table; /* 8 preallocated sectors - user id */ | 99 | __le32 user_id_table; /* 8 preallocated sectors - user id */ |
100 | u32 zero6[103]; /* 0 */ | 100 | u32 zero6[103]; /* 0 */ |
101 | }; | 101 | }; |
102 | 102 | ||
@@ -109,8 +109,8 @@ struct hpfs_super_block | |||
109 | 109 | ||
110 | struct hpfs_spare_block | 110 | struct hpfs_spare_block |
111 | { | 111 | { |
112 | u32 magic; /* f991 1849 */ | 112 | __le32 magic; /* f991 1849 */ |
113 | u32 magic1; /* fa52 29c5, more magic? */ | 113 | __le32 magic1; /* fa52 29c5, more magic? */ |
114 | 114 | ||
115 | #ifdef __LITTLE_ENDIAN | 115 | #ifdef __LITTLE_ENDIAN |
116 | u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */ | 116 | u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */ |
@@ -153,21 +153,21 @@ struct hpfs_spare_block | |||
153 | u8 mm_contlgulty; | 153 | u8 mm_contlgulty; |
154 | u8 unused; | 154 | u8 unused; |
155 | 155 | ||
156 | secno hotfix_map; /* info about remapped bad sectors */ | 156 | __le32 hotfix_map; /* info about remapped bad sectors */ |
157 | u32 n_spares_used; /* number of hotfixes */ | 157 | __le32 n_spares_used; /* number of hotfixes */ |
158 | u32 n_spares; /* number of spares in hotfix map */ | 158 | __le32 n_spares; /* number of spares in hotfix map */ |
159 | u32 n_dnode_spares_free; /* spare dnodes unused */ | 159 | __le32 n_dnode_spares_free; /* spare dnodes unused */ |
160 | u32 n_dnode_spares; /* length of spare_dnodes[] list, | 160 | __le32 n_dnode_spares; /* length of spare_dnodes[] list, |
161 | follows in this block*/ | 161 | follows in this block*/ |
162 | secno code_page_dir; /* code page directory block */ | 162 | __le32 code_page_dir; /* code page directory block */ |
163 | u32 n_code_pages; /* number of code pages */ | 163 | __le32 n_code_pages; /* number of code pages */ |
164 | u32 super_crc; /* on HPFS386 and LAN Server this is | 164 | __le32 super_crc; /* on HPFS386 and LAN Server this is |
165 | checksum of superblock, on normal | 165 | checksum of superblock, on normal |
166 | OS/2 unused */ | 166 | OS/2 unused */ |
167 | u32 spare_crc; /* on HPFS386 checksum of spareblock */ | 167 | __le32 spare_crc; /* on HPFS386 checksum of spareblock */ |
168 | u32 zero1[15]; /* unused */ | 168 | __le32 zero1[15]; /* unused */ |
169 | dnode_secno spare_dnodes[100]; /* emergency free dnode list */ | 169 | __le32 spare_dnodes[100]; /* emergency free dnode list */ |
170 | u32 zero2[1]; /* room for more? */ | 170 | __le32 zero2[1]; /* room for more? */ |
171 | }; | 171 | }; |
172 | 172 | ||
173 | /* The bad block list is 4 sectors long. The first word must be zero, | 173 | /* The bad block list is 4 sectors long. The first word must be zero, |
@@ -202,18 +202,18 @@ struct hpfs_spare_block | |||
202 | 202 | ||
203 | struct code_page_directory | 203 | struct code_page_directory |
204 | { | 204 | { |
205 | u32 magic; /* 4945 21f7 */ | 205 | __le32 magic; /* 4945 21f7 */ |
206 | u32 n_code_pages; /* number of pointers following */ | 206 | __le32 n_code_pages; /* number of pointers following */ |
207 | u32 zero1[2]; | 207 | __le32 zero1[2]; |
208 | struct { | 208 | struct { |
209 | u16 ix; /* index */ | 209 | __le16 ix; /* index */ |
210 | u16 code_page_number; /* code page number */ | 210 | __le16 code_page_number; /* code page number */ |
211 | u32 bounds; /* matches corresponding word | 211 | __le32 bounds; /* matches corresponding word |
212 | in data block */ | 212 | in data block */ |
213 | secno code_page_data; /* sector number of a code_page_data | 213 | __le32 code_page_data; /* sector number of a code_page_data |
214 | containing c.p. array */ | 214 | containing c.p. array */ |
215 | u16 index; /* index in c.p. array in that sector*/ | 215 | __le16 index; /* index in c.p. array in that sector*/ |
216 | u16 unknown; /* some unknown value; usually 0; | 216 | __le16 unknown; /* some unknown value; usually 0; |
217 | 2 in Japanese version */ | 217 | 2 in Japanese version */ |
218 | } array[31]; /* unknown length */ | 218 | } array[31]; /* unknown length */ |
219 | }; | 219 | }; |
@@ -224,19 +224,19 @@ struct code_page_directory | |||
224 | 224 | ||
225 | struct code_page_data | 225 | struct code_page_data |
226 | { | 226 | { |
227 | u32 magic; /* 8945 21f7 */ | 227 | __le32 magic; /* 8945 21f7 */ |
228 | u32 n_used; /* # elements used in c_p_data[] */ | 228 | __le32 n_used; /* # elements used in c_p_data[] */ |
229 | u32 bounds[3]; /* looks a bit like | 229 | __le32 bounds[3]; /* looks a bit like |
230 | (beg1,end1), (beg2,end2) | 230 | (beg1,end1), (beg2,end2) |
231 | one byte each */ | 231 | one byte each */ |
232 | u16 offs[3]; /* offsets from start of sector | 232 | __le16 offs[3]; /* offsets from start of sector |
233 | to start of c_p_data[ix] */ | 233 | to start of c_p_data[ix] */ |
234 | struct { | 234 | struct { |
235 | u16 ix; /* index */ | 235 | __le16 ix; /* index */ |
236 | u16 code_page_number; /* code page number */ | 236 | __le16 code_page_number; /* code page number */ |
237 | u16 unknown; /* the same as in cp directory */ | 237 | __le16 unknown; /* the same as in cp directory */ |
238 | u8 map[128]; /* upcase table for chars 80..ff */ | 238 | u8 map[128]; /* upcase table for chars 80..ff */ |
239 | u16 zero2; | 239 | __le16 zero2; |
240 | } code_page[3]; | 240 | } code_page[3]; |
241 | u8 incognita[78]; | 241 | u8 incognita[78]; |
242 | }; | 242 | }; |
@@ -278,8 +278,8 @@ struct code_page_data | |||
278 | #define DNODE_MAGIC 0x77e40aae | 278 | #define DNODE_MAGIC 0x77e40aae |
279 | 279 | ||
280 | struct dnode { | 280 | struct dnode { |
281 | u32 magic; /* 77e4 0aae */ | 281 | __le32 magic; /* 77e4 0aae */ |
282 | u32 first_free; /* offset from start of dnode to | 282 | __le32 first_free; /* offset from start of dnode to |
283 | first free dir entry */ | 283 | first free dir entry */ |
284 | #ifdef __LITTLE_ENDIAN | 284 | #ifdef __LITTLE_ENDIAN |
285 | u8 root_dnode: 1; /* Is it root dnode? */ | 285 | u8 root_dnode: 1; /* Is it root dnode? */ |
@@ -293,14 +293,14 @@ struct dnode { | |||
293 | u8 root_dnode: 1; /* Is it root dnode? */ | 293 | u8 root_dnode: 1; /* Is it root dnode? */ |
294 | #endif | 294 | #endif |
295 | u8 increment_me2[3]; | 295 | u8 increment_me2[3]; |
296 | secno up; /* (root dnode) directory's fnode | 296 | __le32 up; /* (root dnode) directory's fnode |
297 | (nonroot) parent dnode */ | 297 | (nonroot) parent dnode */ |
298 | dnode_secno self; /* pointer to this dnode */ | 298 | __le32 self; /* pointer to this dnode */ |
299 | u8 dirent[2028]; /* one or more dirents */ | 299 | u8 dirent[2028]; /* one or more dirents */ |
300 | }; | 300 | }; |
301 | 301 | ||
302 | struct hpfs_dirent { | 302 | struct hpfs_dirent { |
303 | u16 length; /* offset to next dirent */ | 303 | __le16 length; /* offset to next dirent */ |
304 | 304 | ||
305 | #ifdef __LITTLE_ENDIAN | 305 | #ifdef __LITTLE_ENDIAN |
306 | u8 first: 1; /* set on phony ^A^A (".") entry */ | 306 | u8 first: 1; /* set on phony ^A^A (".") entry */ |
@@ -346,12 +346,12 @@ struct hpfs_dirent { | |||
346 | u8 read_only: 1; /* dos attrib */ | 346 | u8 read_only: 1; /* dos attrib */ |
347 | #endif | 347 | #endif |
348 | 348 | ||
349 | fnode_secno fnode; /* fnode giving allocation info */ | 349 | __le32 fnode; /* fnode giving allocation info */ |
350 | time32_t write_date; /* mtime */ | 350 | __le32 write_date; /* mtime */ |
351 | u32 file_size; /* file length, bytes */ | 351 | __le32 file_size; /* file length, bytes */ |
352 | time32_t read_date; /* atime */ | 352 | __le32 read_date; /* atime */ |
353 | time32_t creation_date; /* ctime */ | 353 | __le32 creation_date; /* ctime */ |
354 | u32 ea_size; /* total EA length, bytes */ | 354 | __le32 ea_size; /* total EA length, bytes */ |
355 | u8 no_of_acls; /* number of ACL's (low 3 bits) */ | 355 | u8 no_of_acls; /* number of ACL's (low 3 bits) */ |
356 | u8 ix; /* code page index (of filename), see | 356 | u8 ix; /* code page index (of filename), see |
357 | struct code_page_data */ | 357 | struct code_page_data */ |
@@ -375,50 +375,36 @@ struct hpfs_dirent { | |||
375 | 375 | ||
376 | struct bplus_leaf_node | 376 | struct bplus_leaf_node |
377 | { | 377 | { |
378 | u32 file_secno; /* first file sector in extent */ | 378 | __le32 file_secno; /* first file sector in extent */ |
379 | u32 length; /* length, sectors */ | 379 | __le32 length; /* length, sectors */ |
380 | secno disk_secno; /* first corresponding disk sector */ | 380 | __le32 disk_secno; /* first corresponding disk sector */ |
381 | }; | 381 | }; |
382 | 382 | ||
383 | struct bplus_internal_node | 383 | struct bplus_internal_node |
384 | { | 384 | { |
385 | u32 file_secno; /* subtree maps sectors < this */ | 385 | __le32 file_secno; /* subtree maps sectors < this */ |
386 | anode_secno down; /* pointer to subtree */ | 386 | __le32 down; /* pointer to subtree */ |
387 | }; | 387 | }; |
388 | 388 | ||
389 | enum { | ||
390 | BP_hbff = 1, | ||
391 | BP_fnode_parent = 0x20, | ||
392 | BP_binary_search = 0x40, | ||
393 | BP_internal = 0x80 | ||
394 | }; | ||
389 | struct bplus_header | 395 | struct bplus_header |
390 | { | 396 | { |
391 | #ifdef __LITTLE_ENDIAN | 397 | u8 flags; /* bit 0 - high bit of first free entry offset |
392 | u8 hbff: 1; /* high bit of first free entry offset */ | 398 | bit 5 - we're pointed to by an fnode, |
393 | u8 flag1234: 4; | ||
394 | u8 fnode_parent: 1; /* ? we're pointed to by an fnode, | ||
395 | the data btree or some ea or the | ||
396 | main ea bootage pointer ea_secno */ | ||
397 | /* also can get set in fnodes, which | ||
398 | may be a chkdsk glitch or may mean | ||
399 | this bit is irrelevant in fnodes, | ||
400 | or this interpretation is all wet */ | ||
401 | u8 binary_search: 1; /* suggest binary search (unused) */ | ||
402 | u8 internal: 1; /* 1 -> (internal) tree of anodes | ||
403 | 0 -> (leaf) list of extents */ | ||
404 | #else | ||
405 | u8 internal: 1; /* 1 -> (internal) tree of anodes | ||
406 | 0 -> (leaf) list of extents */ | ||
407 | u8 binary_search: 1; /* suggest binary search (unused) */ | ||
408 | u8 fnode_parent: 1; /* ? we're pointed to by an fnode, | ||
409 | the data btree or some ea or the | 399 | the data btree or some ea or the |
410 | main ea bootage pointer ea_secno */ | 400 | main ea bootage pointer ea_secno |
411 | /* also can get set in fnodes, which | 401 | bit 6 - suggest binary search (unused) |
412 | may be a chkdsk glitch or may mean | 402 | bit 7 - 1 -> (internal) tree of anodes |
413 | this bit is irrelevant in fnodes, | 403 | 0 -> (leaf) list of extents */ |
414 | or this interpretation is all wet */ | ||
415 | u8 flag1234: 4; | ||
416 | u8 hbff: 1; /* high bit of first free entry offset */ | ||
417 | #endif | ||
418 | u8 fill[3]; | 404 | u8 fill[3]; |
419 | u8 n_free_nodes; /* free nodes in following array */ | 405 | u8 n_free_nodes; /* free nodes in following array */ |
420 | u8 n_used_nodes; /* used nodes in following array */ | 406 | u8 n_used_nodes; /* used nodes in following array */ |
421 | u16 first_free; /* offset from start of header to | 407 | __le16 first_free; /* offset from start of header to |
422 | first free node in array */ | 408 | first free node in array */ |
423 | union { | 409 | union { |
424 | struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving | 410 | struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving |
@@ -428,6 +414,16 @@ struct bplus_header | |||
428 | } u; | 414 | } u; |
429 | }; | 415 | }; |
430 | 416 | ||
417 | static inline bool bp_internal(struct bplus_header *bp) | ||
418 | { | ||
419 | return bp->flags & BP_internal; | ||
420 | } | ||
421 | |||
422 | static inline bool bp_fnode_parent(struct bplus_header *bp) | ||
423 | { | ||
424 | return bp->flags & BP_fnode_parent; | ||
425 | } | ||
426 | |||
431 | /* fnode: root of allocation b+ tree, and EA's */ | 427 | /* fnode: root of allocation b+ tree, and EA's */ |
432 | 428 | ||
433 | /* Every file and every directory has one fnode, pointed to by the directory | 429 | /* Every file and every directory has one fnode, pointed to by the directory |
@@ -436,62 +432,56 @@ struct bplus_header | |||
436 | 432 | ||
437 | #define FNODE_MAGIC 0xf7e40aae | 433 | #define FNODE_MAGIC 0xf7e40aae |
438 | 434 | ||
435 | enum {FNODE_anode = cpu_to_le16(2), FNODE_dir = cpu_to_le16(256)}; | ||
439 | struct fnode | 436 | struct fnode |
440 | { | 437 | { |
441 | u32 magic; /* f7e4 0aae */ | 438 | __le32 magic; /* f7e4 0aae */ |
442 | u32 zero1[2]; /* read history */ | 439 | __le32 zero1[2]; /* read history */ |
443 | u8 len, name[15]; /* true length, truncated name */ | 440 | u8 len, name[15]; /* true length, truncated name */ |
444 | fnode_secno up; /* pointer to file's directory fnode */ | 441 | __le32 up; /* pointer to file's directory fnode */ |
445 | secno acl_size_l; | 442 | __le32 acl_size_l; |
446 | secno acl_secno; | 443 | __le32 acl_secno; |
447 | u16 acl_size_s; | 444 | __le16 acl_size_s; |
448 | u8 acl_anode; | 445 | u8 acl_anode; |
449 | u8 zero2; /* history bit count */ | 446 | u8 zero2; /* history bit count */ |
450 | u32 ea_size_l; /* length of disk-resident ea's */ | 447 | __le32 ea_size_l; /* length of disk-resident ea's */ |
451 | secno ea_secno; /* first sector of disk-resident ea's*/ | 448 | __le32 ea_secno; /* first sector of disk-resident ea's*/ |
452 | u16 ea_size_s; /* length of fnode-resident ea's */ | 449 | __le16 ea_size_s; /* length of fnode-resident ea's */ |
453 | |||
454 | #ifdef __LITTLE_ENDIAN | ||
455 | u8 flag0: 1; | ||
456 | u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ | ||
457 | u8 flag234567: 6; | ||
458 | #else | ||
459 | u8 flag234567: 6; | ||
460 | u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ | ||
461 | u8 flag0: 1; | ||
462 | #endif | ||
463 | 450 | ||
464 | #ifdef __LITTLE_ENDIAN | 451 | __le16 flags; /* bit 1 set -> ea_secno is an anode */ |
465 | u8 dirflag: 1; /* 1 -> directory. first & only extent | 452 | /* bit 8 set -> directory. first & only extent |
466 | points to dnode. */ | ||
467 | u8 flag9012345: 7; | ||
468 | #else | ||
469 | u8 flag9012345: 7; | ||
470 | u8 dirflag: 1; /* 1 -> directory. first & only extent | ||
471 | points to dnode. */ | 453 | points to dnode. */ |
472 | #endif | ||
473 | |||
474 | struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ | 454 | struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ |
475 | union { | 455 | union { |
476 | struct bplus_leaf_node external[8]; | 456 | struct bplus_leaf_node external[8]; |
477 | struct bplus_internal_node internal[12]; | 457 | struct bplus_internal_node internal[12]; |
478 | } u; | 458 | } u; |
479 | 459 | ||
480 | u32 file_size; /* file length, bytes */ | 460 | __le32 file_size; /* file length, bytes */ |
481 | u32 n_needea; /* number of EA's with NEEDEA set */ | 461 | __le32 n_needea; /* number of EA's with NEEDEA set */ |
482 | u8 user_id[16]; /* unused */ | 462 | u8 user_id[16]; /* unused */ |
483 | u16 ea_offs; /* offset from start of fnode | 463 | __le16 ea_offs; /* offset from start of fnode |
484 | to first fnode-resident ea */ | 464 | to first fnode-resident ea */ |
485 | u8 dasd_limit_treshhold; | 465 | u8 dasd_limit_treshhold; |
486 | u8 dasd_limit_delta; | 466 | u8 dasd_limit_delta; |
487 | u32 dasd_limit; | 467 | __le32 dasd_limit; |
488 | u32 dasd_usage; | 468 | __le32 dasd_usage; |
489 | u8 ea[316]; /* zero or more EA's, packed together | 469 | u8 ea[316]; /* zero or more EA's, packed together |
490 | with no alignment padding. | 470 | with no alignment padding. |
491 | (Do not use this name, get here | 471 | (Do not use this name, get here |
492 | via fnode + ea_offs. I think.) */ | 472 | via fnode + ea_offs. I think.) */ |
493 | }; | 473 | }; |
494 | 474 | ||
475 | static inline bool fnode_in_anode(struct fnode *p) | ||
476 | { | ||
477 | return (p->flags & FNODE_anode) != 0; | ||
478 | } | ||
479 | |||
480 | static inline bool fnode_is_dir(struct fnode *p) | ||
481 | { | ||
482 | return (p->flags & FNODE_dir) != 0; | ||
483 | } | ||
484 | |||
495 | 485 | ||
496 | /* anode: 99.44% pure allocation tree */ | 486 | /* anode: 99.44% pure allocation tree */ |
497 | 487 | ||
@@ -499,9 +489,9 @@ struct fnode | |||
499 | 489 | ||
500 | struct anode | 490 | struct anode |
501 | { | 491 | { |
502 | u32 magic; /* 37e4 0aae */ | 492 | __le32 magic; /* 37e4 0aae */ |
503 | anode_secno self; /* pointer to this anode */ | 493 | __le32 self; /* pointer to this anode */ |
504 | secno up; /* parent anode or fnode */ | 494 | __le32 up; /* parent anode or fnode */ |
505 | 495 | ||
506 | struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ | 496 | struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ |
507 | union { | 497 | union { |
@@ -509,7 +499,7 @@ struct anode | |||
509 | struct bplus_internal_node internal[60]; | 499 | struct bplus_internal_node internal[60]; |
510 | } u; | 500 | } u; |
511 | 501 | ||
512 | u32 fill[3]; /* unused */ | 502 | __le32 fill[3]; /* unused */ |
513 | }; | 503 | }; |
514 | 504 | ||
515 | 505 | ||
@@ -528,32 +518,23 @@ struct anode | |||
528 | run, or in multiple runs. Flags in the fnode tell whether the EA list | 518 | run, or in multiple runs. Flags in the fnode tell whether the EA list |
529 | is immediate, in a single run, or in multiple runs. */ | 519 | is immediate, in a single run, or in multiple runs. */ |
530 | 520 | ||
521 | enum {EA_indirect = 1, EA_anode = 2, EA_needea = 128 }; | ||
531 | struct extended_attribute | 522 | struct extended_attribute |
532 | { | 523 | { |
533 | #ifdef __LITTLE_ENDIAN | 524 | u8 flags; /* bit 0 set -> value gives sector number |
534 | u8 indirect: 1; /* 1 -> value gives sector number | ||
535 | where real value starts */ | 525 | where real value starts */ |
536 | u8 anode: 1; /* 1 -> sector is an anode | 526 | /* bit 1 set -> sector is an anode |
537 | that points to fragmented value */ | 527 | that points to fragmented value */ |
538 | u8 flag23456: 5; | 528 | /* bit 7 set -> required ea */ |
539 | u8 needea: 1; /* required ea */ | ||
540 | #else | ||
541 | u8 needea: 1; /* required ea */ | ||
542 | u8 flag23456: 5; | ||
543 | u8 anode: 1; /* 1 -> sector is an anode | ||
544 | that points to fragmented value */ | ||
545 | u8 indirect: 1; /* 1 -> value gives sector number | ||
546 | where real value starts */ | ||
547 | #endif | ||
548 | u8 namelen; /* length of name, bytes */ | 529 | u8 namelen; /* length of name, bytes */ |
549 | u8 valuelen_lo; /* length of value, bytes */ | 530 | u8 valuelen_lo; /* length of value, bytes */ |
550 | u8 valuelen_hi; /* length of value, bytes */ | 531 | u8 valuelen_hi; /* length of value, bytes */ |
551 | u8 name[0]; | 532 | u8 name[]; |
552 | /* | 533 | /* |
553 | u8 name[namelen]; ascii attrib name | 534 | u8 name[namelen]; ascii attrib name |
554 | u8 nul; terminating '\0', not counted | 535 | u8 nul; terminating '\0', not counted |
555 | u8 value[valuelen]; value, arbitrary | 536 | u8 value[valuelen]; value, arbitrary |
556 | if this.indirect, valuelen is 8 and the value is | 537 | if this.flags & 1, valuelen is 8 and the value is |
557 | u32 length; real length of value, bytes | 538 | u32 length; real length of value, bytes |
558 | secno secno; sector address where it starts | 539 | secno secno; sector address where it starts |
559 | if this.anode, the above sector number is the root of an anode tree | 540 | if this.anode, the above sector number is the root of an anode tree |
@@ -561,6 +542,16 @@ struct extended_attribute | |||
561 | */ | 542 | */ |
562 | }; | 543 | }; |
563 | 544 | ||
545 | static inline bool ea_indirect(struct extended_attribute *ea) | ||
546 | { | ||
547 | return ea->flags & EA_indirect; | ||
548 | } | ||
549 | |||
550 | static inline bool ea_in_anode(struct extended_attribute *ea) | ||
551 | { | ||
552 | return ea->flags & EA_anode; | ||
553 | } | ||
554 | |||
564 | /* | 555 | /* |
565 | Local Variables: | 556 | Local Variables: |
566 | comment-column: 40 | 557 | comment-column: 40 |
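
The hpfs.h rework above replaces endianness-dependent C bitfields (hbff, fnode_parent, internal, ea_anode, dirflag, indirect, ...) with plain flag fields plus mask constants and inline accessors, and types every on-disk integer as __le16/__le32. A mask test on a flag byte reads the same bit on every host, whereas the placement of C bitfields is implementation-defined, which is why the old layout needed paired #ifdef __LITTLE_ENDIAN blocks. A minimal userspace sketch of the pattern follows; the struct layout and names are illustrative, not the HPFS on-disk format.

/* Flags-plus-accessor pattern: (flags & MASK) is portable, bitfield order is not. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { DEMO_anode = 0x02, DEMO_dir = 0x01 };    /* hypothetical flag bits */

struct demo_fnode {
        uint8_t flags;                          /* replaces per-bit bitfields */
        uint8_t name_len;
};

static inline bool demo_in_anode(const struct demo_fnode *f)
{
        return f->flags & DEMO_anode;
}

int main(void)
{
        struct demo_fnode f = { .flags = DEMO_anode, .name_len = 3 };

        printf("in anode: %d\n", demo_in_anode(&f));    /* prints 1 */
        f.flags &= ~DEMO_anode;                         /* clear, as the patch does */
        printf("in anode: %d\n", demo_in_anode(&f));    /* prints 0 */
        return 0;
}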
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 6d2d5008fa43..c07ef1f1ced6 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
@@ -75,7 +75,7 @@ struct hpfs_sb_info { | |||
75 | unsigned char *sb_cp_table; /* code page tables: */ | 75 | unsigned char *sb_cp_table; /* code page tables: */ |
76 | /* 128 bytes uppercasing table & */ | 76 | /* 128 bytes uppercasing table & */ |
77 | /* 128 bytes lowercasing table */ | 77 | /* 128 bytes lowercasing table */ |
78 | unsigned *sb_bmp_dir; /* main bitmap directory */ | 78 | __le32 *sb_bmp_dir; /* main bitmap directory */ |
79 | unsigned sb_c_bitmap; /* current bitmap */ | 79 | unsigned sb_c_bitmap; /* current bitmap */ |
80 | unsigned sb_max_fwd_alloc; /* max forwad allocation */ | 80 | unsigned sb_max_fwd_alloc; /* max forwad allocation */ |
81 | int sb_timeshift; | 81 | int sb_timeshift; |
@@ -93,7 +93,7 @@ struct quad_buffer_head { | |||
93 | static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) | 93 | static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) |
94 | { | 94 | { |
95 | CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); | 95 | CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); |
96 | return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4)); | 96 | return le32_to_cpu(*(__le32 *) ((void *) de + le16_to_cpu(de->length) - 4)); |
97 | } | 97 | } |
98 | 98 | ||
99 | /* The first dir entry in a dnode */ | 99 | /* The first dir entry in a dnode */ |
@@ -141,12 +141,12 @@ static inline struct extended_attribute *next_ea(struct extended_attribute *ea) | |||
141 | 141 | ||
142 | static inline secno ea_sec(struct extended_attribute *ea) | 142 | static inline secno ea_sec(struct extended_attribute *ea) |
143 | { | 143 | { |
144 | return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen))); | 144 | return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 9 + ea->namelen))); |
145 | } | 145 | } |
146 | 146 | ||
147 | static inline secno ea_len(struct extended_attribute *ea) | 147 | static inline secno ea_len(struct extended_attribute *ea) |
148 | { | 148 | { |
149 | return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen))); | 149 | return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 5 + ea->namelen))); |
150 | } | 150 | } |
151 | 151 | ||
152 | static inline char *ea_data(struct extended_attribute *ea) | 152 | static inline char *ea_data(struct extended_attribute *ea) |
@@ -171,7 +171,7 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src) | |||
171 | dst->not_8x3 = n; | 171 | dst->not_8x3 = n; |
172 | } | 172 | } |
173 | 173 | ||
174 | static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n) | 174 | static inline unsigned tstbits(__le32 *bmp, unsigned b, unsigned n) |
175 | { | 175 | { |
176 | int i; | 176 | int i; |
177 | if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; | 177 | if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; |
@@ -268,10 +268,10 @@ void hpfs_evict_inode(struct inode *); | |||
268 | 268 | ||
269 | /* map.c */ | 269 | /* map.c */ |
270 | 270 | ||
271 | unsigned *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); | 271 | __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); |
272 | unsigned *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); | 272 | __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); |
273 | unsigned char *hpfs_load_code_page(struct super_block *, secno); | 273 | unsigned char *hpfs_load_code_page(struct super_block *, secno); |
274 | secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp); | 274 | __le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); |
275 | struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); | 275 | struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); |
276 | struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **); | 276 | struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **); |
277 | struct dnode *hpfs_map_dnode(struct super_block *s, dnode_secno, struct quad_buffer_head *); | 277 | struct dnode *hpfs_map_dnode(struct super_block *s, dnode_secno, struct quad_buffer_head *); |
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index b43066cbdc6a..ed671e0ea784 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
@@ -110,7 +110,7 @@ void hpfs_read_inode(struct inode *i) | |||
110 | } | 110 | } |
111 | } | 111 | } |
112 | } | 112 | } |
113 | if (fnode->dirflag) { | 113 | if (fnode_is_dir(fnode)) { |
114 | int n_dnodes, n_subdirs; | 114 | int n_dnodes, n_subdirs; |
115 | i->i_mode |= S_IFDIR; | 115 | i->i_mode |= S_IFDIR; |
116 | i->i_op = &hpfs_dir_iops; | 116 | i->i_op = &hpfs_dir_iops; |
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index a790821366a7..4acb19d78359 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c | |||
@@ -8,12 +8,12 @@ | |||
8 | 8 | ||
9 | #include "hpfs_fn.h" | 9 | #include "hpfs_fn.h" |
10 | 10 | ||
11 | unsigned *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) | 11 | __le32 *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) |
12 | { | 12 | { |
13 | return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0); | 13 | return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0); |
14 | } | 14 | } |
15 | 15 | ||
16 | unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, | 16 | __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, |
17 | struct quad_buffer_head *qbh, char *id) | 17 | struct quad_buffer_head *qbh, char *id) |
18 | { | 18 | { |
19 | secno sec; | 19 | secno sec; |
@@ -89,18 +89,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) | |||
89 | return cp_table; | 89 | return cp_table; |
90 | } | 90 | } |
91 | 91 | ||
92 | secno *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) | 92 | __le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) |
93 | { | 93 | { |
94 | struct buffer_head *bh; | 94 | struct buffer_head *bh; |
95 | int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21; | 95 | int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21; |
96 | int i; | 96 | int i; |
97 | secno *b; | 97 | __le32 *b; |
98 | if (!(b = kmalloc(n * 512, GFP_KERNEL))) { | 98 | if (!(b = kmalloc(n * 512, GFP_KERNEL))) { |
99 | printk("HPFS: can't allocate memory for bitmap directory\n"); | 99 | printk("HPFS: can't allocate memory for bitmap directory\n"); |
100 | return NULL; | 100 | return NULL; |
101 | } | 101 | } |
102 | for (i=0;i<n;i++) { | 102 | for (i=0;i<n;i++) { |
103 | secno *d = hpfs_map_sector(s, bmp+i, &bh, n - i - 1); | 103 | __le32 *d = hpfs_map_sector(s, bmp+i, &bh, n - i - 1); |
104 | if (!d) { | 104 | if (!d) { |
105 | kfree(b); | 105 | kfree(b); |
106 | return NULL; | 106 | return NULL; |
@@ -130,16 +130,16 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea | |||
130 | (unsigned long)ino); | 130 | (unsigned long)ino); |
131 | goto bail; | 131 | goto bail; |
132 | } | 132 | } |
133 | if (!fnode->dirflag) { | 133 | if (!fnode_is_dir(fnode)) { |
134 | if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != | 134 | if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != |
135 | (fnode->btree.internal ? 12 : 8)) { | 135 | (bp_internal(&fnode->btree) ? 12 : 8)) { |
136 | hpfs_error(s, | 136 | hpfs_error(s, |
137 | "bad number of nodes in fnode %08lx", | 137 | "bad number of nodes in fnode %08lx", |
138 | (unsigned long)ino); | 138 | (unsigned long)ino); |
139 | goto bail; | 139 | goto bail; |
140 | } | 140 | } |
141 | if (le16_to_cpu(fnode->btree.first_free) != | 141 | if (le16_to_cpu(fnode->btree.first_free) != |
142 | 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { | 142 | 8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 8 : 12)) { |
143 | hpfs_error(s, | 143 | hpfs_error(s, |
144 | "bad first_free pointer in fnode %08lx", | 144 | "bad first_free pointer in fnode %08lx", |
145 | (unsigned long)ino); | 145 | (unsigned long)ino); |
@@ -187,12 +187,12 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff | |||
187 | goto bail; | 187 | goto bail; |
188 | } | 188 | } |
189 | if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != | 189 | if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != |
190 | (anode->btree.internal ? 60 : 40)) { | 190 | (bp_internal(&anode->btree) ? 60 : 40)) { |
191 | hpfs_error(s, "bad number of nodes in anode %08x", ano); | 191 | hpfs_error(s, "bad number of nodes in anode %08x", ano); |
192 | goto bail; | 192 | goto bail; |
193 | } | 193 | } |
194 | if (le16_to_cpu(anode->btree.first_free) != | 194 | if (le16_to_cpu(anode->btree.first_free) != |
195 | 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) { | 195 | 8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 8 : 12)) { |
196 | hpfs_error(s, "bad first_free pointer in anode %08x", ano); | 196 | hpfs_error(s, "bad first_free pointer in anode %08x", ano); |
197 | goto bail; | 197 | goto bail; |
198 | } | 198 | } |
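
The fnode and anode checks updated above encode a fixed B+ tree geometry: the embedded bplus_header is 8 bytes and is followed by either internal entries of 8 bytes or leaf extents of 12 bytes (12/8 slots in an fnode, 60/40 in an anode), so first_free must land at 8 + n_used_nodes * entry_size. A small sketch of that arithmetic for the fnode case; the helper below is illustrative, not kernel code.

/* Mirror of the hpfs_map_fnode() sanity checks: 8-byte header, then either
 * 12 internal entries of 8 bytes or 8 leaf entries of 12 bytes. */
#include <stdbool.h>
#include <stdio.h>

static bool fnode_btree_geometry_ok(bool internal, unsigned n_used,
                                    unsigned n_free, unsigned first_free)
{
        unsigned slots = internal ? 12 : 8;     /* entries that fit in an fnode */
        unsigned esize = internal ? 8 : 12;     /* bytes per entry */

        return n_used + n_free == slots &&
               first_free == 8 + n_used * esize;
}

int main(void)
{
        /* A freshly created directory fnode: 1 used leaf entry, 7 free,
         * first_free = 8 + 1 * 12 = 0x14, matching hpfs_mkdir() below. */
        printf("%d\n", fnode_btree_geometry_ok(false, 1, 7, 0x14));     /* 1 */
        return 0;
}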
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 30dd7b10b507..9083ef8af58c 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c | |||
@@ -70,7 +70,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
70 | fnode->len = len; | 70 | fnode->len = len; |
71 | memcpy(fnode->name, name, len > 15 ? 15 : len); | 71 | memcpy(fnode->name, name, len > 15 ? 15 : len); |
72 | fnode->up = cpu_to_le32(dir->i_ino); | 72 | fnode->up = cpu_to_le32(dir->i_ino); |
73 | fnode->dirflag = 1; | 73 | fnode->flags |= FNODE_dir; |
74 | fnode->btree.n_free_nodes = 7; | 74 | fnode->btree.n_free_nodes = 7; |
75 | fnode->btree.n_used_nodes = 1; | 75 | fnode->btree.n_used_nodes = 1; |
76 | fnode->btree.first_free = cpu_to_le16(0x14); | 76 | fnode->btree.first_free = cpu_to_le16(0x14); |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 54f6eccb79d9..706a12c083ea 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -572,7 +572,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
572 | mark_buffer_dirty(bh2); | 572 | mark_buffer_dirty(bh2); |
573 | } | 573 | } |
574 | 574 | ||
575 | if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) { | 575 | if (spareblock->hotfixes_used || spareblock->n_spares_used) { |
576 | if (errs >= 2) { | 576 | if (errs >= 2) { |
577 | printk("HPFS: Hotfixes not supported here, try chkdsk\n"); | 577 | printk("HPFS: Hotfixes not supported here, try chkdsk\n"); |
578 | mark_dirty(s, 0); | 578 | mark_dirty(s, 0); |
@@ -645,7 +645,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
645 | root->i_mtime.tv_nsec = 0; | 645 | root->i_mtime.tv_nsec = 0; |
646 | root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date)); | 646 | root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date)); |
647 | root->i_ctime.tv_nsec = 0; | 647 | root->i_ctime.tv_nsec = 0; |
648 | hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size); | 648 | hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size); |
649 | hpfs_i(root)->i_parent_dir = root->i_ino; | 649 | hpfs_i(root)->i_parent_dir = root->i_ino; |
650 | if (root->i_size == -1) | 650 | if (root->i_size == -1) |
651 | root->i_size = 2048; | 651 | root->i_size = 2048; |
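
The one-character super.c fix above matters because de->ea_size is a 32-bit little-endian field (see the hpfs_dirent changes earlier); running it through the 16-bit conversion helper keeps only the low half of the value. A userspace illustration follows, with get_le16/get_le32 as simple stand-ins for le16_to_cpu/le32_to_cpu operating on a raw byte image rather than the kernel helpers.

/* Decoding a 32-bit little-endian on-disk field with a 16-bit helper
 * silently drops the upper bytes. */
#include <stdint.h>
#include <stdio.h>

static uint16_t get_le16(const uint8_t *p)
{
        return (uint16_t)(p[0] | (p[1] << 8));
}

static uint32_t get_le32(const uint8_t *p)
{
        return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
               ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
        uint8_t ea_size[4] = { 0x45, 0x23, 0x01, 0x00 };        /* 0x00012345 on disk */

        printf("le16: 0x%x\n", get_le16(ea_size));              /* 0x2345 - truncated */
        printf("le32: 0x%x\n", (unsigned)get_le32(ea_size));    /* 0x12345 - full value */
        return 0;
}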
diff --git a/fs/inode.c b/fs/inode.c index c474c1d7062b..c99163b1b310 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1487,10 +1487,30 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, | |||
1487 | return 0; | 1487 | return 0; |
1488 | } | 1488 | } |
1489 | 1489 | ||
1490 | /* | ||
1491 | * This does the actual work of updating an inode's time or version. The | ||
1492 | * caller must have called mnt_want_write() before calling this. | ||
1493 | */ | ||
1494 | static int update_time(struct inode *inode, struct timespec *time, int flags) | ||
1495 | { | ||
1496 | if (inode->i_op->update_time) | ||
1497 | return inode->i_op->update_time(inode, time, flags); | ||
1498 | |||
1499 | if (flags & S_ATIME) | ||
1500 | inode->i_atime = *time; | ||
1501 | if (flags & S_VERSION) | ||
1502 | inode_inc_iversion(inode); | ||
1503 | if (flags & S_CTIME) | ||
1504 | inode->i_ctime = *time; | ||
1505 | if (flags & S_MTIME) | ||
1506 | inode->i_mtime = *time; | ||
1507 | mark_inode_dirty_sync(inode); | ||
1508 | return 0; | ||
1509 | } | ||
1510 | |||
1490 | /** | 1511 | /** |
1491 | * touch_atime - update the access time | 1512 | * touch_atime - update the access time |
1492 | * @mnt: mount the inode is accessed on | 1513 | * @path: the &struct path to update |
1493 | * @dentry: dentry accessed | ||
1494 | * | 1514 | * |
1495 | * Update the accessed time on an inode and mark it for writeback. | 1515 | * Update the accessed time on an inode and mark it for writeback. |
1496 | * This function automatically handles read only file systems and media, | 1516 | * This function automatically handles read only file systems and media, |
@@ -1525,12 +1545,83 @@ void touch_atime(struct path *path) | |||
1525 | if (mnt_want_write(mnt)) | 1545 | if (mnt_want_write(mnt)) |
1526 | return; | 1546 | return; |
1527 | 1547 | ||
1528 | inode->i_atime = now; | 1548 | /* |
1529 | mark_inode_dirty_sync(inode); | 1549 | * File systems can error out when updating inodes if they need to |
1550 | * allocate new space to modify an inode (such is the case for | ||
1551 | * Btrfs), but since we touch atime while walking down the path we | ||
1552 | * really don't care if we failed to update the atime of the file, | ||
1553 | * so just ignore the return value. | ||
1554 | */ | ||
1555 | update_time(inode, &now, S_ATIME); | ||
1530 | mnt_drop_write(mnt); | 1556 | mnt_drop_write(mnt); |
1531 | } | 1557 | } |
1532 | EXPORT_SYMBOL(touch_atime); | 1558 | EXPORT_SYMBOL(touch_atime); |
1533 | 1559 | ||
1560 | /* | ||
1561 | * The logic we want is | ||
1562 | * | ||
1563 | * if suid or (sgid and xgrp) | ||
1564 | * remove privs | ||
1565 | */ | ||
1566 | int should_remove_suid(struct dentry *dentry) | ||
1567 | { | ||
1568 | umode_t mode = dentry->d_inode->i_mode; | ||
1569 | int kill = 0; | ||
1570 | |||
1571 | /* suid always must be killed */ | ||
1572 | if (unlikely(mode & S_ISUID)) | ||
1573 | kill = ATTR_KILL_SUID; | ||
1574 | |||
1575 | /* | ||
1576 | * sgid without any exec bits is just a mandatory locking mark; leave | ||
1577 | * it alone. If some exec bits are set, it's a real sgid; kill it. | ||
1578 | */ | ||
1579 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) | ||
1580 | kill |= ATTR_KILL_SGID; | ||
1581 | |||
1582 | if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) | ||
1583 | return kill; | ||
1584 | |||
1585 | return 0; | ||
1586 | } | ||
1587 | EXPORT_SYMBOL(should_remove_suid); | ||
1588 | |||
1589 | static int __remove_suid(struct dentry *dentry, int kill) | ||
1590 | { | ||
1591 | struct iattr newattrs; | ||
1592 | |||
1593 | newattrs.ia_valid = ATTR_FORCE | kill; | ||
1594 | return notify_change(dentry, &newattrs); | ||
1595 | } | ||
1596 | |||
1597 | int file_remove_suid(struct file *file) | ||
1598 | { | ||
1599 | struct dentry *dentry = file->f_path.dentry; | ||
1600 | struct inode *inode = dentry->d_inode; | ||
1601 | int killsuid; | ||
1602 | int killpriv; | ||
1603 | int error = 0; | ||
1604 | |||
1605 | /* Fast path for nothing security related */ | ||
1606 | if (IS_NOSEC(inode)) | ||
1607 | return 0; | ||
1608 | |||
1609 | killsuid = should_remove_suid(dentry); | ||
1610 | killpriv = security_inode_need_killpriv(dentry); | ||
1611 | |||
1612 | if (killpriv < 0) | ||
1613 | return killpriv; | ||
1614 | if (killpriv) | ||
1615 | error = security_inode_killpriv(dentry); | ||
1616 | if (!error && killsuid) | ||
1617 | error = __remove_suid(dentry, killsuid); | ||
1618 | if (!error && (inode->i_sb->s_flags & MS_NOSEC)) | ||
1619 | inode->i_flags |= S_NOSEC; | ||
1620 | |||
1621 | return error; | ||
1622 | } | ||
1623 | EXPORT_SYMBOL(file_remove_suid); | ||
1624 | |||
1534 | /** | 1625 | /** |
1535 | * file_update_time - update mtime and ctime time | 1626 | * file_update_time - update mtime and ctime time |
1536 | * @file: file accessed | 1627 | * @file: file accessed |
@@ -1540,18 +1631,20 @@ EXPORT_SYMBOL(touch_atime); | |||
1540 | * usage in the file write path of filesystems, and filesystems may | 1631 | * usage in the file write path of filesystems, and filesystems may |
1541 | * choose to explicitly ignore update via this function with the | 1632 | * choose to explicitly ignore update via this function with the |
1542 | * S_NOCMTIME inode flag, e.g. for network filesystem where these | 1633 | * S_NOCMTIME inode flag, e.g. for network filesystem where these |
1543 | * timestamps are handled by the server. | 1634 | * timestamps are handled by the server. This can return an error for |
1635 | * file systems that need to allocate space in order to update an inode. | ||
1544 | */ | 1636 | */ |
1545 | 1637 | ||
1546 | void file_update_time(struct file *file) | 1638 | int file_update_time(struct file *file) |
1547 | { | 1639 | { |
1548 | struct inode *inode = file->f_path.dentry->d_inode; | 1640 | struct inode *inode = file->f_path.dentry->d_inode; |
1549 | struct timespec now; | 1641 | struct timespec now; |
1550 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; | 1642 | int sync_it = 0; |
1643 | int ret; | ||
1551 | 1644 | ||
1552 | /* First try to exhaust all avenues to not sync */ | 1645 | /* First try to exhaust all avenues to not sync */ |
1553 | if (IS_NOCMTIME(inode)) | 1646 | if (IS_NOCMTIME(inode)) |
1554 | return; | 1647 | return 0; |
1555 | 1648 | ||
1556 | now = current_fs_time(inode->i_sb); | 1649 | now = current_fs_time(inode->i_sb); |
1557 | if (!timespec_equal(&inode->i_mtime, &now)) | 1650 | if (!timespec_equal(&inode->i_mtime, &now)) |
@@ -1564,21 +1657,16 @@ void file_update_time(struct file *file) | |||
1564 | sync_it |= S_VERSION; | 1657 | sync_it |= S_VERSION; |
1565 | 1658 | ||
1566 | if (!sync_it) | 1659 | if (!sync_it) |
1567 | return; | 1660 | return 0; |
1568 | 1661 | ||
1569 | /* Finally allowed to write? Takes lock. */ | 1662 | /* Finally allowed to write? Takes lock. */ |
1570 | if (mnt_want_write_file(file)) | 1663 | if (mnt_want_write_file(file)) |
1571 | return; | 1664 | return 0; |
1572 | 1665 | ||
1573 | /* Only change inode inside the lock region */ | 1666 | ret = update_time(inode, &now, sync_it); |
1574 | if (sync_it & S_VERSION) | ||
1575 | inode_inc_iversion(inode); | ||
1576 | if (sync_it & S_CTIME) | ||
1577 | inode->i_ctime = now; | ||
1578 | if (sync_it & S_MTIME) | ||
1579 | inode->i_mtime = now; | ||
1580 | mark_inode_dirty_sync(inode); | ||
1581 | mnt_drop_write_file(file); | 1667 | mnt_drop_write_file(file); |
1668 | |||
1669 | return ret; | ||
1582 | } | 1670 | } |
1583 | EXPORT_SYMBOL(file_update_time); | 1671 | EXPORT_SYMBOL(file_update_time); |
1584 | 1672 | ||
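
The fs/inode.c changes above funnel both touch_atime() and file_update_time() through a single update_time() helper: if the filesystem supplies ->update_time() it persists (or fails) the change itself, otherwise the VFS stamps the fields and calls mark_inode_dirty_sync(), and file_update_time() now returns the result. A compact userspace model of that dispatch follows; the types, names, and flag values are stand-ins, not the kernel's.

/* Model of the update_time() dispatch: a per-inode hook, or a generic
 * fallback that writes the timestamps and marks the inode dirty. */
#include <time.h>

enum { X_ATIME = 1, X_MTIME = 2, X_CTIME = 4, X_VERSION = 8 };

struct x_inode {
        time_t atime, mtime, ctime;
        unsigned long long version;
        int dirty;
        int (*update_time)(struct x_inode *, const time_t *, int flags);
};

static int x_update_time(struct x_inode *inode, const time_t *now, int flags)
{
        if (inode->update_time)                 /* filesystem wants control */
                return inode->update_time(inode, now, flags);

        if (flags & X_ATIME) inode->atime = *now;
        if (flags & X_VERSION) inode->version++;
        if (flags & X_CTIME) inode->ctime = *now;
        if (flags & X_MTIME) inode->mtime = *now;
        inode->dirty = 1;                       /* generic fallback path */
        return 0;
}

int main(void)
{
        struct x_inode ino = { 0 };
        time_t now = time(NULL);

        /* Like touch_atime(): ignore the return value on the atime path. */
        (void)x_update_time(&ino, &now, X_ATIME);
        /* Like file_update_time(): propagate a possible error. */
        return x_update_time(&ino, &now, X_MTIME | X_CTIME | X_VERSION);
}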
diff --git a/fs/internal.h b/fs/internal.h index 9962c59ba280..18bc216ea09d 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(struct super_block *); | |||
56 | 56 | ||
57 | extern void __init mnt_init(void); | 57 | extern void __init mnt_init(void); |
58 | 58 | ||
59 | DECLARE_BRLOCK(vfsmount_lock); | 59 | extern struct lglock vfsmount_lock; |
60 | 60 | ||
61 | 61 | ||
62 | /* | 62 | /* |
@@ -100,6 +100,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, | |||
100 | 100 | ||
101 | extern long do_handle_open(int mountdirfd, | 101 | extern long do_handle_open(int mountdirfd, |
102 | struct file_handle __user *ufh, int open_flag); | 102 | struct file_handle __user *ufh, int open_flag); |
103 | extern int open_check_o_direct(struct file *f); | ||
103 | 104 | ||
104 | /* | 105 | /* |
105 | * inode.c | 106 | * inode.c |
diff --git a/fs/isofs/export.c b/fs/isofs/export.c index dd4687ff30d0..aa4356d09eee 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c | |||
@@ -107,12 +107,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) | |||
107 | } | 107 | } |
108 | 108 | ||
109 | static int | 109 | static int |
110 | isofs_export_encode_fh(struct dentry *dentry, | 110 | isofs_export_encode_fh(struct inode *inode, |
111 | __u32 *fh32, | 111 | __u32 *fh32, |
112 | int *max_len, | 112 | int *max_len, |
113 | int connectable) | 113 | struct inode *parent) |
114 | { | 114 | { |
115 | struct inode * inode = dentry->d_inode; | ||
116 | struct iso_inode_info * ei = ISOFS_I(inode); | 115 | struct iso_inode_info * ei = ISOFS_I(inode); |
117 | int len = *max_len; | 116 | int len = *max_len; |
118 | int type = 1; | 117 | int type = 1; |
@@ -124,7 +123,7 @@ isofs_export_encode_fh(struct dentry *dentry, | |||
124 | * offset of the inode and the upper 16 bits of fh32[1] to | 123 | * offset of the inode and the upper 16 bits of fh32[1] to |
125 | * hold the offset of the parent. | 124 | * hold the offset of the parent. |
126 | */ | 125 | */ |
127 | if (connectable && (len < 5)) { | 126 | if (parent && (len < 5)) { |
128 | *max_len = 5; | 127 | *max_len = 5; |
129 | return 255; | 128 | return 255; |
130 | } else if (len < 3) { | 129 | } else if (len < 3) { |
@@ -136,16 +135,12 @@ isofs_export_encode_fh(struct dentry *dentry, | |||
136 | fh32[0] = ei->i_iget5_block; | 135 | fh32[0] = ei->i_iget5_block; |
137 | fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ | 136 | fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ |
138 | fh32[2] = inode->i_generation; | 137 | fh32[2] = inode->i_generation; |
139 | if (connectable && !S_ISDIR(inode->i_mode)) { | 138 | if (parent) { |
140 | struct inode *parent; | ||
141 | struct iso_inode_info *eparent; | 139 | struct iso_inode_info *eparent; |
142 | spin_lock(&dentry->d_lock); | ||
143 | parent = dentry->d_parent->d_inode; | ||
144 | eparent = ISOFS_I(parent); | 140 | eparent = ISOFS_I(parent); |
145 | fh32[3] = eparent->i_iget5_block; | 141 | fh32[3] = eparent->i_iget5_block; |
146 | fh16[3] = (__u16)eparent->i_iget5_offset; /* fh16 [sic] */ | 142 | fh16[3] = (__u16)eparent->i_iget5_offset; /* fh16 [sic] */ |
147 | fh32[4] = parent->i_generation; | 143 | fh32[4] = parent->i_generation; |
148 | spin_unlock(&dentry->d_lock); | ||
149 | len = 5; | 144 | len = 5; |
150 | type = 2; | 145 | type = 2; |
151 | } | 146 | } |
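
The isofs change above follows the new export_operations ->encode_fh() calling convention: the VFS passes the inode and, when a connectable handle is wanted, the parent inode, so the filesystem no longer walks dentry->d_parent under d_lock; the callback still fills the handle words, adjusts *max_len, and returns a type code (255 when the buffer is too small). A sketch of an encoder following that convention; the handle layout and the demo_inode type are invented for illustration.

/* Encoder following the (inode, fh, max_len, parent) convention. */
#include <stdint.h>
#include <stdio.h>

struct demo_inode {
        uint32_t ino;
        uint32_t generation;
};

static int demo_encode_fh(struct demo_inode *inode, uint32_t *fh32,
                          int *max_len, struct demo_inode *parent)
{
        int need = parent ? 4 : 2;

        if (*max_len < need) {          /* tell the caller how much room is needed */
                *max_len = need;
                return 255;
        }
        fh32[0] = inode->ino;
        fh32[1] = inode->generation;
        if (parent) {
                fh32[2] = parent->ino;
                fh32[3] = parent->generation;
        }
        *max_len = need;
        return parent ? 2 : 1;          /* type codes, as in the isofs version */
}

int main(void)
{
        struct demo_inode file = { 42, 7 }, dir = { 2, 1 };
        uint32_t fh[4];
        int len = 4;
        int type = demo_encode_fh(&file, fh, &len, &dir);

        printf("type %d, len %d\n", type, len);
        return 0;
}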
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index f32f346f4b0a..69a48c2944da 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig | |||
@@ -1,6 +1,8 @@ | |||
1 | config JBD2 | 1 | config JBD2 |
2 | tristate | 2 | tristate |
3 | select CRC32 | 3 | select CRC32 |
4 | select CRYPTO | ||
5 | select CRYPTO_CRC32C | ||
4 | help | 6 | help |
5 | This is a generic journaling layer for block devices that support | 7 | This is a generic journaling layer for block devices that support |
6 | both 32-bit and 64-bit block numbers. It is currently used by | 8 | both 32-bit and 64-bit block numbers. It is currently used by |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 840f70f50792..216f4299f65e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -85,6 +85,24 @@ nope: | |||
85 | __brelse(bh); | 85 | __brelse(bh); |
86 | } | 86 | } |
87 | 87 | ||
88 | static void jbd2_commit_block_csum_set(journal_t *j, | ||
89 | struct journal_head *descriptor) | ||
90 | { | ||
91 | struct commit_header *h; | ||
92 | __u32 csum; | ||
93 | |||
94 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
95 | return; | ||
96 | |||
97 | h = (struct commit_header *)(jh2bh(descriptor)->b_data); | ||
98 | h->h_chksum_type = 0; | ||
99 | h->h_chksum_size = 0; | ||
100 | h->h_chksum[0] = 0; | ||
101 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | ||
102 | j->j_blocksize); | ||
103 | h->h_chksum[0] = cpu_to_be32(csum); | ||
104 | } | ||
105 | |||
88 | /* | 106 | /* |
89 | * Done it all: now submit the commit record. We should have | 107 | * Done it all: now submit the commit record. We should have |
90 | * cleaned up our previous buffers by now, so if we are in abort | 108 | * cleaned up our previous buffers by now, so if we are in abort |
@@ -128,6 +146,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
128 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; | 146 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; |
129 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); | 147 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); |
130 | } | 148 | } |
149 | jbd2_commit_block_csum_set(journal, descriptor); | ||
131 | 150 | ||
132 | JBUFFER_TRACE(descriptor, "submit commit block"); | 151 | JBUFFER_TRACE(descriptor, "submit commit block"); |
133 | lock_buffer(bh); | 152 | lock_buffer(bh); |
@@ -301,6 +320,44 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | |||
301 | tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); | 320 | tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); |
302 | } | 321 | } |
303 | 322 | ||
323 | static void jbd2_descr_block_csum_set(journal_t *j, | ||
324 | struct journal_head *descriptor) | ||
325 | { | ||
326 | struct jbd2_journal_block_tail *tail; | ||
327 | __u32 csum; | ||
328 | |||
329 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
330 | return; | ||
331 | |||
332 | tail = (struct jbd2_journal_block_tail *) | ||
333 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
334 | sizeof(struct jbd2_journal_block_tail)); | ||
335 | tail->t_checksum = 0; | ||
336 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | ||
337 | j->j_blocksize); | ||
338 | tail->t_checksum = cpu_to_be32(csum); | ||
339 | } | ||
340 | |||
341 | static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, | ||
342 | struct buffer_head *bh, __u32 sequence) | ||
343 | { | ||
344 | struct page *page = bh->b_page; | ||
345 | __u8 *addr; | ||
346 | __u32 csum; | ||
347 | |||
348 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
349 | return; | ||
350 | |||
351 | sequence = cpu_to_be32(sequence); | ||
352 | addr = kmap_atomic(page, KM_USER0); | ||
353 | csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | ||
354 | sizeof(sequence)); | ||
355 | csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), | ||
356 | bh->b_size); | ||
357 | kunmap_atomic(addr, KM_USER0); | ||
358 | |||
359 | tag->t_checksum = cpu_to_be32(csum); | ||
360 | } | ||
304 | /* | 361 | /* |
305 | * jbd2_journal_commit_transaction | 362 | * jbd2_journal_commit_transaction |
306 | * | 363 | * |
@@ -334,6 +391,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
334 | unsigned long first_block; | 391 | unsigned long first_block; |
335 | tid_t first_tid; | 392 | tid_t first_tid; |
336 | int update_tail; | 393 | int update_tail; |
394 | int csum_size = 0; | ||
395 | |||
396 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
397 | csum_size = sizeof(struct jbd2_journal_block_tail); | ||
337 | 398 | ||
338 | /* | 399 | /* |
339 | * First job: lock down the current transaction and wait for | 400 | * First job: lock down the current transaction and wait for |
@@ -627,7 +688,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
627 | 688 | ||
628 | tag = (journal_block_tag_t *) tagp; | 689 | tag = (journal_block_tag_t *) tagp; |
629 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); | 690 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); |
630 | tag->t_flags = cpu_to_be32(tag_flag); | 691 | tag->t_flags = cpu_to_be16(tag_flag); |
692 | jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), | ||
693 | commit_transaction->t_tid); | ||
631 | tagp += tag_bytes; | 694 | tagp += tag_bytes; |
632 | space_left -= tag_bytes; | 695 | space_left -= tag_bytes; |
633 | 696 | ||
@@ -643,7 +706,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
643 | 706 | ||
644 | if (bufs == journal->j_wbufsize || | 707 | if (bufs == journal->j_wbufsize || |
645 | commit_transaction->t_buffers == NULL || | 708 | commit_transaction->t_buffers == NULL || |
646 | space_left < tag_bytes + 16) { | 709 | space_left < tag_bytes + 16 + csum_size) { |
647 | 710 | ||
648 | jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); | 711 | jbd_debug(4, "JBD2: Submit %d IOs\n", bufs); |
649 | 712 | ||
@@ -651,8 +714,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
651 | submitting the IOs. "tag" still points to | 714 | submitting the IOs. "tag" still points to |
652 | the last tag we set up. */ | 715 | the last tag we set up. */ |
653 | 716 | ||
654 | tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); | 717 | tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); |
655 | 718 | ||
719 | jbd2_descr_block_csum_set(journal, descriptor); | ||
656 | start_journal_io: | 720 | start_journal_io: |
657 | for (i = 0; i < bufs; i++) { | 721 | for (i = 0; i < bufs; i++) { |
658 | struct buffer_head *bh = wbuf[i]; | 722 | struct buffer_head *bh = wbuf[i]; |
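The new *_csum_set helpers above all funnel through jbd2_chksum(), a helper these hunks assume exists and which wraps the crc32c shash transform loaded into journal->j_chksum_driver. A plausible sketch of such a wrapper, shown for context only; treat the details as illustrative rather than the authoritative definition:

	static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
				      const void *address, unsigned int length)
	{
		struct {
			struct shash_desc shash;
			char ctx[4];		/* room for the crc32c running state */
		} desc;
		int err;

		desc.shash.tfm = journal->j_chksum_driver;
		desc.shash.flags = 0;
		*(u32 *)desc.ctx = crc;		/* seed with the caller's running crc */

		err = crypto_shash_update(&desc.shash, address, length);
		BUG_ON(err);

		return *(u32 *)desc.ctx;	/* crc32c leaves its state in the ctx */
	}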
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 1afb701622b0..e9a3c4c85594 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -97,6 +97,43 @@ EXPORT_SYMBOL(jbd2_inode_cache); | |||
97 | static void __journal_abort_soft (journal_t *journal, int errno); | 97 | static void __journal_abort_soft (journal_t *journal, int errno); |
98 | static int jbd2_journal_create_slab(size_t slab_size); | 98 | static int jbd2_journal_create_slab(size_t slab_size); |
99 | 99 | ||
100 | /* Checksumming functions */ | ||
101 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | ||
102 | { | ||
103 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
104 | return 1; | ||
105 | |||
106 | return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; | ||
107 | } | ||
108 | |||
109 | static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) | ||
110 | { | ||
111 | __u32 csum, old_csum; | ||
112 | |||
113 | old_csum = sb->s_checksum; | ||
114 | sb->s_checksum = 0; | ||
115 | csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t)); | ||
116 | sb->s_checksum = old_csum; | ||
117 | |||
118 | return cpu_to_be32(csum); | ||
119 | } | ||
120 | |||
121 | int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) | ||
122 | { | ||
123 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
124 | return 1; | ||
125 | |||
126 | return sb->s_checksum == jbd2_superblock_csum(j, sb); | ||
127 | } | ||
128 | |||
129 | void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) | ||
130 | { | ||
131 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
132 | return; | ||
133 | |||
134 | sb->s_checksum = jbd2_superblock_csum(j, sb); | ||
135 | } | ||
136 | |||
100 | /* | 137 | /* |
101 | * Helper function used to manage commit timeouts | 138 | * Helper function used to manage commit timeouts |
102 | */ | 139 | */ |
@@ -1348,6 +1385,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal) | |||
1348 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", | 1385 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", |
1349 | journal->j_errno); | 1386 | journal->j_errno); |
1350 | sb->s_errno = cpu_to_be32(journal->j_errno); | 1387 | sb->s_errno = cpu_to_be32(journal->j_errno); |
1388 | jbd2_superblock_csum_set(journal, sb); | ||
1351 | read_unlock(&journal->j_state_lock); | 1389 | read_unlock(&journal->j_state_lock); |
1352 | 1390 | ||
1353 | jbd2_write_superblock(journal, WRITE_SYNC); | 1391 | jbd2_write_superblock(journal, WRITE_SYNC); |
@@ -1376,6 +1414,9 @@ static int journal_get_superblock(journal_t *journal) | |||
1376 | } | 1414 | } |
1377 | } | 1415 | } |
1378 | 1416 | ||
1417 | if (buffer_verified(bh)) | ||
1418 | return 0; | ||
1419 | |||
1379 | sb = journal->j_superblock; | 1420 | sb = journal->j_superblock; |
1380 | 1421 | ||
1381 | err = -EINVAL; | 1422 | err = -EINVAL; |
@@ -1413,6 +1454,43 @@ static int journal_get_superblock(journal_t *journal) | |||
1413 | goto out; | 1454 | goto out; |
1414 | } | 1455 | } |
1415 | 1456 | ||
1457 | if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && | ||
1458 | JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | ||
1459 | /* Can't have checksum v1 and v2 on at the same time! */ | ||
1460 | printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " | ||
1461 | "at the same time!\n"); | ||
1462 | goto out; | ||
1463 | } | ||
1464 | |||
1465 | if (!jbd2_verify_csum_type(journal, sb)) { | ||
1466 | printk(KERN_ERR "JBD: Unknown checksum type\n"); | ||
1467 | goto out; | ||
1468 | } | ||
1469 | |||
1470 | /* Load the checksum driver */ | ||
1471 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | ||
1472 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); | ||
1473 | if (IS_ERR(journal->j_chksum_driver)) { | ||
1474 | printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); | ||
1475 | err = PTR_ERR(journal->j_chksum_driver); | ||
1476 | journal->j_chksum_driver = NULL; | ||
1477 | goto out; | ||
1478 | } | ||
1479 | } | ||
1480 | |||
1481 | /* Check superblock checksum */ | ||
1482 | if (!jbd2_superblock_csum_verify(journal, sb)) { | ||
1483 | printk(KERN_ERR "JBD: journal checksum error\n"); | ||
1484 | goto out; | ||
1485 | } | ||
1486 | |||
1487 | /* Precompute checksum seed for all metadata */ | ||
1488 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
1489 | journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, | ||
1490 | sizeof(sb->s_uuid)); | ||
1491 | |||
1492 | set_buffer_verified(bh); | ||
1493 | |||
1416 | return 0; | 1494 | return 0; |
1417 | 1495 | ||
1418 | out: | 1496 | out: |
@@ -1564,6 +1642,8 @@ int jbd2_journal_destroy(journal_t *journal) | |||
1564 | iput(journal->j_inode); | 1642 | iput(journal->j_inode); |
1565 | if (journal->j_revoke) | 1643 | if (journal->j_revoke) |
1566 | jbd2_journal_destroy_revoke(journal); | 1644 | jbd2_journal_destroy_revoke(journal); |
1645 | if (journal->j_chksum_driver) | ||
1646 | crypto_free_shash(journal->j_chksum_driver); | ||
1567 | kfree(journal->j_wbuf); | 1647 | kfree(journal->j_wbuf); |
1568 | kfree(journal); | 1648 | kfree(journal); |
1569 | 1649 | ||
@@ -1653,6 +1733,10 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com | |||
1653 | int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | 1733 | int jbd2_journal_set_features (journal_t *journal, unsigned long compat, |
1654 | unsigned long ro, unsigned long incompat) | 1734 | unsigned long ro, unsigned long incompat) |
1655 | { | 1735 | { |
1736 | #define INCOMPAT_FEATURE_ON(f) \ | ||
1737 | ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f))) | ||
1738 | #define COMPAT_FEATURE_ON(f) \ | ||
1739 | ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f))) | ||
1656 | journal_superblock_t *sb; | 1740 | journal_superblock_t *sb; |
1657 | 1741 | ||
1658 | if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) | 1742 | if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) |
@@ -1661,16 +1745,54 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1661 | if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) | 1745 | if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) |
1662 | return 0; | 1746 | return 0; |
1663 | 1747 | ||
1748 | /* Asking for checksumming v2 and v1? Only give them v2. */ | ||
1749 | if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && | ||
1750 | compat & JBD2_FEATURE_COMPAT_CHECKSUM) | ||
1751 | compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; | ||
1752 | |||
1664 | jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", | 1753 | jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", |
1665 | compat, ro, incompat); | 1754 | compat, ro, incompat); |
1666 | 1755 | ||
1667 | sb = journal->j_superblock; | 1756 | sb = journal->j_superblock; |
1668 | 1757 | ||
1758 | /* If enabling v2 checksums, update superblock */ | ||
1759 | if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { | ||
1760 | sb->s_checksum_type = JBD2_CRC32C_CHKSUM; | ||
1761 | sb->s_feature_compat &= | ||
1762 | ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); | ||
1763 | |||
1764 | /* Load the checksum driver */ | ||
1765 | if (journal->j_chksum_driver == NULL) { | ||
1766 | journal->j_chksum_driver = crypto_alloc_shash("crc32c", | ||
1767 | 0, 0); | ||
1768 | if (IS_ERR(journal->j_chksum_driver)) { | ||
1769 | printk(KERN_ERR "JBD: Cannot load crc32c " | ||
1770 | "driver.\n"); | ||
1771 | journal->j_chksum_driver = NULL; | ||
1772 | return 0; | ||
1773 | } | ||
1774 | } | ||
1775 | |||
1776 | /* Precompute checksum seed for all metadata */ | ||
1777 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
1778 | JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
1779 | journal->j_csum_seed = jbd2_chksum(journal, ~0, | ||
1780 | sb->s_uuid, | ||
1781 | sizeof(sb->s_uuid)); | ||
1782 | } | ||
1783 | |||
1784 | /* If enabling v1 checksums, downgrade superblock */ | ||
1785 | if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) | ||
1786 | sb->s_feature_incompat &= | ||
1787 | ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); | ||
1788 | |||
1669 | sb->s_feature_compat |= cpu_to_be32(compat); | 1789 | sb->s_feature_compat |= cpu_to_be32(compat); |
1670 | sb->s_feature_ro_compat |= cpu_to_be32(ro); | 1790 | sb->s_feature_ro_compat |= cpu_to_be32(ro); |
1671 | sb->s_feature_incompat |= cpu_to_be32(incompat); | 1791 | sb->s_feature_incompat |= cpu_to_be32(incompat); |
1672 | 1792 | ||
1673 | return 1; | 1793 | return 1; |
1794 | #undef COMPAT_FEATURE_ON | ||
1795 | #undef INCOMPAT_FEATURE_ON | ||
1674 | } | 1796 | } |
1675 | 1797 | ||
1676 | /* | 1798 | /* |
@@ -1975,10 +2097,16 @@ int jbd2_journal_blocks_per_page(struct inode *inode) | |||
1975 | */ | 2097 | */ |
1976 | size_t journal_tag_bytes(journal_t *journal) | 2098 | size_t journal_tag_bytes(journal_t *journal) |
1977 | { | 2099 | { |
2100 | journal_block_tag_t tag; | ||
2101 | size_t x = 0; | ||
2102 | |||
2103 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
2104 | x += sizeof(tag.t_checksum); | ||
2105 | |||
1978 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) | 2106 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) |
1979 | return JBD2_TAG_SIZE64; | 2107 | return x + JBD2_TAG_SIZE64; |
1980 | else | 2108 | else |
1981 | return JBD2_TAG_SIZE32; | 2109 | return x + JBD2_TAG_SIZE32; |
1982 | } | 2110 | } |
1983 | 2111 | ||
1984 | /* | 2112 | /* |
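With jbd2_journal_set_features() now resolving the v1/v2 conflict itself, a client filesystem only has to ask for the flavour it wants: requesting both yields v2 alone, and the crc32c driver plus j_csum_seed are set up on demand. A hedged usage sketch (not part of this patch):

	static int example_enable_journal_csums(journal_t *journal)
	{
		/* Ask for both; set_features drops the v1 COMPAT bit when v2 wins. */
		if (!jbd2_journal_set_features(journal,
					       JBD2_FEATURE_COMPAT_CHECKSUM, 0,
					       JBD2_FEATURE_INCOMPAT_CSUM_V2))
			return -EINVAL;	/* feature refused or driver unavailable */
		return 0;
	}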
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index c1a03354a22f..0131e4362534 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -174,6 +174,25 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
174 | return 0; | 174 | return 0; |
175 | } | 175 | } |
176 | 176 | ||
177 | static int jbd2_descr_block_csum_verify(journal_t *j, | ||
178 | void *buf) | ||
179 | { | ||
180 | struct jbd2_journal_block_tail *tail; | ||
181 | __u32 provided, calculated; | ||
182 | |||
183 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
184 | return 1; | ||
185 | |||
186 | tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - | ||
187 | sizeof(struct jbd2_journal_block_tail)); | ||
188 | provided = tail->t_checksum; | ||
189 | tail->t_checksum = 0; | ||
190 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | ||
191 | tail->t_checksum = provided; | ||
192 | |||
193 | provided = be32_to_cpu(provided); | ||
194 | return provided == calculated; | ||
195 | } | ||
177 | 196 | ||
178 | /* | 197 | /* |
179 | * Count the number of in-use tags in a journal descriptor block. | 198 | * Count the number of in-use tags in a journal descriptor block. |
@@ -186,6 +205,9 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) | |||
186 | int nr = 0, size = journal->j_blocksize; | 205 | int nr = 0, size = journal->j_blocksize; |
187 | int tag_bytes = journal_tag_bytes(journal); | 206 | int tag_bytes = journal_tag_bytes(journal); |
188 | 207 | ||
208 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
209 | size -= sizeof(struct jbd2_journal_block_tail); | ||
210 | |||
189 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 211 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
190 | 212 | ||
191 | while ((tagp - bh->b_data + tag_bytes) <= size) { | 213 | while ((tagp - bh->b_data + tag_bytes) <= size) { |
@@ -193,10 +215,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) | |||
193 | 215 | ||
194 | nr++; | 216 | nr++; |
195 | tagp += tag_bytes; | 217 | tagp += tag_bytes; |
196 | if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) | 218 | if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) |
197 | tagp += 16; | 219 | tagp += 16; |
198 | 220 | ||
199 | if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) | 221 | if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) |
200 | break; | 222 | break; |
201 | } | 223 | } |
202 | 224 | ||
@@ -353,6 +375,41 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, | |||
353 | return 0; | 375 | return 0; |
354 | } | 376 | } |
355 | 377 | ||
378 | static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | ||
379 | { | ||
380 | struct commit_header *h; | ||
381 | __u32 provided, calculated; | ||
382 | |||
383 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
384 | return 1; | ||
385 | |||
386 | h = buf; | ||
387 | provided = h->h_chksum[0]; | ||
388 | h->h_chksum[0] = 0; | ||
389 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | ||
390 | h->h_chksum[0] = provided; | ||
391 | |||
392 | provided = be32_to_cpu(provided); | ||
393 | return provided == calculated; | ||
394 | } | ||
395 | |||
396 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, | ||
397 | void *buf, __u32 sequence) | ||
398 | { | ||
399 | __u32 provided, calculated; | ||
400 | |||
401 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
402 | return 1; | ||
403 | |||
404 | sequence = cpu_to_be32(sequence); | ||
405 | calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | ||
406 | sizeof(sequence)); | ||
407 | calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); | ||
408 | provided = be32_to_cpu(tag->t_checksum); | ||
409 | |||
410 | return provided == cpu_to_be32(calculated); | ||
411 | } | ||
412 | |||
356 | static int do_one_pass(journal_t *journal, | 413 | static int do_one_pass(journal_t *journal, |
357 | struct recovery_info *info, enum passtype pass) | 414 | struct recovery_info *info, enum passtype pass) |
358 | { | 415 | { |
@@ -366,6 +423,7 @@ static int do_one_pass(journal_t *journal, | |||
366 | int blocktype; | 423 | int blocktype; |
367 | int tag_bytes = journal_tag_bytes(journal); | 424 | int tag_bytes = journal_tag_bytes(journal); |
368 | __u32 crc32_sum = ~0; /* Transactional Checksums */ | 425 | __u32 crc32_sum = ~0; /* Transactional Checksums */ |
426 | int descr_csum_size = 0; | ||
369 | 427 | ||
370 | /* | 428 | /* |
371 | * First thing is to establish what we expect to find in the log | 429 | * First thing is to establish what we expect to find in the log |
@@ -451,6 +509,18 @@ static int do_one_pass(journal_t *journal, | |||
451 | 509 | ||
452 | switch(blocktype) { | 510 | switch(blocktype) { |
453 | case JBD2_DESCRIPTOR_BLOCK: | 511 | case JBD2_DESCRIPTOR_BLOCK: |
512 | /* Verify checksum first */ | ||
513 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
514 | JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
515 | descr_csum_size = | ||
516 | sizeof(struct jbd2_journal_block_tail); | ||
517 | if (descr_csum_size > 0 && | ||
518 | !jbd2_descr_block_csum_verify(journal, | ||
519 | bh->b_data)) { | ||
520 | err = -EIO; | ||
521 | goto failed; | ||
522 | } | ||
523 | |||
454 | /* If it is a valid descriptor block, replay it | 524 | /* If it is a valid descriptor block, replay it |
455 | * in pass REPLAY; if journal_checksums enabled, then | 525 | * in pass REPLAY; if journal_checksums enabled, then |
456 | * calculate checksums in PASS_SCAN, otherwise, | 526 | * calculate checksums in PASS_SCAN, otherwise, |
@@ -481,11 +551,11 @@ static int do_one_pass(journal_t *journal, | |||
481 | 551 | ||
482 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 552 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
483 | while ((tagp - bh->b_data + tag_bytes) | 553 | while ((tagp - bh->b_data + tag_bytes) |
484 | <= journal->j_blocksize) { | 554 | <= journal->j_blocksize - descr_csum_size) { |
485 | unsigned long io_block; | 555 | unsigned long io_block; |
486 | 556 | ||
487 | tag = (journal_block_tag_t *) tagp; | 557 | tag = (journal_block_tag_t *) tagp; |
488 | flags = be32_to_cpu(tag->t_flags); | 558 | flags = be16_to_cpu(tag->t_flags); |
489 | 559 | ||
490 | io_block = next_log_block++; | 560 | io_block = next_log_block++; |
491 | wrap(journal, next_log_block); | 561 | wrap(journal, next_log_block); |
@@ -516,6 +586,19 @@ static int do_one_pass(journal_t *journal, | |||
516 | goto skip_write; | 586 | goto skip_write; |
517 | } | 587 | } |
518 | 588 | ||
589 | /* Look for block corruption */ | ||
590 | if (!jbd2_block_tag_csum_verify( | ||
591 | journal, tag, obh->b_data, | ||
592 | be32_to_cpu(tmp->h_sequence))) { | ||
593 | brelse(obh); | ||
594 | success = -EIO; | ||
595 | printk(KERN_ERR "JBD: Invalid " | ||
596 | "checksum recovering " | ||
597 | "block %llu in log\n", | ||
598 | blocknr); | ||
599 | continue; | ||
600 | } | ||
601 | |||
519 | /* Find a buffer for the new | 602 | /* Find a buffer for the new |
520 | * data being restored */ | 603 | * data being restored */ |
521 | nbh = __getblk(journal->j_fs_dev, | 604 | nbh = __getblk(journal->j_fs_dev, |
@@ -650,6 +733,19 @@ static int do_one_pass(journal_t *journal, | |||
650 | } | 733 | } |
651 | crc32_sum = ~0; | 734 | crc32_sum = ~0; |
652 | } | 735 | } |
736 | if (pass == PASS_SCAN && | ||
737 | !jbd2_commit_block_csum_verify(journal, | ||
738 | bh->b_data)) { | ||
739 | info->end_transaction = next_commit_ID; | ||
740 | |||
741 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
742 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | ||
743 | journal->j_failed_commit = | ||
744 | next_commit_ID; | ||
745 | brelse(bh); | ||
746 | break; | ||
747 | } | ||
748 | } | ||
653 | brelse(bh); | 749 | brelse(bh); |
654 | next_commit_ID++; | 750 | next_commit_ID++; |
655 | continue; | 751 | continue; |
@@ -706,6 +802,25 @@ static int do_one_pass(journal_t *journal, | |||
706 | return err; | 802 | return err; |
707 | } | 803 | } |
708 | 804 | ||
805 | static int jbd2_revoke_block_csum_verify(journal_t *j, | ||
806 | void *buf) | ||
807 | { | ||
808 | struct jbd2_journal_revoke_tail *tail; | ||
809 | __u32 provided, calculated; | ||
810 | |||
811 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
812 | return 1; | ||
813 | |||
814 | tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - | ||
815 | sizeof(struct jbd2_journal_revoke_tail)); | ||
816 | provided = tail->r_checksum; | ||
817 | tail->r_checksum = 0; | ||
818 | calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); | ||
819 | tail->r_checksum = provided; | ||
820 | |||
821 | provided = be32_to_cpu(provided); | ||
822 | return provided == calculated; | ||
823 | } | ||
709 | 824 | ||
710 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ | 825 | /* Scan a revoke record, marking all blocks mentioned as revoked. */ |
711 | 826 | ||
@@ -720,6 +835,9 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, | |||
720 | offset = sizeof(jbd2_journal_revoke_header_t); | 835 | offset = sizeof(jbd2_journal_revoke_header_t); |
721 | max = be32_to_cpu(header->r_count); | 836 | max = be32_to_cpu(header->r_count); |
722 | 837 | ||
838 | if (!jbd2_revoke_block_csum_verify(journal, header)) | ||
839 | return -EINVAL; | ||
840 | |||
723 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) | 841 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) |
724 | record_len = 8; | 842 | record_len = 8; |
725 | 843 | ||
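The descriptor, commit and revoke verify helpers above all follow the same shape: remember the on-disk checksum, zero the field so it does not checksum itself, recompute over the whole block with the UUID-derived seed, restore the stored value, and compare in CPU byte order. A generic sketch of that pattern, purely illustrative:

	static int example_block_csum_verify(journal_t *j, void *buf, __be32 *field)
	{
		__be32 saved = *field;
		__u32 calculated;

		*field = 0;			/* the field must not checksum itself */
		calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
		*field = saved;			/* restore the on-disk value */

		return be32_to_cpu(saved) == calculated;
	}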
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 6973705d6a3d..f30b80b4ce8b 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -578,6 +578,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
578 | struct jbd2_revoke_record_s *record, | 578 | struct jbd2_revoke_record_s *record, |
579 | int write_op) | 579 | int write_op) |
580 | { | 580 | { |
581 | int csum_size = 0; | ||
581 | struct journal_head *descriptor; | 582 | struct journal_head *descriptor; |
582 | int offset; | 583 | int offset; |
583 | journal_header_t *header; | 584 | journal_header_t *header; |
@@ -592,9 +593,13 @@ static void write_one_revoke_record(journal_t *journal, | |||
592 | descriptor = *descriptorp; | 593 | descriptor = *descriptorp; |
593 | offset = *offsetp; | 594 | offset = *offsetp; |
594 | 595 | ||
596 | /* Do we need to leave space at the end for a checksum? */ | ||
597 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
598 | csum_size = sizeof(struct jbd2_journal_revoke_tail); | ||
599 | |||
595 | /* Make sure we have a descriptor with space left for the record */ | 600 | /* Make sure we have a descriptor with space left for the record */ |
596 | if (descriptor) { | 601 | if (descriptor) { |
597 | if (offset == journal->j_blocksize) { | 602 | if (offset >= journal->j_blocksize - csum_size) { |
598 | flush_descriptor(journal, descriptor, offset, write_op); | 603 | flush_descriptor(journal, descriptor, offset, write_op); |
599 | descriptor = NULL; | 604 | descriptor = NULL; |
600 | } | 605 | } |
@@ -631,6 +636,24 @@ static void write_one_revoke_record(journal_t *journal, | |||
631 | *offsetp = offset; | 636 | *offsetp = offset; |
632 | } | 637 | } |
633 | 638 | ||
639 | static void jbd2_revoke_csum_set(journal_t *j, | ||
640 | struct journal_head *descriptor) | ||
641 | { | ||
642 | struct jbd2_journal_revoke_tail *tail; | ||
643 | __u32 csum; | ||
644 | |||
645 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | ||
646 | return; | ||
647 | |||
648 | tail = (struct jbd2_journal_revoke_tail *) | ||
649 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
650 | sizeof(struct jbd2_journal_revoke_tail)); | ||
651 | tail->r_checksum = 0; | ||
652 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | ||
653 | j->j_blocksize); | ||
654 | tail->r_checksum = cpu_to_be32(csum); | ||
655 | } | ||
656 | |||
634 | /* | 657 | /* |
635 | * Flush a revoke descriptor out to the journal. If we are aborting, | 658 | * Flush a revoke descriptor out to the journal. If we are aborting, |
636 | * this is a noop; otherwise we are generating a buffer which needs to | 659 | * this is a noop; otherwise we are generating a buffer which needs to |
@@ -652,6 +675,8 @@ static void flush_descriptor(journal_t *journal, | |||
652 | 675 | ||
653 | header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; | 676 | header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; |
654 | header->r_count = cpu_to_be32(offset); | 677 | header->r_count = cpu_to_be32(offset); |
678 | jbd2_revoke_csum_set(journal, descriptor); | ||
679 | |||
655 | set_buffer_jwrite(bh); | 680 | set_buffer_jwrite(bh); |
656 | BUFFER_TRACE(bh, "write"); | 681 | BUFFER_TRACE(bh, "write"); |
657 | set_buffer_dirty(bh); | 682 | set_buffer_dirty(bh); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index ddcd3549c6c2..fb1ab9533b67 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -162,8 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
162 | 162 | ||
163 | alloc_transaction: | 163 | alloc_transaction: |
164 | if (!journal->j_running_transaction) { | 164 | if (!journal->j_running_transaction) { |
165 | new_transaction = kmem_cache_alloc(transaction_cache, | 165 | new_transaction = kmem_cache_zalloc(transaction_cache, |
166 | gfp_mask | __GFP_ZERO); | 166 | gfp_mask); |
167 | if (!new_transaction) { | 167 | if (!new_transaction) { |
168 | /* | 168 | /* |
169 | * If __GFP_FS is not present, then we may be | 169 | * If __GFP_FS is not present, then we may be |
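The transaction.c hunk is a pure cleanup: kmem_cache_zalloc() is kmem_cache_alloc() with __GFP_ZERO folded in, so the two forms below are equivalent and only the spelling changes:

	/* before */
	new_transaction = kmem_cache_alloc(transaction_cache, gfp_mask | __GFP_ZERO);
	/* after */
	new_transaction = kmem_cache_zalloc(transaction_cache, gfp_mask);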
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 55a0c1dceadf..44dca1f041c5 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h | |||
@@ -126,6 +126,10 @@ struct jffs2_sb_info { | |||
126 | struct jffs2_inodirty *wbuf_inodes; | 126 | struct jffs2_inodirty *wbuf_inodes; |
127 | struct rw_semaphore wbuf_sem; /* Protects the write buffer */ | 127 | struct rw_semaphore wbuf_sem; /* Protects the write buffer */ |
128 | 128 | ||
129 | struct delayed_work wbuf_dwork; /* write-buffer write-out work */ | ||
130 | int wbuf_queued; /* non-zero if delayed work is queued */ | ||
131 | spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and wbuf_queued */ | ||
132 | |||
129 | unsigned char *oobbuf; | 133 | unsigned char *oobbuf; |
130 | int oobavail; /* How many bytes are available for JFFS2 in OOB */ | 134 | int oobavail; /* How many bytes are available for JFFS2 in OOB */ |
131 | #endif | 135 | #endif |
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 1cd3aec9d9ae..bcd983d7e7f9 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
@@ -95,6 +95,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) | |||
95 | #define jffs2_ubivol(c) (0) | 95 | #define jffs2_ubivol(c) (0) |
96 | #define jffs2_ubivol_setup(c) (0) | 96 | #define jffs2_ubivol_setup(c) (0) |
97 | #define jffs2_ubivol_cleanup(c) do {} while (0) | 97 | #define jffs2_ubivol_cleanup(c) do {} while (0) |
98 | #define jffs2_dirty_trigger(c) do {} while (0) | ||
98 | 99 | ||
99 | #else /* NAND and/or ECC'd NOR support present */ | 100 | #else /* NAND and/or ECC'd NOR support present */ |
100 | 101 | ||
@@ -135,14 +136,10 @@ void jffs2_ubivol_cleanup(struct jffs2_sb_info *c); | |||
135 | #define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && ! (c->mtd->flags & MTD_BIT_WRITEABLE)) | 136 | #define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && ! (c->mtd->flags & MTD_BIT_WRITEABLE)) |
136 | int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c); | 137 | int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c); |
137 | void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c); | 138 | void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c); |
139 | void jffs2_dirty_trigger(struct jffs2_sb_info *c); | ||
138 | 140 | ||
139 | #endif /* WRITEBUFFER */ | 141 | #endif /* WRITEBUFFER */ |
140 | 142 | ||
141 | static inline void jffs2_dirty_trigger(struct jffs2_sb_info *c) | ||
142 | { | ||
143 | OFNI_BS_2SFFJ(c)->s_dirt = 1; | ||
144 | } | ||
145 | |||
146 | /* background.c */ | 143 | /* background.c */ |
147 | int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c); | 144 | int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c); |
148 | void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c); | 145 | void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c); |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f9916f312bd8..bc586f204228 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -63,21 +63,6 @@ static void jffs2_i_init_once(void *foo) | |||
63 | inode_init_once(&f->vfs_inode); | 63 | inode_init_once(&f->vfs_inode); |
64 | } | 64 | } |
65 | 65 | ||
66 | static void jffs2_write_super(struct super_block *sb) | ||
67 | { | ||
68 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | ||
69 | |||
70 | lock_super(sb); | ||
71 | sb->s_dirt = 0; | ||
72 | |||
73 | if (!(sb->s_flags & MS_RDONLY)) { | ||
74 | jffs2_dbg(1, "%s()\n", __func__); | ||
75 | jffs2_flush_wbuf_gc(c, 0); | ||
76 | } | ||
77 | |||
78 | unlock_super(sb); | ||
79 | } | ||
80 | |||
81 | static const char *jffs2_compr_name(unsigned int compr) | 66 | static const char *jffs2_compr_name(unsigned int compr) |
82 | { | 67 | { |
83 | switch (compr) { | 68 | switch (compr) { |
@@ -113,8 +98,6 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) | |||
113 | { | 98 | { |
114 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | 99 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); |
115 | 100 | ||
116 | jffs2_write_super(sb); | ||
117 | |||
118 | mutex_lock(&c->alloc_sem); | 101 | mutex_lock(&c->alloc_sem); |
119 | jffs2_flush_wbuf_pad(c); | 102 | jffs2_flush_wbuf_pad(c); |
120 | mutex_unlock(&c->alloc_sem); | 103 | mutex_unlock(&c->alloc_sem); |
@@ -251,7 +234,6 @@ static const struct super_operations jffs2_super_operations = | |||
251 | .alloc_inode = jffs2_alloc_inode, | 234 | .alloc_inode = jffs2_alloc_inode, |
252 | .destroy_inode =jffs2_destroy_inode, | 235 | .destroy_inode =jffs2_destroy_inode, |
253 | .put_super = jffs2_put_super, | 236 | .put_super = jffs2_put_super, |
254 | .write_super = jffs2_write_super, | ||
255 | .statfs = jffs2_statfs, | 237 | .statfs = jffs2_statfs, |
256 | .remount_fs = jffs2_remount_fs, | 238 | .remount_fs = jffs2_remount_fs, |
257 | .evict_inode = jffs2_evict_inode, | 239 | .evict_inode = jffs2_evict_inode, |
@@ -319,9 +301,6 @@ static void jffs2_put_super (struct super_block *sb) | |||
319 | 301 | ||
320 | jffs2_dbg(2, "%s()\n", __func__); | 302 | jffs2_dbg(2, "%s()\n", __func__); |
321 | 303 | ||
322 | if (sb->s_dirt) | ||
323 | jffs2_write_super(sb); | ||
324 | |||
325 | mutex_lock(&c->alloc_sem); | 304 | mutex_lock(&c->alloc_sem); |
326 | jffs2_flush_wbuf_pad(c); | 305 | jffs2_flush_wbuf_pad(c); |
327 | mutex_unlock(&c->alloc_sem); | 306 | mutex_unlock(&c->alloc_sem); |
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 74d9be19df3f..6f4529d3697f 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/mtd/nand.h> | 20 | #include <linux/mtd/nand.h> |
21 | #include <linux/jiffies.h> | 21 | #include <linux/jiffies.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/writeback.h> | ||
23 | 24 | ||
24 | #include "nodelist.h" | 25 | #include "nodelist.h" |
25 | 26 | ||
@@ -85,7 +86,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino) | |||
85 | { | 86 | { |
86 | struct jffs2_inodirty *new; | 87 | struct jffs2_inodirty *new; |
87 | 88 | ||
88 | /* Mark the superblock dirty so that kupdated will flush... */ | 89 | /* Schedule delayed write-buffer write-out */ |
89 | jffs2_dirty_trigger(c); | 90 | jffs2_dirty_trigger(c); |
90 | 91 | ||
91 | if (jffs2_wbuf_pending_for_ino(c, ino)) | 92 | if (jffs2_wbuf_pending_for_ino(c, ino)) |
@@ -1148,6 +1149,47 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * | |||
1148 | return 1; | 1149 | return 1; |
1149 | } | 1150 | } |
1150 | 1151 | ||
1152 | static struct jffs2_sb_info *work_to_sb(struct work_struct *work) | ||
1153 | { | ||
1154 | struct delayed_work *dwork; | ||
1155 | |||
1156 | dwork = container_of(work, struct delayed_work, work); | ||
1157 | return container_of(dwork, struct jffs2_sb_info, wbuf_dwork); | ||
1158 | } | ||
1159 | |||
1160 | static void delayed_wbuf_sync(struct work_struct *work) | ||
1161 | { | ||
1162 | struct jffs2_sb_info *c = work_to_sb(work); | ||
1163 | struct super_block *sb = OFNI_BS_2SFFJ(c); | ||
1164 | |||
1165 | spin_lock(&c->wbuf_dwork_lock); | ||
1166 | c->wbuf_queued = 0; | ||
1167 | spin_unlock(&c->wbuf_dwork_lock); | ||
1168 | |||
1169 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1170 | jffs2_dbg(1, "%s()\n", __func__); | ||
1171 | jffs2_flush_wbuf_gc(c, 0); | ||
1172 | } | ||
1173 | } | ||
1174 | |||
1175 | void jffs2_dirty_trigger(struct jffs2_sb_info *c) | ||
1176 | { | ||
1177 | struct super_block *sb = OFNI_BS_2SFFJ(c); | ||
1178 | unsigned long delay; | ||
1179 | |||
1180 | if (sb->s_flags & MS_RDONLY) | ||
1181 | return; | ||
1182 | |||
1183 | spin_lock(&c->wbuf_dwork_lock); | ||
1184 | if (!c->wbuf_queued) { | ||
1185 | jffs2_dbg(1, "%s()\n", __func__); | ||
1186 | delay = msecs_to_jiffies(dirty_writeback_interval * 10); | ||
1187 | queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay); | ||
1188 | c->wbuf_queued = 1; | ||
1189 | } | ||
1190 | spin_unlock(&c->wbuf_dwork_lock); | ||
1191 | } | ||
1192 | |||
1151 | int jffs2_nand_flash_setup(struct jffs2_sb_info *c) | 1193 | int jffs2_nand_flash_setup(struct jffs2_sb_info *c) |
1152 | { | 1194 | { |
1153 | struct nand_ecclayout *oinfo = c->mtd->ecclayout; | 1195 | struct nand_ecclayout *oinfo = c->mtd->ecclayout; |
@@ -1169,6 +1211,8 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) | |||
1169 | 1211 | ||
1170 | /* Initialise write buffer */ | 1212 | /* Initialise write buffer */ |
1171 | init_rwsem(&c->wbuf_sem); | 1213 | init_rwsem(&c->wbuf_sem); |
1214 | spin_lock_init(&c->wbuf_dwork_lock); | ||
1215 | INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); | ||
1172 | c->wbuf_pagesize = c->mtd->writesize; | 1216 | c->wbuf_pagesize = c->mtd->writesize; |
1173 | c->wbuf_ofs = 0xFFFFFFFF; | 1217 | c->wbuf_ofs = 0xFFFFFFFF; |
1174 | 1218 | ||
@@ -1207,8 +1251,8 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { | |||
1207 | 1251 | ||
1208 | /* Initialize write buffer */ | 1252 | /* Initialize write buffer */ |
1209 | init_rwsem(&c->wbuf_sem); | 1253 | init_rwsem(&c->wbuf_sem); |
1210 | 1254 | spin_lock_init(&c->wbuf_dwork_lock); | |
1211 | 1255 | INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); | |
1212 | c->wbuf_pagesize = c->mtd->erasesize; | 1256 | c->wbuf_pagesize = c->mtd->erasesize; |
1213 | 1257 | ||
1214 | /* Find a suitable c->sector_size | 1258 | /* Find a suitable c->sector_size |
@@ -1267,6 +1311,9 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { | |||
1267 | 1311 | ||
1268 | /* Initialize write buffer */ | 1312 | /* Initialize write buffer */ |
1269 | init_rwsem(&c->wbuf_sem); | 1313 | init_rwsem(&c->wbuf_sem); |
1314 | spin_lock_init(&c->wbuf_dwork_lock); | ||
1315 | INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); | ||
1316 | |||
1270 | c->wbuf_pagesize = c->mtd->writesize; | 1317 | c->wbuf_pagesize = c->mtd->writesize; |
1271 | c->wbuf_ofs = 0xFFFFFFFF; | 1318 | c->wbuf_ofs = 0xFFFFFFFF; |
1272 | 1319 | ||
@@ -1299,6 +1346,8 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) { | |||
1299 | return 0; | 1346 | return 0; |
1300 | 1347 | ||
1301 | init_rwsem(&c->wbuf_sem); | 1348 | init_rwsem(&c->wbuf_sem); |
1349 | spin_lock_init(&c->wbuf_dwork_lock); | ||
1350 | INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); | ||
1302 | 1351 | ||
1303 | c->wbuf_pagesize = c->mtd->writesize; | 1352 | c->wbuf_pagesize = c->mtd->writesize; |
1304 | c->wbuf_ofs = 0xFFFFFFFF; | 1353 | c->wbuf_ofs = 0xFFFFFFFF; |
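With ->write_super gone, JFFS2 now drives write-buffer flushing itself through the wbuf_dwork item scheduled by jffs2_dirty_trigger(). A hedged sketch (not from the hunks above, and assuming write-buffer support is compiled in) of the matching teardown side: paths that sync or unmount the filesystem should make sure the delayed work has run or been cancelled before the write buffer goes away, for example:

	static int example_jffs2_sync_fs(struct super_block *sb, int wait)
	{
		struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);

		cancel_delayed_work_sync(&c->wbuf_dwork);	/* finish any pending flush */

		mutex_lock(&c->alloc_sem);
		jffs2_flush_wbuf_pad(c);
		mutex_unlock(&c->alloc_sem);
		return 0;
	}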
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 1ead0750cdbb..80938fda67e0 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -251,39 +251,40 @@ out_err: | |||
251 | return err; | 251 | return err; |
252 | } | 252 | } |
253 | 253 | ||
254 | static int lockd_up_net(struct net *net) | 254 | static int lockd_up_net(struct svc_serv *serv, struct net *net) |
255 | { | 255 | { |
256 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 256 | struct lockd_net *ln = net_generic(net, lockd_net_id); |
257 | struct svc_serv *serv = nlmsvc_rqst->rq_server; | ||
258 | int error; | 257 | int error; |
259 | 258 | ||
260 | if (ln->nlmsvc_users) | 259 | if (ln->nlmsvc_users++) |
261 | return 0; | 260 | return 0; |
262 | 261 | ||
263 | error = svc_rpcb_setup(serv, net); | 262 | error = svc_bind(serv, net); |
264 | if (error) | 263 | if (error) |
265 | goto err_rpcb; | 264 | goto err_bind; |
266 | 265 | ||
267 | error = make_socks(serv, net); | 266 | error = make_socks(serv, net); |
268 | if (error < 0) | 267 | if (error < 0) |
269 | goto err_socks; | 268 | goto err_socks; |
269 | dprintk("lockd_up_net: per-net data created; net=%p\n", net); | ||
270 | return 0; | 270 | return 0; |
271 | 271 | ||
272 | err_socks: | 272 | err_socks: |
273 | svc_rpcb_cleanup(serv, net); | 273 | svc_rpcb_cleanup(serv, net); |
274 | err_rpcb: | 274 | err_bind: |
275 | ln->nlmsvc_users--; | ||
275 | return error; | 276 | return error; |
276 | } | 277 | } |
277 | 278 | ||
278 | static void lockd_down_net(struct net *net) | 279 | static void lockd_down_net(struct svc_serv *serv, struct net *net) |
279 | { | 280 | { |
280 | struct lockd_net *ln = net_generic(net, lockd_net_id); | 281 | struct lockd_net *ln = net_generic(net, lockd_net_id); |
281 | struct svc_serv *serv = nlmsvc_rqst->rq_server; | ||
282 | 282 | ||
283 | if (ln->nlmsvc_users) { | 283 | if (ln->nlmsvc_users) { |
284 | if (--ln->nlmsvc_users == 0) { | 284 | if (--ln->nlmsvc_users == 0) { |
285 | nlm_shutdown_hosts_net(net); | 285 | nlm_shutdown_hosts_net(net); |
286 | svc_shutdown_net(serv, net); | 286 | svc_shutdown_net(serv, net); |
287 | dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net); | ||
287 | } | 288 | } |
288 | } else { | 289 | } else { |
289 | printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", | 290 | printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", |
@@ -292,21 +293,60 @@ static void lockd_down_net(struct net *net) | |||
292 | } | 293 | } |
293 | } | 294 | } |
294 | 295 | ||
295 | /* | 296 | static int lockd_start_svc(struct svc_serv *serv) |
296 | * Bring up the lockd process if it's not already up. | 297 | { |
297 | */ | 298 | int error; |
298 | int lockd_up(struct net *net) | 299 | |
300 | if (nlmsvc_rqst) | ||
301 | return 0; | ||
302 | |||
303 | /* | ||
304 | * Create the kernel thread and wait for it to start. | ||
305 | */ | ||
306 | nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); | ||
307 | if (IS_ERR(nlmsvc_rqst)) { | ||
308 | error = PTR_ERR(nlmsvc_rqst); | ||
309 | printk(KERN_WARNING | ||
310 | "lockd_up: svc_rqst allocation failed, error=%d\n", | ||
311 | error); | ||
312 | goto out_rqst; | ||
313 | } | ||
314 | |||
315 | svc_sock_update_bufs(serv); | ||
316 | serv->sv_maxconn = nlm_max_connections; | ||
317 | |||
318 | nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); | ||
319 | if (IS_ERR(nlmsvc_task)) { | ||
320 | error = PTR_ERR(nlmsvc_task); | ||
321 | printk(KERN_WARNING | ||
322 | "lockd_up: kthread_run failed, error=%d\n", error); | ||
323 | goto out_task; | ||
324 | } | ||
325 | dprintk("lockd_up: service started\n"); | ||
326 | return 0; | ||
327 | |||
328 | out_task: | ||
329 | svc_exit_thread(nlmsvc_rqst); | ||
330 | nlmsvc_task = NULL; | ||
331 | out_rqst: | ||
332 | nlmsvc_rqst = NULL; | ||
333 | return error; | ||
334 | } | ||
335 | |||
336 | static struct svc_serv *lockd_create_svc(void) | ||
299 | { | 337 | { |
300 | struct svc_serv *serv; | 338 | struct svc_serv *serv; |
301 | int error = 0; | ||
302 | 339 | ||
303 | mutex_lock(&nlmsvc_mutex); | ||
304 | /* | 340 | /* |
305 | * Check whether we're already up and running. | 341 | * Check whether we're already up and running. |
306 | */ | 342 | */ |
307 | if (nlmsvc_rqst) { | 343 | if (nlmsvc_rqst) { |
308 | error = lockd_up_net(net); | 344 | /* |
309 | goto out; | 345 | * Note: increase service usage, because later in case of error |
346 | * svc_destroy() will be called. | ||
347 | */ | ||
348 | svc_get(nlmsvc_rqst->rq_server); | ||
349 | return nlmsvc_rqst->rq_server; | ||
310 | } | 350 | } |
311 | 351 | ||
312 | /* | 352 | /* |
@@ -317,59 +357,53 @@ int lockd_up(struct net *net) | |||
317 | printk(KERN_WARNING | 357 | printk(KERN_WARNING |
318 | "lockd_up: no pid, %d users??\n", nlmsvc_users); | 358 | "lockd_up: no pid, %d users??\n", nlmsvc_users); |
319 | 359 | ||
320 | error = -ENOMEM; | ||
321 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); | 360 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); |
322 | if (!serv) { | 361 | if (!serv) { |
323 | printk(KERN_WARNING "lockd_up: create service failed\n"); | 362 | printk(KERN_WARNING "lockd_up: create service failed\n"); |
324 | goto out; | 363 | return ERR_PTR(-ENOMEM); |
325 | } | 364 | } |
365 | dprintk("lockd_up: service created\n"); | ||
366 | return serv; | ||
367 | } | ||
326 | 368 | ||
327 | error = make_socks(serv, net); | 369 | /* |
328 | if (error < 0) | 370 | * Bring up the lockd process if it's not already up. |
329 | goto destroy_and_out; | 371 | */ |
372 | int lockd_up(struct net *net) | ||
373 | { | ||
374 | struct svc_serv *serv; | ||
375 | int error; | ||
330 | 376 | ||
331 | /* | 377 | mutex_lock(&nlmsvc_mutex); |
332 | * Create the kernel thread and wait for it to start. | 378 | |
333 | */ | 379 | serv = lockd_create_svc(); |
334 | nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); | 380 | if (IS_ERR(serv)) { |
335 | if (IS_ERR(nlmsvc_rqst)) { | 381 | error = PTR_ERR(serv); |
336 | error = PTR_ERR(nlmsvc_rqst); | 382 | goto err_create; |
337 | nlmsvc_rqst = NULL; | ||
338 | printk(KERN_WARNING | ||
339 | "lockd_up: svc_rqst allocation failed, error=%d\n", | ||
340 | error); | ||
341 | goto destroy_and_out; | ||
342 | } | 383 | } |
343 | 384 | ||
344 | svc_sock_update_bufs(serv); | 385 | error = lockd_up_net(serv, net); |
345 | serv->sv_maxconn = nlm_max_connections; | 386 | if (error < 0) |
387 | goto err_net; | ||
346 | 388 | ||
347 | nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); | 389 | error = lockd_start_svc(serv); |
348 | if (IS_ERR(nlmsvc_task)) { | 390 | if (error < 0) |
349 | error = PTR_ERR(nlmsvc_task); | 391 | goto err_start; |
350 | svc_exit_thread(nlmsvc_rqst); | ||
351 | nlmsvc_task = NULL; | ||
352 | nlmsvc_rqst = NULL; | ||
353 | printk(KERN_WARNING | ||
354 | "lockd_up: kthread_run failed, error=%d\n", error); | ||
355 | goto destroy_and_out; | ||
356 | } | ||
357 | 392 | ||
393 | nlmsvc_users++; | ||
358 | /* | 394 | /* |
359 | * Note: svc_serv structures have an initial use count of 1, | 395 | * Note: svc_serv structures have an initial use count of 1, |
360 | * so we exit through here on both success and failure. | 396 | * so we exit through here on both success and failure. |
361 | */ | 397 | */ |
362 | destroy_and_out: | 398 | err_net: |
363 | svc_destroy(serv); | 399 | svc_destroy(serv); |
364 | out: | 400 | err_create: |
365 | if (!error) { | ||
366 | struct lockd_net *ln = net_generic(net, lockd_net_id); | ||
367 | |||
368 | ln->nlmsvc_users++; | ||
369 | nlmsvc_users++; | ||
370 | } | ||
371 | mutex_unlock(&nlmsvc_mutex); | 401 | mutex_unlock(&nlmsvc_mutex); |
372 | return error; | 402 | return error; |
403 | |||
404 | err_start: | ||
405 | lockd_down_net(serv, net); | ||
406 | goto err_net; | ||
373 | } | 407 | } |
374 | EXPORT_SYMBOL_GPL(lockd_up); | 408 | EXPORT_SYMBOL_GPL(lockd_up); |
375 | 409 | ||
@@ -380,11 +414,10 @@ void | |||
380 | lockd_down(struct net *net) | 414 | lockd_down(struct net *net) |
381 | { | 415 | { |
382 | mutex_lock(&nlmsvc_mutex); | 416 | mutex_lock(&nlmsvc_mutex); |
417 | lockd_down_net(nlmsvc_rqst->rq_server, net); | ||
383 | if (nlmsvc_users) { | 418 | if (nlmsvc_users) { |
384 | if (--nlmsvc_users) { | 419 | if (--nlmsvc_users) |
385 | lockd_down_net(net); | ||
386 | goto out; | 420 | goto out; |
387 | } | ||
388 | } else { | 421 | } else { |
389 | printk(KERN_ERR "lockd_down: no users! task=%p\n", | 422 | printk(KERN_ERR "lockd_down: no users! task=%p\n", |
390 | nlmsvc_task); | 423 | nlmsvc_task); |
@@ -396,7 +429,9 @@ lockd_down(struct net *net) | |||
396 | BUG(); | 429 | BUG(); |
397 | } | 430 | } |
398 | kthread_stop(nlmsvc_task); | 431 | kthread_stop(nlmsvc_task); |
432 | dprintk("lockd_down: service stopped\n"); | ||
399 | svc_exit_thread(nlmsvc_rqst); | 433 | svc_exit_thread(nlmsvc_rqst); |
434 | dprintk("lockd_down: service destroyed\n"); | ||
400 | nlmsvc_task = NULL; | 435 | nlmsvc_task = NULL; |
401 | nlmsvc_rqst = NULL; | 436 | nlmsvc_rqst = NULL; |
402 | out: | 437 | out: |
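After the split above, lockd_up() is a composition of three individually undoable steps: get or create the svc_serv, register the per-net sockets, then start the kthread if it is not already running. An illustrative summary of that flow, condensed from the hunks rather than additional patch content:

	int example_lockd_up_flow(struct net *net)
	{
		struct svc_serv *serv;
		int error;

		mutex_lock(&nlmsvc_mutex);
		serv = lockd_create_svc();		/* reuses and references a running service */
		if (IS_ERR(serv)) {
			error = PTR_ERR(serv);
			goto out;
		}
		error = lockd_up_net(serv, net);	/* per-net sockets + rpcbind registration */
		if (!error) {
			error = lockd_start_svc(serv);	/* kthread, skipped if already running */
			if (error)
				lockd_down_net(serv, net);
		}
		if (!error)
			nlmsvc_users++;
		svc_destroy(serv);			/* drops the reference taken above */
	out:
		mutex_unlock(&nlmsvc_mutex);
		return error;
	}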
diff --git a/fs/locks.c b/fs/locks.c index 4f441e46cef4..814c51d0de47 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -1636,12 +1636,13 @@ EXPORT_SYMBOL(flock_lock_file_wait); | |||
1636 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | 1636 | SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) |
1637 | { | 1637 | { |
1638 | struct file *filp; | 1638 | struct file *filp; |
1639 | int fput_needed; | ||
1639 | struct file_lock *lock; | 1640 | struct file_lock *lock; |
1640 | int can_sleep, unlock; | 1641 | int can_sleep, unlock; |
1641 | int error; | 1642 | int error; |
1642 | 1643 | ||
1643 | error = -EBADF; | 1644 | error = -EBADF; |
1644 | filp = fget(fd); | 1645 | filp = fget_light(fd, &fput_needed); |
1645 | if (!filp) | 1646 | if (!filp) |
1646 | goto out; | 1647 | goto out; |
1647 | 1648 | ||
@@ -1674,7 +1675,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | |||
1674 | locks_free_lock(lock); | 1675 | locks_free_lock(lock); |
1675 | 1676 | ||
1676 | out_putf: | 1677 | out_putf: |
1677 | fput(filp); | 1678 | fput_light(filp, fput_needed); |
1678 | out: | 1679 | out: |
1679 | return error; | 1680 | return error; |
1680 | } | 1681 | } |
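The locks.c hunk switches sys_flock() to the lighter file lookup. A hedged sketch of the fget_light()/fput_light() pattern adopted there: when the file table is not shared, the lookup can skip the atomic reference bump, and fput_needed tells fput_light() whether a reference was actually taken.

	static int example_use_fd(unsigned int fd)
	{
		int fput_needed;
		struct file *filp;
		int err = -EBADF;

		filp = fget_light(fd, &fput_needed);
		if (!filp)
			return err;

		/* ... operate on filp ... */
		err = 0;

		fput_light(filp, fput_needed);	/* drops the ref only if one was taken */
		return err;
	}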
diff --git a/fs/namei.c b/fs/namei.c index c651f02c9fec..7d694194024a 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -449,7 +449,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) | |||
449 | mntget(nd->path.mnt); | 449 | mntget(nd->path.mnt); |
450 | 450 | ||
451 | rcu_read_unlock(); | 451 | rcu_read_unlock(); |
452 | br_read_unlock(vfsmount_lock); | 452 | br_read_unlock(&vfsmount_lock); |
453 | nd->flags &= ~LOOKUP_RCU; | 453 | nd->flags &= ~LOOKUP_RCU; |
454 | return 0; | 454 | return 0; |
455 | 455 | ||
@@ -507,14 +507,14 @@ static int complete_walk(struct nameidata *nd) | |||
507 | if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { | 507 | if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { |
508 | spin_unlock(&dentry->d_lock); | 508 | spin_unlock(&dentry->d_lock); |
509 | rcu_read_unlock(); | 509 | rcu_read_unlock(); |
510 | br_read_unlock(vfsmount_lock); | 510 | br_read_unlock(&vfsmount_lock); |
511 | return -ECHILD; | 511 | return -ECHILD; |
512 | } | 512 | } |
513 | BUG_ON(nd->inode != dentry->d_inode); | 513 | BUG_ON(nd->inode != dentry->d_inode); |
514 | spin_unlock(&dentry->d_lock); | 514 | spin_unlock(&dentry->d_lock); |
515 | mntget(nd->path.mnt); | 515 | mntget(nd->path.mnt); |
516 | rcu_read_unlock(); | 516 | rcu_read_unlock(); |
517 | br_read_unlock(vfsmount_lock); | 517 | br_read_unlock(&vfsmount_lock); |
518 | } | 518 | } |
519 | 519 | ||
520 | if (likely(!(nd->flags & LOOKUP_JUMPED))) | 520 | if (likely(!(nd->flags & LOOKUP_JUMPED))) |
@@ -681,15 +681,15 @@ int follow_up(struct path *path) | |||
681 | struct mount *parent; | 681 | struct mount *parent; |
682 | struct dentry *mountpoint; | 682 | struct dentry *mountpoint; |
683 | 683 | ||
684 | br_read_lock(vfsmount_lock); | 684 | br_read_lock(&vfsmount_lock); |
685 | parent = mnt->mnt_parent; | 685 | parent = mnt->mnt_parent; |
686 | if (&parent->mnt == path->mnt) { | 686 | if (&parent->mnt == path->mnt) { |
687 | br_read_unlock(vfsmount_lock); | 687 | br_read_unlock(&vfsmount_lock); |
688 | return 0; | 688 | return 0; |
689 | } | 689 | } |
690 | mntget(&parent->mnt); | 690 | mntget(&parent->mnt); |
691 | mountpoint = dget(mnt->mnt_mountpoint); | 691 | mountpoint = dget(mnt->mnt_mountpoint); |
692 | br_read_unlock(vfsmount_lock); | 692 | br_read_unlock(&vfsmount_lock); |
693 | dput(path->dentry); | 693 | dput(path->dentry); |
694 | path->dentry = mountpoint; | 694 | path->dentry = mountpoint; |
695 | mntput(path->mnt); | 695 | mntput(path->mnt); |
@@ -947,7 +947,7 @@ failed: | |||
947 | if (!(nd->flags & LOOKUP_ROOT)) | 947 | if (!(nd->flags & LOOKUP_ROOT)) |
948 | nd->root.mnt = NULL; | 948 | nd->root.mnt = NULL; |
949 | rcu_read_unlock(); | 949 | rcu_read_unlock(); |
950 | br_read_unlock(vfsmount_lock); | 950 | br_read_unlock(&vfsmount_lock); |
951 | return -ECHILD; | 951 | return -ECHILD; |
952 | } | 952 | } |
953 | 953 | ||
@@ -1125,8 +1125,8 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1125 | * small and for now I'd prefer to have fast path as straight as possible. | 1125 | * small and for now I'd prefer to have fast path as straight as possible. |
1126 | * It _is_ time-critical. | 1126 | * It _is_ time-critical. |
1127 | */ | 1127 | */ |
1128 | static int do_lookup(struct nameidata *nd, struct qstr *name, | 1128 | static int lookup_fast(struct nameidata *nd, struct qstr *name, |
1129 | struct path *path, struct inode **inode) | 1129 | struct path *path, struct inode **inode) |
1130 | { | 1130 | { |
1131 | struct vfsmount *mnt = nd->path.mnt; | 1131 | struct vfsmount *mnt = nd->path.mnt; |
1132 | struct dentry *dentry, *parent = nd->path.dentry; | 1132 | struct dentry *dentry, *parent = nd->path.dentry; |
@@ -1208,7 +1208,7 @@ unlazy: | |||
1208 | goto need_lookup; | 1208 | goto need_lookup; |
1209 | } | 1209 | } |
1210 | } | 1210 | } |
1211 | done: | 1211 | |
1212 | path->mnt = mnt; | 1212 | path->mnt = mnt; |
1213 | path->dentry = dentry; | 1213 | path->dentry = dentry; |
1214 | err = follow_managed(path, nd->flags); | 1214 | err = follow_managed(path, nd->flags); |
@@ -1222,6 +1222,17 @@ done: | |||
1222 | return 0; | 1222 | return 0; |
1223 | 1223 | ||
1224 | need_lookup: | 1224 | need_lookup: |
1225 | return 1; | ||
1226 | } | ||
1227 | |||
1228 | /* Fast lookup failed, do it the slow way */ | ||
1229 | static int lookup_slow(struct nameidata *nd, struct qstr *name, | ||
1230 | struct path *path) | ||
1231 | { | ||
1232 | struct dentry *dentry, *parent; | ||
1233 | int err; | ||
1234 | |||
1235 | parent = nd->path.dentry; | ||
1225 | BUG_ON(nd->inode != parent->d_inode); | 1236 | BUG_ON(nd->inode != parent->d_inode); |
1226 | 1237 | ||
1227 | mutex_lock(&parent->d_inode->i_mutex); | 1238 | mutex_lock(&parent->d_inode->i_mutex); |
@@ -1229,7 +1240,16 @@ need_lookup: | |||
1229 | mutex_unlock(&parent->d_inode->i_mutex); | 1240 | mutex_unlock(&parent->d_inode->i_mutex); |
1230 | if (IS_ERR(dentry)) | 1241 | if (IS_ERR(dentry)) |
1231 | return PTR_ERR(dentry); | 1242 | return PTR_ERR(dentry); |
1232 | goto done; | 1243 | path->mnt = nd->path.mnt; |
1244 | path->dentry = dentry; | ||
1245 | err = follow_managed(path, nd->flags); | ||
1246 | if (unlikely(err < 0)) { | ||
1247 | path_put_conditional(path, nd); | ||
1248 | return err; | ||
1249 | } | ||
1250 | if (err) | ||
1251 | nd->flags |= LOOKUP_JUMPED; | ||
1252 | return 0; | ||
1233 | } | 1253 | } |
1234 | 1254 | ||
1235 | static inline int may_lookup(struct nameidata *nd) | 1255 | static inline int may_lookup(struct nameidata *nd) |
@@ -1265,7 +1285,7 @@ static void terminate_walk(struct nameidata *nd) | |||
1265 | if (!(nd->flags & LOOKUP_ROOT)) | 1285 | if (!(nd->flags & LOOKUP_ROOT)) |
1266 | nd->root.mnt = NULL; | 1286 | nd->root.mnt = NULL; |
1267 | rcu_read_unlock(); | 1287 | rcu_read_unlock(); |
1268 | br_read_unlock(vfsmount_lock); | 1288 | br_read_unlock(&vfsmount_lock); |
1269 | } | 1289 | } |
1270 | } | 1290 | } |
1271 | 1291 | ||
@@ -1301,21 +1321,26 @@ static inline int walk_component(struct nameidata *nd, struct path *path, | |||
1301 | */ | 1321 | */ |
1302 | if (unlikely(type != LAST_NORM)) | 1322 | if (unlikely(type != LAST_NORM)) |
1303 | return handle_dots(nd, type); | 1323 | return handle_dots(nd, type); |
1304 | err = do_lookup(nd, name, path, &inode); | 1324 | err = lookup_fast(nd, name, path, &inode); |
1305 | if (unlikely(err)) { | 1325 | if (unlikely(err)) { |
1306 | terminate_walk(nd); | 1326 | if (err < 0) |
1307 | return err; | 1327 | goto out_err; |
1308 | } | 1328 | |
1309 | if (!inode) { | 1329 | err = lookup_slow(nd, name, path); |
1310 | path_to_nameidata(path, nd); | 1330 | if (err < 0) |
1311 | terminate_walk(nd); | 1331 | goto out_err; |
1312 | return -ENOENT; | 1332 | |
1333 | inode = path->dentry->d_inode; | ||
1313 | } | 1334 | } |
1335 | err = -ENOENT; | ||
1336 | if (!inode) | ||
1337 | goto out_path_put; | ||
1338 | |||
1314 | if (should_follow_link(inode, follow)) { | 1339 | if (should_follow_link(inode, follow)) { |
1315 | if (nd->flags & LOOKUP_RCU) { | 1340 | if (nd->flags & LOOKUP_RCU) { |
1316 | if (unlikely(unlazy_walk(nd, path->dentry))) { | 1341 | if (unlikely(unlazy_walk(nd, path->dentry))) { |
1317 | terminate_walk(nd); | 1342 | err = -ECHILD; |
1318 | return -ECHILD; | 1343 | goto out_err; |
1319 | } | 1344 | } |
1320 | } | 1345 | } |
1321 | BUG_ON(inode != path->dentry->d_inode); | 1346 | BUG_ON(inode != path->dentry->d_inode); |
@@ -1324,6 +1349,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path, | |||
1324 | path_to_nameidata(path, nd); | 1349 | path_to_nameidata(path, nd); |
1325 | nd->inode = inode; | 1350 | nd->inode = inode; |
1326 | return 0; | 1351 | return 0; |
1352 | |||
1353 | out_path_put: | ||
1354 | path_to_nameidata(path, nd); | ||
1355 | out_err: | ||
1356 | terminate_walk(nd); | ||
1357 | return err; | ||
1327 | } | 1358 | } |
1328 | 1359 | ||
1329 | /* | 1360 | /* |
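
In walk_component() the failure handling is consolidated: rather than calling terminate_walk() at every error site, the -ENOENT, -ECHILD and lookup failures now set err and jump to the shared out_path_put/out_err labels so the path is released and the walk terminated in one place. A small standalone C illustration of that single-exit, goto-based unwinding (the resource and names are invented for the example):

    #include <errno.h>
    #include <stdlib.h>

    int process(const char *name)
    {
            char *buf;
            int err;

            buf = malloc(64);
            if (!buf) {
                    err = -ENOMEM;
                    goto out_err;
            }
            if (!name || !*name) {
                    err = -ENOENT;
                    goto out_free;      /* release what we grabbed first */
            }
            /* ... real work would happen here ... */
            free(buf);
            return 0;

    out_free:
            free(buf);
    out_err:
            return err;                 /* every failure leaves through one place */
    }
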
@@ -1620,7 +1651,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1620 | nd->path = nd->root; | 1651 | nd->path = nd->root; |
1621 | nd->inode = inode; | 1652 | nd->inode = inode; |
1622 | if (flags & LOOKUP_RCU) { | 1653 | if (flags & LOOKUP_RCU) { |
1623 | br_read_lock(vfsmount_lock); | 1654 | br_read_lock(&vfsmount_lock); |
1624 | rcu_read_lock(); | 1655 | rcu_read_lock(); |
1625 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1656 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1626 | } else { | 1657 | } else { |
@@ -1633,7 +1664,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1633 | 1664 | ||
1634 | if (*name=='/') { | 1665 | if (*name=='/') { |
1635 | if (flags & LOOKUP_RCU) { | 1666 | if (flags & LOOKUP_RCU) { |
1636 | br_read_lock(vfsmount_lock); | 1667 | br_read_lock(&vfsmount_lock); |
1637 | rcu_read_lock(); | 1668 | rcu_read_lock(); |
1638 | set_root_rcu(nd); | 1669 | set_root_rcu(nd); |
1639 | } else { | 1670 | } else { |
@@ -1646,7 +1677,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1646 | struct fs_struct *fs = current->fs; | 1677 | struct fs_struct *fs = current->fs; |
1647 | unsigned seq; | 1678 | unsigned seq; |
1648 | 1679 | ||
1649 | br_read_lock(vfsmount_lock); | 1680 | br_read_lock(&vfsmount_lock); |
1650 | rcu_read_lock(); | 1681 | rcu_read_lock(); |
1651 | 1682 | ||
1652 | do { | 1683 | do { |
@@ -1682,7 +1713,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1682 | if (fput_needed) | 1713 | if (fput_needed) |
1683 | *fp = file; | 1714 | *fp = file; |
1684 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1715 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1685 | br_read_lock(vfsmount_lock); | 1716 | br_read_lock(&vfsmount_lock); |
1686 | rcu_read_lock(); | 1717 | rcu_read_lock(); |
1687 | } else { | 1718 | } else { |
1688 | path_get(&file->f_path); | 1719 | path_get(&file->f_path); |
@@ -2169,6 +2200,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2169 | int want_write = 0; | 2200 | int want_write = 0; |
2170 | int acc_mode = op->acc_mode; | 2201 | int acc_mode = op->acc_mode; |
2171 | struct file *filp; | 2202 | struct file *filp; |
2203 | struct inode *inode; | ||
2204 | int symlink_ok = 0; | ||
2205 | struct path save_parent = { .dentry = NULL, .mnt = NULL }; | ||
2206 | bool retried = false; | ||
2172 | int error; | 2207 | int error; |
2173 | 2208 | ||
2174 | nd->flags &= ~LOOKUP_PARENT; | 2209 | nd->flags &= ~LOOKUP_PARENT; |
@@ -2200,30 +2235,23 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2200 | } | 2235 | } |
2201 | 2236 | ||
2202 | if (!(open_flag & O_CREAT)) { | 2237 | if (!(open_flag & O_CREAT)) { |
2203 | int symlink_ok = 0; | ||
2204 | if (nd->last.name[nd->last.len]) | 2238 | if (nd->last.name[nd->last.len]) |
2205 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; | 2239 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; |
2206 | if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) | 2240 | if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) |
2207 | symlink_ok = 1; | 2241 | symlink_ok = 1; |
2208 | /* we _can_ be in RCU mode here */ | 2242 | /* we _can_ be in RCU mode here */ |
2209 | error = walk_component(nd, path, &nd->last, LAST_NORM, | 2243 | error = lookup_fast(nd, &nd->last, path, &inode); |
2210 | !symlink_ok); | 2244 | if (unlikely(error)) { |
2211 | if (error < 0) | 2245 | if (error < 0) |
2212 | return ERR_PTR(error); | 2246 | goto exit; |
2213 | if (error) /* symlink */ | ||
2214 | return NULL; | ||
2215 | /* sayonara */ | ||
2216 | error = complete_walk(nd); | ||
2217 | if (error) | ||
2218 | return ERR_PTR(error); | ||
2219 | 2247 | ||
2220 | error = -ENOTDIR; | 2248 | error = lookup_slow(nd, &nd->last, path); |
2221 | if (nd->flags & LOOKUP_DIRECTORY) { | 2249 | if (error < 0) |
2222 | if (!nd->inode->i_op->lookup) | ||
2223 | goto exit; | 2250 | goto exit; |
2251 | |||
2252 | inode = path->dentry->d_inode; | ||
2224 | } | 2253 | } |
2225 | audit_inode(pathname, nd->path.dentry); | 2254 | goto finish_lookup; |
2226 | goto ok; | ||
2227 | } | 2255 | } |
2228 | 2256 | ||
2229 | /* create side of things */ | 2257 | /* create side of things */ |
@@ -2241,6 +2269,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2241 | if (nd->last.name[nd->last.len]) | 2269 | if (nd->last.name[nd->last.len]) |
2242 | goto exit; | 2270 | goto exit; |
2243 | 2271 | ||
2272 | retry_lookup: | ||
2244 | mutex_lock(&dir->d_inode->i_mutex); | 2273 | mutex_lock(&dir->d_inode->i_mutex); |
2245 | 2274 | ||
2246 | dentry = lookup_hash(nd); | 2275 | dentry = lookup_hash(nd); |
@@ -2302,22 +2331,49 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2302 | if (error) | 2331 | if (error) |
2303 | nd->flags |= LOOKUP_JUMPED; | 2332 | nd->flags |= LOOKUP_JUMPED; |
2304 | 2333 | ||
2334 | BUG_ON(nd->flags & LOOKUP_RCU); | ||
2335 | inode = path->dentry->d_inode; | ||
2336 | finish_lookup: | ||
2337 | /* we _can_ be in RCU mode here */ | ||
2305 | error = -ENOENT; | 2338 | error = -ENOENT; |
2306 | if (!path->dentry->d_inode) | 2339 | if (!inode) { |
2307 | goto exit_dput; | 2340 | path_to_nameidata(path, nd); |
2341 | goto exit; | ||
2342 | } | ||
2308 | 2343 | ||
2309 | if (path->dentry->d_inode->i_op->follow_link) | 2344 | if (should_follow_link(inode, !symlink_ok)) { |
2345 | if (nd->flags & LOOKUP_RCU) { | ||
2346 | if (unlikely(unlazy_walk(nd, path->dentry))) { | ||
2347 | error = -ECHILD; | ||
2348 | goto exit; | ||
2349 | } | ||
2350 | } | ||
2351 | BUG_ON(inode != path->dentry->d_inode); | ||
2310 | return NULL; | 2352 | return NULL; |
2353 | } | ||
2311 | 2354 | ||
2312 | path_to_nameidata(path, nd); | 2355 | if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { |
2313 | nd->inode = path->dentry->d_inode; | 2356 | path_to_nameidata(path, nd); |
2357 | } else { | ||
2358 | save_parent.dentry = nd->path.dentry; | ||
2359 | save_parent.mnt = mntget(path->mnt); | ||
2360 | nd->path.dentry = path->dentry; | ||
2361 | |||
2362 | } | ||
2363 | nd->inode = inode; | ||
2314 | /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ | 2364 | /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ |
2315 | error = complete_walk(nd); | 2365 | error = complete_walk(nd); |
2316 | if (error) | 2366 | if (error) { |
2367 | path_put(&save_parent); | ||
2317 | return ERR_PTR(error); | 2368 | return ERR_PTR(error); |
2369 | } | ||
2318 | error = -EISDIR; | 2370 | error = -EISDIR; |
2319 | if (S_ISDIR(nd->inode->i_mode)) | 2371 | if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) |
2372 | goto exit; | ||
2373 | error = -ENOTDIR; | ||
2374 | if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) | ||
2320 | goto exit; | 2375 | goto exit; |
2376 | audit_inode(pathname, nd->path.dentry); | ||
2321 | ok: | 2377 | ok: |
2322 | if (!S_ISREG(nd->inode->i_mode)) | 2378 | if (!S_ISREG(nd->inode->i_mode)) |
2323 | will_truncate = 0; | 2379 | will_truncate = 0; |
@@ -2333,6 +2389,20 @@ common: | |||
2333 | if (error) | 2389 | if (error) |
2334 | goto exit; | 2390 | goto exit; |
2335 | filp = nameidata_to_filp(nd); | 2391 | filp = nameidata_to_filp(nd); |
2392 | if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { | ||
2393 | BUG_ON(save_parent.dentry != dir); | ||
2394 | path_put(&nd->path); | ||
2395 | nd->path = save_parent; | ||
2396 | nd->inode = dir->d_inode; | ||
2397 | save_parent.mnt = NULL; | ||
2398 | save_parent.dentry = NULL; | ||
2399 | if (want_write) { | ||
2400 | mnt_drop_write(nd->path.mnt); | ||
2401 | want_write = 0; | ||
2402 | } | ||
2403 | retried = true; | ||
2404 | goto retry_lookup; | ||
2405 | } | ||
2336 | if (!IS_ERR(filp)) { | 2406 | if (!IS_ERR(filp)) { |
2337 | error = ima_file_check(filp, op->acc_mode); | 2407 | error = ima_file_check(filp, op->acc_mode); |
2338 | if (error) { | 2408 | if (error) { |
@@ -2352,7 +2422,8 @@ common: | |||
2352 | out: | 2422 | out: |
2353 | if (want_write) | 2423 | if (want_write) |
2354 | mnt_drop_write(nd->path.mnt); | 2424 | mnt_drop_write(nd->path.mnt); |
2355 | path_put(&nd->path); | 2425 | path_put(&save_parent); |
2426 | terminate_walk(nd); | ||
2356 | return filp; | 2427 | return filp; |
2357 | 2428 | ||
2358 | exit_mutex_unlock: | 2429 | exit_mutex_unlock: |
@@ -2415,6 +2486,12 @@ out: | |||
2415 | if (base) | 2486 | if (base) |
2416 | fput(base); | 2487 | fput(base); |
2417 | release_open_intent(nd); | 2488 | release_open_intent(nd); |
2489 | if (filp == ERR_PTR(-EOPENSTALE)) { | ||
2490 | if (flags & LOOKUP_RCU) | ||
2491 | filp = ERR_PTR(-ECHILD); | ||
2492 | else | ||
2493 | filp = ERR_PTR(-ESTALE); | ||
2494 | } | ||
2418 | return filp; | 2495 | return filp; |
2419 | 2496 | ||
2420 | out_filp: | 2497 | out_filp: |
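
do_last() now remembers the parent directory in save_parent and, if nameidata_to_filp() returns -EOPENSTALE on the first pass, restores the parent path, drops the write count it may have taken and jumps back to retry_lookup to repeat the lookup exactly once (the retried flag prevents looping). If -EOPENSTALE still escapes, path_openat() translates it into -ECHILD in RCU mode (forcing a non-lazy restart) or -ESTALE otherwise, so the private code never reaches callers. A self-contained sketch of that retry-once pattern; the EOPENSTALE value and try_open() helper here are stand-ins, not the kernel's:

    #include <errno.h>
    #include <stdbool.h>

    #define EOPENSTALE 9999     /* illustrative private value, not the kernel's */

    static int try_open(const char *name, bool second_try)
    {
            (void)name;
            return second_try ? 0 : -EOPENSTALE;    /* pretend the cache was stale once */
    }

    int open_with_retry(const char *name)
    {
            bool retried = false;
            int err;

    retry:
            err = try_open(name, retried);
            if (err == -EOPENSTALE && !retried) {
                    retried = true;         /* exactly one retry after a stale hit */
                    goto retry;
            }
            if (err == -EOPENSTALE)
                    err = -ESTALE;          /* never leak the private code outward */
            return err;
    }
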
diff --git a/fs/namespace.c b/fs/namespace.c index e6081996c9a2..1e4a5fe3d7b7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -397,7 +397,7 @@ static int mnt_make_readonly(struct mount *mnt) | |||
397 | { | 397 | { |
398 | int ret = 0; | 398 | int ret = 0; |
399 | 399 | ||
400 | br_write_lock(vfsmount_lock); | 400 | br_write_lock(&vfsmount_lock); |
401 | mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; | 401 | mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; |
402 | /* | 402 | /* |
403 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store | 403 | * After storing MNT_WRITE_HOLD, we'll read the counters. This store |
@@ -431,15 +431,15 @@ static int mnt_make_readonly(struct mount *mnt) | |||
431 | */ | 431 | */ |
432 | smp_wmb(); | 432 | smp_wmb(); |
433 | mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; | 433 | mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; |
434 | br_write_unlock(vfsmount_lock); | 434 | br_write_unlock(&vfsmount_lock); |
435 | return ret; | 435 | return ret; |
436 | } | 436 | } |
437 | 437 | ||
438 | static void __mnt_unmake_readonly(struct mount *mnt) | 438 | static void __mnt_unmake_readonly(struct mount *mnt) |
439 | { | 439 | { |
440 | br_write_lock(vfsmount_lock); | 440 | br_write_lock(&vfsmount_lock); |
441 | mnt->mnt.mnt_flags &= ~MNT_READONLY; | 441 | mnt->mnt.mnt_flags &= ~MNT_READONLY; |
442 | br_write_unlock(vfsmount_lock); | 442 | br_write_unlock(&vfsmount_lock); |
443 | } | 443 | } |
444 | 444 | ||
445 | int sb_prepare_remount_readonly(struct super_block *sb) | 445 | int sb_prepare_remount_readonly(struct super_block *sb) |
@@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) | |||
451 | if (atomic_long_read(&sb->s_remove_count)) | 451 | if (atomic_long_read(&sb->s_remove_count)) |
452 | return -EBUSY; | 452 | return -EBUSY; |
453 | 453 | ||
454 | br_write_lock(vfsmount_lock); | 454 | br_write_lock(&vfsmount_lock); |
455 | list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { | 455 | list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { |
456 | if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { | 456 | if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { |
457 | mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; | 457 | mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; |
@@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) | |||
473 | if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) | 473 | if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) |
474 | mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; | 474 | mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; |
475 | } | 475 | } |
476 | br_write_unlock(vfsmount_lock); | 476 | br_write_unlock(&vfsmount_lock); |
477 | 477 | ||
478 | return err; | 478 | return err; |
479 | } | 479 | } |
@@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path *path) | |||
522 | { | 522 | { |
523 | struct mount *child_mnt; | 523 | struct mount *child_mnt; |
524 | 524 | ||
525 | br_read_lock(vfsmount_lock); | 525 | br_read_lock(&vfsmount_lock); |
526 | child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); | 526 | child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); |
527 | if (child_mnt) { | 527 | if (child_mnt) { |
528 | mnt_add_count(child_mnt, 1); | 528 | mnt_add_count(child_mnt, 1); |
529 | br_read_unlock(vfsmount_lock); | 529 | br_read_unlock(&vfsmount_lock); |
530 | return &child_mnt->mnt; | 530 | return &child_mnt->mnt; |
531 | } else { | 531 | } else { |
532 | br_read_unlock(vfsmount_lock); | 532 | br_read_unlock(&vfsmount_lock); |
533 | return NULL; | 533 | return NULL; |
534 | } | 534 | } |
535 | } | 535 | } |
@@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void | |||
714 | mnt->mnt.mnt_sb = root->d_sb; | 714 | mnt->mnt.mnt_sb = root->d_sb; |
715 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | 715 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
716 | mnt->mnt_parent = mnt; | 716 | mnt->mnt_parent = mnt; |
717 | br_write_lock(vfsmount_lock); | 717 | br_write_lock(&vfsmount_lock); |
718 | list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); | 718 | list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); |
719 | br_write_unlock(vfsmount_lock); | 719 | br_write_unlock(&vfsmount_lock); |
720 | return &mnt->mnt; | 720 | return &mnt->mnt; |
721 | } | 721 | } |
722 | EXPORT_SYMBOL_GPL(vfs_kern_mount); | 722 | EXPORT_SYMBOL_GPL(vfs_kern_mount); |
@@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, | |||
745 | mnt->mnt.mnt_root = dget(root); | 745 | mnt->mnt.mnt_root = dget(root); |
746 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | 746 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
747 | mnt->mnt_parent = mnt; | 747 | mnt->mnt_parent = mnt; |
748 | br_write_lock(vfsmount_lock); | 748 | br_write_lock(&vfsmount_lock); |
749 | list_add_tail(&mnt->mnt_instance, &sb->s_mounts); | 749 | list_add_tail(&mnt->mnt_instance, &sb->s_mounts); |
750 | br_write_unlock(vfsmount_lock); | 750 | br_write_unlock(&vfsmount_lock); |
751 | 751 | ||
752 | if (flag & CL_SLAVE) { | 752 | if (flag & CL_SLAVE) { |
753 | list_add(&mnt->mnt_slave, &old->mnt_slave_list); | 753 | list_add(&mnt->mnt_slave, &old->mnt_slave_list); |
@@ -803,35 +803,36 @@ static void mntput_no_expire(struct mount *mnt) | |||
803 | { | 803 | { |
804 | put_again: | 804 | put_again: |
805 | #ifdef CONFIG_SMP | 805 | #ifdef CONFIG_SMP |
806 | br_read_lock(vfsmount_lock); | 806 | br_read_lock(&vfsmount_lock); |
807 | if (likely(atomic_read(&mnt->mnt_longterm))) { | 807 | if (likely(atomic_read(&mnt->mnt_longterm))) { |
808 | mnt_add_count(mnt, -1); | 808 | mnt_add_count(mnt, -1); |
809 | br_read_unlock(vfsmount_lock); | 809 | br_read_unlock(&vfsmount_lock); |
810 | return; | 810 | return; |
811 | } | 811 | } |
812 | br_read_unlock(vfsmount_lock); | 812 | br_read_unlock(&vfsmount_lock); |
813 | 813 | ||
814 | br_write_lock(vfsmount_lock); | 814 | br_write_lock(&vfsmount_lock); |
815 | mnt_add_count(mnt, -1); | 815 | mnt_add_count(mnt, -1); |
816 | if (mnt_get_count(mnt)) { | 816 | if (mnt_get_count(mnt)) { |
817 | br_write_unlock(vfsmount_lock); | 817 | br_write_unlock(&vfsmount_lock); |
818 | return; | 818 | return; |
819 | } | 819 | } |
820 | #else | 820 | #else |
821 | mnt_add_count(mnt, -1); | 821 | mnt_add_count(mnt, -1); |
822 | if (likely(mnt_get_count(mnt))) | 822 | if (likely(mnt_get_count(mnt))) |
823 | return; | 823 | return; |
824 | br_write_lock(vfsmount_lock); | 824 | br_write_lock(&vfsmount_lock); |
825 | #endif | 825 | #endif |
826 | if (unlikely(mnt->mnt_pinned)) { | 826 | if (unlikely(mnt->mnt_pinned)) { |
827 | mnt_add_count(mnt, mnt->mnt_pinned + 1); | 827 | mnt_add_count(mnt, mnt->mnt_pinned + 1); |
828 | mnt->mnt_pinned = 0; | 828 | mnt->mnt_pinned = 0; |
829 | br_write_unlock(vfsmount_lock); | 829 | br_write_unlock(&vfsmount_lock); |
830 | acct_auto_close_mnt(&mnt->mnt); | 830 | acct_auto_close_mnt(&mnt->mnt); |
831 | goto put_again; | 831 | goto put_again; |
832 | } | 832 | } |
833 | |||
833 | list_del(&mnt->mnt_instance); | 834 | list_del(&mnt->mnt_instance); |
834 | br_write_unlock(vfsmount_lock); | 835 | br_write_unlock(&vfsmount_lock); |
835 | mntfree(mnt); | 836 | mntfree(mnt); |
836 | } | 837 | } |
837 | 838 | ||
@@ -857,21 +858,21 @@ EXPORT_SYMBOL(mntget); | |||
857 | 858 | ||
858 | void mnt_pin(struct vfsmount *mnt) | 859 | void mnt_pin(struct vfsmount *mnt) |
859 | { | 860 | { |
860 | br_write_lock(vfsmount_lock); | 861 | br_write_lock(&vfsmount_lock); |
861 | real_mount(mnt)->mnt_pinned++; | 862 | real_mount(mnt)->mnt_pinned++; |
862 | br_write_unlock(vfsmount_lock); | 863 | br_write_unlock(&vfsmount_lock); |
863 | } | 864 | } |
864 | EXPORT_SYMBOL(mnt_pin); | 865 | EXPORT_SYMBOL(mnt_pin); |
865 | 866 | ||
866 | void mnt_unpin(struct vfsmount *m) | 867 | void mnt_unpin(struct vfsmount *m) |
867 | { | 868 | { |
868 | struct mount *mnt = real_mount(m); | 869 | struct mount *mnt = real_mount(m); |
869 | br_write_lock(vfsmount_lock); | 870 | br_write_lock(&vfsmount_lock); |
870 | if (mnt->mnt_pinned) { | 871 | if (mnt->mnt_pinned) { |
871 | mnt_add_count(mnt, 1); | 872 | mnt_add_count(mnt, 1); |
872 | mnt->mnt_pinned--; | 873 | mnt->mnt_pinned--; |
873 | } | 874 | } |
874 | br_write_unlock(vfsmount_lock); | 875 | br_write_unlock(&vfsmount_lock); |
875 | } | 876 | } |
876 | EXPORT_SYMBOL(mnt_unpin); | 877 | EXPORT_SYMBOL(mnt_unpin); |
877 | 878 | ||
@@ -988,12 +989,12 @@ int may_umount_tree(struct vfsmount *m) | |||
988 | BUG_ON(!m); | 989 | BUG_ON(!m); |
989 | 990 | ||
990 | /* write lock needed for mnt_get_count */ | 991 | /* write lock needed for mnt_get_count */ |
991 | br_write_lock(vfsmount_lock); | 992 | br_write_lock(&vfsmount_lock); |
992 | for (p = mnt; p; p = next_mnt(p, mnt)) { | 993 | for (p = mnt; p; p = next_mnt(p, mnt)) { |
993 | actual_refs += mnt_get_count(p); | 994 | actual_refs += mnt_get_count(p); |
994 | minimum_refs += 2; | 995 | minimum_refs += 2; |
995 | } | 996 | } |
996 | br_write_unlock(vfsmount_lock); | 997 | br_write_unlock(&vfsmount_lock); |
997 | 998 | ||
998 | if (actual_refs > minimum_refs) | 999 | if (actual_refs > minimum_refs) |
999 | return 0; | 1000 | return 0; |
@@ -1020,10 +1021,10 @@ int may_umount(struct vfsmount *mnt) | |||
1020 | { | 1021 | { |
1021 | int ret = 1; | 1022 | int ret = 1; |
1022 | down_read(&namespace_sem); | 1023 | down_read(&namespace_sem); |
1023 | br_write_lock(vfsmount_lock); | 1024 | br_write_lock(&vfsmount_lock); |
1024 | if (propagate_mount_busy(real_mount(mnt), 2)) | 1025 | if (propagate_mount_busy(real_mount(mnt), 2)) |
1025 | ret = 0; | 1026 | ret = 0; |
1026 | br_write_unlock(vfsmount_lock); | 1027 | br_write_unlock(&vfsmount_lock); |
1027 | up_read(&namespace_sem); | 1028 | up_read(&namespace_sem); |
1028 | return ret; | 1029 | return ret; |
1029 | } | 1030 | } |
@@ -1040,13 +1041,13 @@ void release_mounts(struct list_head *head) | |||
1040 | struct dentry *dentry; | 1041 | struct dentry *dentry; |
1041 | struct mount *m; | 1042 | struct mount *m; |
1042 | 1043 | ||
1043 | br_write_lock(vfsmount_lock); | 1044 | br_write_lock(&vfsmount_lock); |
1044 | dentry = mnt->mnt_mountpoint; | 1045 | dentry = mnt->mnt_mountpoint; |
1045 | m = mnt->mnt_parent; | 1046 | m = mnt->mnt_parent; |
1046 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | 1047 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
1047 | mnt->mnt_parent = mnt; | 1048 | mnt->mnt_parent = mnt; |
1048 | m->mnt_ghosts--; | 1049 | m->mnt_ghosts--; |
1049 | br_write_unlock(vfsmount_lock); | 1050 | br_write_unlock(&vfsmount_lock); |
1050 | dput(dentry); | 1051 | dput(dentry); |
1051 | mntput(&m->mnt); | 1052 | mntput(&m->mnt); |
1052 | } | 1053 | } |
@@ -1073,8 +1074,9 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) | |||
1073 | list_del_init(&p->mnt_expire); | 1074 | list_del_init(&p->mnt_expire); |
1074 | list_del_init(&p->mnt_list); | 1075 | list_del_init(&p->mnt_list); |
1075 | __touch_mnt_namespace(p->mnt_ns); | 1076 | __touch_mnt_namespace(p->mnt_ns); |
1077 | if (p->mnt_ns) | ||
1078 | __mnt_make_shortterm(p); | ||
1076 | p->mnt_ns = NULL; | 1079 | p->mnt_ns = NULL; |
1077 | __mnt_make_shortterm(p); | ||
1078 | list_del_init(&p->mnt_child); | 1080 | list_del_init(&p->mnt_child); |
1079 | if (mnt_has_parent(p)) { | 1081 | if (mnt_has_parent(p)) { |
1080 | p->mnt_parent->mnt_ghosts++; | 1082 | p->mnt_parent->mnt_ghosts++; |
@@ -1112,12 +1114,12 @@ static int do_umount(struct mount *mnt, int flags) | |||
1112 | * probably don't strictly need the lock here if we examined | 1114 | * probably don't strictly need the lock here if we examined |
1113 | * all race cases, but it's a slowpath. | 1115 | * all race cases, but it's a slowpath. |
1114 | */ | 1116 | */ |
1115 | br_write_lock(vfsmount_lock); | 1117 | br_write_lock(&vfsmount_lock); |
1116 | if (mnt_get_count(mnt) != 2) { | 1118 | if (mnt_get_count(mnt) != 2) { |
1117 | br_write_unlock(vfsmount_lock); | 1119 | br_write_unlock(&vfsmount_lock); |
1118 | return -EBUSY; | 1120 | return -EBUSY; |
1119 | } | 1121 | } |
1120 | br_write_unlock(vfsmount_lock); | 1122 | br_write_unlock(&vfsmount_lock); |
1121 | 1123 | ||
1122 | if (!xchg(&mnt->mnt_expiry_mark, 1)) | 1124 | if (!xchg(&mnt->mnt_expiry_mark, 1)) |
1123 | return -EAGAIN; | 1125 | return -EAGAIN; |
@@ -1159,7 +1161,7 @@ static int do_umount(struct mount *mnt, int flags) | |||
1159 | } | 1161 | } |
1160 | 1162 | ||
1161 | down_write(&namespace_sem); | 1163 | down_write(&namespace_sem); |
1162 | br_write_lock(vfsmount_lock); | 1164 | br_write_lock(&vfsmount_lock); |
1163 | event++; | 1165 | event++; |
1164 | 1166 | ||
1165 | if (!(flags & MNT_DETACH)) | 1167 | if (!(flags & MNT_DETACH)) |
@@ -1171,7 +1173,7 @@ static int do_umount(struct mount *mnt, int flags) | |||
1171 | umount_tree(mnt, 1, &umount_list); | 1173 | umount_tree(mnt, 1, &umount_list); |
1172 | retval = 0; | 1174 | retval = 0; |
1173 | } | 1175 | } |
1174 | br_write_unlock(vfsmount_lock); | 1176 | br_write_unlock(&vfsmount_lock); |
1175 | up_write(&namespace_sem); | 1177 | up_write(&namespace_sem); |
1176 | release_mounts(&umount_list); | 1178 | release_mounts(&umount_list); |
1177 | return retval; | 1179 | return retval; |
@@ -1286,19 +1288,19 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | |||
1286 | q = clone_mnt(p, p->mnt.mnt_root, flag); | 1288 | q = clone_mnt(p, p->mnt.mnt_root, flag); |
1287 | if (!q) | 1289 | if (!q) |
1288 | goto Enomem; | 1290 | goto Enomem; |
1289 | br_write_lock(vfsmount_lock); | 1291 | br_write_lock(&vfsmount_lock); |
1290 | list_add_tail(&q->mnt_list, &res->mnt_list); | 1292 | list_add_tail(&q->mnt_list, &res->mnt_list); |
1291 | attach_mnt(q, &path); | 1293 | attach_mnt(q, &path); |
1292 | br_write_unlock(vfsmount_lock); | 1294 | br_write_unlock(&vfsmount_lock); |
1293 | } | 1295 | } |
1294 | } | 1296 | } |
1295 | return res; | 1297 | return res; |
1296 | Enomem: | 1298 | Enomem: |
1297 | if (res) { | 1299 | if (res) { |
1298 | LIST_HEAD(umount_list); | 1300 | LIST_HEAD(umount_list); |
1299 | br_write_lock(vfsmount_lock); | 1301 | br_write_lock(&vfsmount_lock); |
1300 | umount_tree(res, 0, &umount_list); | 1302 | umount_tree(res, 0, &umount_list); |
1301 | br_write_unlock(vfsmount_lock); | 1303 | br_write_unlock(&vfsmount_lock); |
1302 | release_mounts(&umount_list); | 1304 | release_mounts(&umount_list); |
1303 | } | 1305 | } |
1304 | return NULL; | 1306 | return NULL; |
@@ -1318,9 +1320,9 @@ void drop_collected_mounts(struct vfsmount *mnt) | |||
1318 | { | 1320 | { |
1319 | LIST_HEAD(umount_list); | 1321 | LIST_HEAD(umount_list); |
1320 | down_write(&namespace_sem); | 1322 | down_write(&namespace_sem); |
1321 | br_write_lock(vfsmount_lock); | 1323 | br_write_lock(&vfsmount_lock); |
1322 | umount_tree(real_mount(mnt), 0, &umount_list); | 1324 | umount_tree(real_mount(mnt), 0, &umount_list); |
1323 | br_write_unlock(vfsmount_lock); | 1325 | br_write_unlock(&vfsmount_lock); |
1324 | up_write(&namespace_sem); | 1326 | up_write(&namespace_sem); |
1325 | release_mounts(&umount_list); | 1327 | release_mounts(&umount_list); |
1326 | } | 1328 | } |
@@ -1448,7 +1450,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1448 | if (err) | 1450 | if (err) |
1449 | goto out_cleanup_ids; | 1451 | goto out_cleanup_ids; |
1450 | 1452 | ||
1451 | br_write_lock(vfsmount_lock); | 1453 | br_write_lock(&vfsmount_lock); |
1452 | 1454 | ||
1453 | if (IS_MNT_SHARED(dest_mnt)) { | 1455 | if (IS_MNT_SHARED(dest_mnt)) { |
1454 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) | 1456 | for (p = source_mnt; p; p = next_mnt(p, source_mnt)) |
@@ -1467,7 +1469,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1467 | list_del_init(&child->mnt_hash); | 1469 | list_del_init(&child->mnt_hash); |
1468 | commit_tree(child); | 1470 | commit_tree(child); |
1469 | } | 1471 | } |
1470 | br_write_unlock(vfsmount_lock); | 1472 | br_write_unlock(&vfsmount_lock); |
1471 | 1473 | ||
1472 | return 0; | 1474 | return 0; |
1473 | 1475 | ||
@@ -1565,10 +1567,10 @@ static int do_change_type(struct path *path, int flag) | |||
1565 | goto out_unlock; | 1567 | goto out_unlock; |
1566 | } | 1568 | } |
1567 | 1569 | ||
1568 | br_write_lock(vfsmount_lock); | 1570 | br_write_lock(&vfsmount_lock); |
1569 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) | 1571 | for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) |
1570 | change_mnt_propagation(m, type); | 1572 | change_mnt_propagation(m, type); |
1571 | br_write_unlock(vfsmount_lock); | 1573 | br_write_unlock(&vfsmount_lock); |
1572 | 1574 | ||
1573 | out_unlock: | 1575 | out_unlock: |
1574 | up_write(&namespace_sem); | 1576 | up_write(&namespace_sem); |
@@ -1617,9 +1619,9 @@ static int do_loopback(struct path *path, char *old_name, | |||
1617 | 1619 | ||
1618 | err = graft_tree(mnt, path); | 1620 | err = graft_tree(mnt, path); |
1619 | if (err) { | 1621 | if (err) { |
1620 | br_write_lock(vfsmount_lock); | 1622 | br_write_lock(&vfsmount_lock); |
1621 | umount_tree(mnt, 0, &umount_list); | 1623 | umount_tree(mnt, 0, &umount_list); |
1622 | br_write_unlock(vfsmount_lock); | 1624 | br_write_unlock(&vfsmount_lock); |
1623 | } | 1625 | } |
1624 | out2: | 1626 | out2: |
1625 | unlock_mount(path); | 1627 | unlock_mount(path); |
@@ -1677,16 +1679,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, | |||
1677 | else | 1679 | else |
1678 | err = do_remount_sb(sb, flags, data, 0); | 1680 | err = do_remount_sb(sb, flags, data, 0); |
1679 | if (!err) { | 1681 | if (!err) { |
1680 | br_write_lock(vfsmount_lock); | 1682 | br_write_lock(&vfsmount_lock); |
1681 | mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; | 1683 | mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; |
1682 | mnt->mnt.mnt_flags = mnt_flags; | 1684 | mnt->mnt.mnt_flags = mnt_flags; |
1683 | br_write_unlock(vfsmount_lock); | 1685 | br_write_unlock(&vfsmount_lock); |
1684 | } | 1686 | } |
1685 | up_write(&sb->s_umount); | 1687 | up_write(&sb->s_umount); |
1686 | if (!err) { | 1688 | if (!err) { |
1687 | br_write_lock(vfsmount_lock); | 1689 | br_write_lock(&vfsmount_lock); |
1688 | touch_mnt_namespace(mnt->mnt_ns); | 1690 | touch_mnt_namespace(mnt->mnt_ns); |
1689 | br_write_unlock(vfsmount_lock); | 1691 | br_write_unlock(&vfsmount_lock); |
1690 | } | 1692 | } |
1691 | return err; | 1693 | return err; |
1692 | } | 1694 | } |
@@ -1893,9 +1895,9 @@ fail: | |||
1893 | /* remove m from any expiration list it may be on */ | 1895 | /* remove m from any expiration list it may be on */ |
1894 | if (!list_empty(&mnt->mnt_expire)) { | 1896 | if (!list_empty(&mnt->mnt_expire)) { |
1895 | down_write(&namespace_sem); | 1897 | down_write(&namespace_sem); |
1896 | br_write_lock(vfsmount_lock); | 1898 | br_write_lock(&vfsmount_lock); |
1897 | list_del_init(&mnt->mnt_expire); | 1899 | list_del_init(&mnt->mnt_expire); |
1898 | br_write_unlock(vfsmount_lock); | 1900 | br_write_unlock(&vfsmount_lock); |
1899 | up_write(&namespace_sem); | 1901 | up_write(&namespace_sem); |
1900 | } | 1902 | } |
1901 | mntput(m); | 1903 | mntput(m); |
@@ -1911,11 +1913,11 @@ fail: | |||
1911 | void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) | 1913 | void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) |
1912 | { | 1914 | { |
1913 | down_write(&namespace_sem); | 1915 | down_write(&namespace_sem); |
1914 | br_write_lock(vfsmount_lock); | 1916 | br_write_lock(&vfsmount_lock); |
1915 | 1917 | ||
1916 | list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); | 1918 | list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); |
1917 | 1919 | ||
1918 | br_write_unlock(vfsmount_lock); | 1920 | br_write_unlock(&vfsmount_lock); |
1919 | up_write(&namespace_sem); | 1921 | up_write(&namespace_sem); |
1920 | } | 1922 | } |
1921 | EXPORT_SYMBOL(mnt_set_expiry); | 1923 | EXPORT_SYMBOL(mnt_set_expiry); |
@@ -1935,7 +1937,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
1935 | return; | 1937 | return; |
1936 | 1938 | ||
1937 | down_write(&namespace_sem); | 1939 | down_write(&namespace_sem); |
1938 | br_write_lock(vfsmount_lock); | 1940 | br_write_lock(&vfsmount_lock); |
1939 | 1941 | ||
1940 | /* extract from the expiration list every vfsmount that matches the | 1942 | /* extract from the expiration list every vfsmount that matches the |
1941 | * following criteria: | 1943 | * following criteria: |
@@ -1954,7 +1956,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
1954 | touch_mnt_namespace(mnt->mnt_ns); | 1956 | touch_mnt_namespace(mnt->mnt_ns); |
1955 | umount_tree(mnt, 1, &umounts); | 1957 | umount_tree(mnt, 1, &umounts); |
1956 | } | 1958 | } |
1957 | br_write_unlock(vfsmount_lock); | 1959 | br_write_unlock(&vfsmount_lock); |
1958 | up_write(&namespace_sem); | 1960 | up_write(&namespace_sem); |
1959 | 1961 | ||
1960 | release_mounts(&umounts); | 1962 | release_mounts(&umounts); |
@@ -2218,9 +2220,9 @@ void mnt_make_shortterm(struct vfsmount *m) | |||
2218 | struct mount *mnt = real_mount(m); | 2220 | struct mount *mnt = real_mount(m); |
2219 | if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) | 2221 | if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) |
2220 | return; | 2222 | return; |
2221 | br_write_lock(vfsmount_lock); | 2223 | br_write_lock(&vfsmount_lock); |
2222 | atomic_dec(&mnt->mnt_longterm); | 2224 | atomic_dec(&mnt->mnt_longterm); |
2223 | br_write_unlock(vfsmount_lock); | 2225 | br_write_unlock(&vfsmount_lock); |
2224 | #endif | 2226 | #endif |
2225 | } | 2227 | } |
2226 | 2228 | ||
@@ -2250,9 +2252,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2250 | return ERR_PTR(-ENOMEM); | 2252 | return ERR_PTR(-ENOMEM); |
2251 | } | 2253 | } |
2252 | new_ns->root = new; | 2254 | new_ns->root = new; |
2253 | br_write_lock(vfsmount_lock); | 2255 | br_write_lock(&vfsmount_lock); |
2254 | list_add_tail(&new_ns->list, &new->mnt_list); | 2256 | list_add_tail(&new_ns->list, &new->mnt_list); |
2255 | br_write_unlock(vfsmount_lock); | 2257 | br_write_unlock(&vfsmount_lock); |
2256 | 2258 | ||
2257 | /* | 2259 | /* |
2258 | * Second pass: switch the tsk->fs->* elements and mark new vfsmounts | 2260 | * Second pass: switch the tsk->fs->* elements and mark new vfsmounts |
@@ -2416,9 +2418,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, | |||
2416 | int path_is_under(struct path *path1, struct path *path2) | 2418 | int path_is_under(struct path *path1, struct path *path2) |
2417 | { | 2419 | { |
2418 | int res; | 2420 | int res; |
2419 | br_read_lock(vfsmount_lock); | 2421 | br_read_lock(&vfsmount_lock); |
2420 | res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); | 2422 | res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); |
2421 | br_read_unlock(vfsmount_lock); | 2423 | br_read_unlock(&vfsmount_lock); |
2422 | return res; | 2424 | return res; |
2423 | } | 2425 | } |
2424 | EXPORT_SYMBOL(path_is_under); | 2426 | EXPORT_SYMBOL(path_is_under); |
@@ -2505,7 +2507,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2505 | /* make sure we can reach put_old from new_root */ | 2507 | /* make sure we can reach put_old from new_root */ |
2506 | if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) | 2508 | if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) |
2507 | goto out4; | 2509 | goto out4; |
2508 | br_write_lock(vfsmount_lock); | 2510 | br_write_lock(&vfsmount_lock); |
2509 | detach_mnt(new_mnt, &parent_path); | 2511 | detach_mnt(new_mnt, &parent_path); |
2510 | detach_mnt(root_mnt, &root_parent); | 2512 | detach_mnt(root_mnt, &root_parent); |
2511 | /* mount old root on put_old */ | 2513 | /* mount old root on put_old */ |
@@ -2513,7 +2515,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2513 | /* mount new_root on / */ | 2515 | /* mount new_root on / */ |
2514 | attach_mnt(new_mnt, &root_parent); | 2516 | attach_mnt(new_mnt, &root_parent); |
2515 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2517 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
2516 | br_write_unlock(vfsmount_lock); | 2518 | br_write_unlock(&vfsmount_lock); |
2517 | chroot_fs_refs(&root, &new); | 2519 | chroot_fs_refs(&root, &new); |
2518 | error = 0; | 2520 | error = 0; |
2519 | out4: | 2521 | out4: |
@@ -2576,7 +2578,7 @@ void __init mnt_init(void) | |||
2576 | for (u = 0; u < HASH_SIZE; u++) | 2578 | for (u = 0; u < HASH_SIZE; u++) |
2577 | INIT_LIST_HEAD(&mount_hashtable[u]); | 2579 | INIT_LIST_HEAD(&mount_hashtable[u]); |
2578 | 2580 | ||
2579 | br_lock_init(vfsmount_lock); | 2581 | br_lock_init(&vfsmount_lock); |
2580 | 2582 | ||
2581 | err = sysfs_init(); | 2583 | err = sysfs_init(); |
2582 | if (err) | 2584 | if (err) |
@@ -2596,9 +2598,9 @@ void put_mnt_ns(struct mnt_namespace *ns) | |||
2596 | if (!atomic_dec_and_test(&ns->count)) | 2598 | if (!atomic_dec_and_test(&ns->count)) |
2597 | return; | 2599 | return; |
2598 | down_write(&namespace_sem); | 2600 | down_write(&namespace_sem); |
2599 | br_write_lock(vfsmount_lock); | 2601 | br_write_lock(&vfsmount_lock); |
2600 | umount_tree(ns->root, 0, &umount_list); | 2602 | umount_tree(ns->root, 0, &umount_list); |
2601 | br_write_unlock(vfsmount_lock); | 2603 | br_write_unlock(&vfsmount_lock); |
2602 | up_write(&namespace_sem); | 2604 | up_write(&namespace_sem); |
2603 | release_mounts(&umount_list); | 2605 | release_mounts(&umount_list); |
2604 | kfree(ns); | 2606 | kfree(ns); |
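
Throughout fs/namespace.c (and the fs/namei.c hunks earlier) the brlock calls now pass &vfsmount_lock rather than the bare token, which goes with the lglock primitives elsewhere in this series becoming ordinary functions that take the lock's address instead of macros that paste the name into per-lock symbols; the umount_tree() hunk also calls __mnt_make_shortterm() only when p->mnt_ns is still set, and before clearing it. A user-space illustration of the two API shapes, using pthread mutexes purely as stand-ins:

    #include <pthread.h>

    /* Old shape: the macro glues the name into per-lock symbols. */
    #define DEFINE_NAMED_LOCK(name) \
            pthread_mutex_t name##_mutex = PTHREAD_MUTEX_INITIALIZER
    #define named_lock(name)        pthread_mutex_lock(&name##_mutex)
    #define named_unlock(name)      pthread_mutex_unlock(&name##_mutex)

    /* New shape: plain functions taking a pointer, so the lock is an
     * ordinary, type-checked object that can be passed around. */
    struct big_lock { pthread_mutex_t m; };

    static inline void big_lock(struct big_lock *l)   { pthread_mutex_lock(&l->m); }
    static inline void big_unlock(struct big_lock *l) { pthread_mutex_unlock(&l->m); }

    static DEFINE_NAMED_LOCK(vfsmount);
    static struct big_lock mount_lock = { PTHREAD_MUTEX_INITIALIZER };

    void example(void)
    {
            named_lock(vfsmount);           /* old: br_write_lock(vfsmount_lock)  */
            named_unlock(vfsmount);
            big_lock(&mount_lock);          /* new: br_write_lock(&vfsmount_lock) */
            big_unlock(&mount_lock);
    }
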
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 3ff5fcc1528f..122e260247f5 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
@@ -221,6 +221,10 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
221 | 221 | ||
222 | already_written = 0; | 222 | already_written = 0; |
223 | 223 | ||
224 | errno = file_update_time(file); | ||
225 | if (errno) | ||
226 | goto outrel; | ||
227 | |||
224 | bouncebuffer = vmalloc(bufsize); | 228 | bouncebuffer = vmalloc(bufsize); |
225 | if (!bouncebuffer) { | 229 | if (!bouncebuffer) { |
226 | errno = -EIO; /* -ENOMEM */ | 230 | errno = -EIO; /* -ENOMEM */ |
@@ -252,8 +256,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
252 | } | 256 | } |
253 | vfree(bouncebuffer); | 257 | vfree(bouncebuffer); |
254 | 258 | ||
255 | file_update_time(file); | ||
256 | |||
257 | *ppos = pos; | 259 | *ppos = pos; |
258 | 260 | ||
259 | if (pos > i_size_read(inode)) { | 261 | if (pos > i_size_read(inode)) { |
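
In ncp_file_write() the file_update_time() call moves ahead of the bounce-buffer allocation and the writes themselves, and its return value is now checked (jumping to outrel on failure) instead of being ignored after the data has already been sent; this relies on file_update_time() now returning an error code, which the ->update_time work in this series makes possible. A user-space analogue of the reordering, with stand-in helpers:

    #include <stddef.h>

    static int update_times(int fd)         /* stand-in for file_update_time() */
    {
            (void)fd;
            return 0;                       /* 0 on success, negative errno on failure */
    }

    static long push_data(int fd, const char *buf, size_t len)
    {
            (void)fd; (void)buf;
            return (long)len;               /* pretend everything was written */
    }

    long write_with_times(int fd, const char *buf, size_t len)
    {
            int err = update_times(fd);

            if (err)
                    return err;             /* nothing sent yet, trivial to unwind */
            return push_data(fd, buf, len);
    }
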
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index 4af803f13516..54cc0cdb3dcb 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h | |||
@@ -23,17 +23,17 @@ struct ncp_mount_data_kernel { | |||
23 | unsigned long flags; /* NCP_MOUNT_* flags */ | 23 | unsigned long flags; /* NCP_MOUNT_* flags */ |
24 | unsigned int int_flags; /* internal flags */ | 24 | unsigned int int_flags; /* internal flags */ |
25 | #define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 | 25 | #define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 |
26 | __kernel_uid32_t mounted_uid; /* Who may umount() this filesystem? */ | 26 | uid_t mounted_uid; /* Who may umount() this filesystem? */ |
27 | struct pid *wdog_pid; /* Who cares for our watchdog packets? */ | 27 | struct pid *wdog_pid; /* Who cares for our watchdog packets? */ |
28 | unsigned int ncp_fd; /* The socket to the ncp port */ | 28 | unsigned int ncp_fd; /* The socket to the ncp port */ |
29 | unsigned int time_out; /* How long should I wait after | 29 | unsigned int time_out; /* How long should I wait after |
30 | sending a NCP request? */ | 30 | sending a NCP request? */ |
31 | unsigned int retry_count; /* And how often should I retry? */ | 31 | unsigned int retry_count; /* And how often should I retry? */ |
32 | unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; | 32 | unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; |
33 | __kernel_uid32_t uid; | 33 | uid_t uid; |
34 | __kernel_gid32_t gid; | 34 | gid_t gid; |
35 | __kernel_mode_t file_mode; | 35 | umode_t file_mode; |
36 | __kernel_mode_t dir_mode; | 36 | umode_t dir_mode; |
37 | int info_fd; | 37 | int info_fd; |
38 | }; | 38 | }; |
39 | 39 | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index eb95f5091c1a..970659daa323 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/kthread.h> | 17 | #include <linux/kthread.h> |
18 | #include <linux/sunrpc/svcauth_gss.h> | 18 | #include <linux/sunrpc/svcauth_gss.h> |
19 | #include <linux/sunrpc/bc_xprt.h> | 19 | #include <linux/sunrpc/bc_xprt.h> |
20 | #include <linux/nsproxy.h> | ||
20 | 21 | ||
21 | #include <net/inet_sock.h> | 22 | #include <net/inet_sock.h> |
22 | 23 | ||
@@ -253,6 +254,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) | |||
253 | char svc_name[12]; | 254 | char svc_name[12]; |
254 | int ret = 0; | 255 | int ret = 0; |
255 | int minorversion_setup; | 256 | int minorversion_setup; |
257 | struct net *net = current->nsproxy->net_ns; | ||
256 | 258 | ||
257 | mutex_lock(&nfs_callback_mutex); | 259 | mutex_lock(&nfs_callback_mutex); |
258 | if (cb_info->users++ || cb_info->task != NULL) { | 260 | if (cb_info->users++ || cb_info->task != NULL) { |
@@ -265,6 +267,12 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) | |||
265 | goto out_err; | 267 | goto out_err; |
266 | } | 268 | } |
267 | 269 | ||
270 | ret = svc_bind(serv, net); | ||
271 | if (ret < 0) { | ||
272 | printk(KERN_WARNING "NFS: bind callback service failed\n"); | ||
273 | goto out_err; | ||
274 | } | ||
275 | |||
268 | minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, | 276 | minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, |
269 | serv, xprt, &rqstp, &callback_svc); | 277 | serv, xprt, &rqstp, &callback_svc); |
270 | if (!minorversion_setup) { | 278 | if (!minorversion_setup) { |
@@ -306,6 +314,8 @@ out_err: | |||
306 | dprintk("NFS: Couldn't create callback socket or server thread; " | 314 | dprintk("NFS: Couldn't create callback socket or server thread; " |
307 | "err = %d\n", ret); | 315 | "err = %d\n", ret); |
308 | cb_info->users--; | 316 | cb_info->users--; |
317 | if (serv) | ||
318 | svc_shutdown_net(serv, net); | ||
309 | goto out; | 319 | goto out; |
310 | } | 320 | } |
311 | 321 | ||
@@ -320,6 +330,7 @@ void nfs_callback_down(int minorversion) | |||
320 | cb_info->users--; | 330 | cb_info->users--; |
321 | if (cb_info->users == 0 && cb_info->task != NULL) { | 331 | if (cb_info->users == 0 && cb_info->task != NULL) { |
322 | kthread_stop(cb_info->task); | 332 | kthread_stop(cb_info->task); |
333 | svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns); | ||
323 | svc_exit_thread(cb_info->rqst); | 334 | svc_exit_thread(cb_info->rqst); |
324 | cb_info->serv = NULL; | 335 | cb_info->serv = NULL; |
325 | cb_info->rqst = NULL; | 336 | cb_info->rqst = NULL; |
@@ -332,7 +343,7 @@ void nfs_callback_down(int minorversion) | |||
332 | int | 343 | int |
333 | check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) | 344 | check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) |
334 | { | 345 | { |
335 | char *p = svc_gss_principal(rqstp); | 346 | char *p = rqstp->rq_cred.cr_principal; |
336 | 347 | ||
337 | if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) | 348 | if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) |
338 | return 1; | 349 | return 1; |
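
nfs_callback_up() now binds the callback RPC service to the caller's network namespace with svc_bind() and, if any later step fails, undoes it with svc_shutdown_net() on the error path; nfs_callback_down() gains the matching shutdown before svc_exit_thread(). check_gss_callback_principal() also reads the principal straight from rqstp->rq_cred.cr_principal rather than the old svc_gss_principal() helper. A small standalone sketch of the paired setup/teardown discipline (all names invented):

    #include <errno.h>
    #include <stdbool.h>

    static bool bound;

    static int bind_service(void)    { bound = true;  return 0; }
    static void unbind_service(void) { bound = false; }
    static int start_threads(void)   { return -ENOMEM; }   /* pretend this step fails */

    int service_up(void)
    {
            int err = bind_service();

            if (err)
                    return err;
            err = start_threads();
            if (err)
                    goto out_unbind;        /* undo the bind on every later failure */
            return 0;

    out_unbind:
            unbind_service();
            return err;
    }

    void service_down(void)
    {
            unbind_service();               /* mirrors a successful service_up() */
    }
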
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0989a2099688..f430057ff3b3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -1354,10 +1354,10 @@ out: | |||
1354 | } | 1354 | } |
1355 | 1355 | ||
1356 | #ifdef CONFIG_NFS_V4 | 1356 | #ifdef CONFIG_NFS_V4 |
1357 | static int nfs_open_revalidate(struct dentry *, struct nameidata *); | 1357 | static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); |
1358 | 1358 | ||
1359 | const struct dentry_operations nfs4_dentry_operations = { | 1359 | const struct dentry_operations nfs4_dentry_operations = { |
1360 | .d_revalidate = nfs_open_revalidate, | 1360 | .d_revalidate = nfs4_lookup_revalidate, |
1361 | .d_delete = nfs_dentry_delete, | 1361 | .d_delete = nfs_dentry_delete, |
1362 | .d_iput = nfs_dentry_iput, | 1362 | .d_iput = nfs_dentry_iput, |
1363 | .d_automount = nfs_d_automount, | 1363 | .d_automount = nfs_d_automount, |
@@ -1519,13 +1519,11 @@ no_open: | |||
1519 | return nfs_lookup(dir, dentry, nd); | 1519 | return nfs_lookup(dir, dentry, nd); |
1520 | } | 1520 | } |
1521 | 1521 | ||
1522 | static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | 1522 | static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) |
1523 | { | 1523 | { |
1524 | struct dentry *parent = NULL; | 1524 | struct dentry *parent = NULL; |
1525 | struct inode *inode; | 1525 | struct inode *inode; |
1526 | struct inode *dir; | 1526 | struct inode *dir; |
1527 | struct nfs_open_context *ctx; | ||
1528 | struct iattr attr; | ||
1529 | int openflags, ret = 0; | 1527 | int openflags, ret = 0; |
1530 | 1528 | ||
1531 | if (nd->flags & LOOKUP_RCU) | 1529 | if (nd->flags & LOOKUP_RCU) |
@@ -1554,57 +1552,13 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1554 | /* We cannot do exclusive creation on a positive dentry */ | 1552 | /* We cannot do exclusive creation on a positive dentry */ |
1555 | if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) | 1553 | if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) |
1556 | goto no_open_dput; | 1554 | goto no_open_dput; |
1557 | /* We can't create new files here */ | ||
1558 | openflags &= ~(O_CREAT|O_EXCL); | ||
1559 | |||
1560 | ctx = create_nfs_open_context(dentry, openflags); | ||
1561 | ret = PTR_ERR(ctx); | ||
1562 | if (IS_ERR(ctx)) | ||
1563 | goto out; | ||
1564 | 1555 | ||
1565 | attr.ia_valid = ATTR_OPEN; | 1556 | /* Let f_op->open() actually open (and revalidate) the file */ |
1566 | if (openflags & O_TRUNC) { | 1557 | ret = 1; |
1567 | attr.ia_valid |= ATTR_SIZE; | ||
1568 | attr.ia_size = 0; | ||
1569 | nfs_wb_all(inode); | ||
1570 | } | ||
1571 | |||
1572 | /* | ||
1573 | * Note: we're not holding inode->i_mutex and so may be racing with | ||
1574 | * operations that change the directory. We therefore save the | ||
1575 | * change attribute *before* we do the RPC call. | ||
1576 | */ | ||
1577 | inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); | ||
1578 | if (IS_ERR(inode)) { | ||
1579 | ret = PTR_ERR(inode); | ||
1580 | switch (ret) { | ||
1581 | case -EPERM: | ||
1582 | case -EACCES: | ||
1583 | case -EDQUOT: | ||
1584 | case -ENOSPC: | ||
1585 | case -EROFS: | ||
1586 | goto out_put_ctx; | ||
1587 | default: | ||
1588 | goto out_drop; | ||
1589 | } | ||
1590 | } | ||
1591 | iput(inode); | ||
1592 | if (inode != dentry->d_inode) | ||
1593 | goto out_drop; | ||
1594 | 1558 | ||
1595 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1596 | ret = nfs_intent_set_file(nd, ctx); | ||
1597 | if (ret >= 0) | ||
1598 | ret = 1; | ||
1599 | out: | 1559 | out: |
1600 | dput(parent); | 1560 | dput(parent); |
1601 | return ret; | 1561 | return ret; |
1602 | out_drop: | ||
1603 | d_drop(dentry); | ||
1604 | ret = 0; | ||
1605 | out_put_ctx: | ||
1606 | put_nfs_open_context(ctx); | ||
1607 | goto out; | ||
1608 | 1562 | ||
1609 | no_open_dput: | 1563 | no_open_dput: |
1610 | dput(parent); | 1564 | dput(parent); |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 56311ca5f9f8..a6708e6b438d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -879,12 +879,81 @@ const struct file_operations nfs_file_operations = { | |||
879 | static int | 879 | static int |
880 | nfs4_file_open(struct inode *inode, struct file *filp) | 880 | nfs4_file_open(struct inode *inode, struct file *filp) |
881 | { | 881 | { |
882 | struct nfs_open_context *ctx; | ||
883 | struct dentry *dentry = filp->f_path.dentry; | ||
884 | struct dentry *parent = NULL; | ||
885 | struct inode *dir; | ||
886 | unsigned openflags = filp->f_flags; | ||
887 | struct iattr attr; | ||
888 | int err; | ||
889 | |||
890 | BUG_ON(inode != dentry->d_inode); | ||
882 | /* | 891 | /* |
883 | * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to | 892 | * If no cached dentry exists or if it's negative, NFSv4 handled the |
884 | * this point, then something is very wrong | 893 | * opens in ->lookup() or ->create(). |
894 | * | ||
895 | * We only get this far for a cached positive dentry. We skipped | ||
896 | * revalidation, so handle it here by dropping the dentry and returning | ||
897 | * -EOPENSTALE. The VFS will retry the lookup/create/open. | ||
885 | */ | 898 | */ |
886 | dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp); | 899 | |
887 | return -ENOTDIR; | 900 | dprintk("NFS: open file(%s/%s)\n", |
901 | dentry->d_parent->d_name.name, | ||
902 | dentry->d_name.name); | ||
903 | |||
904 | if ((openflags & O_ACCMODE) == 3) | ||
905 | openflags--; | ||
906 | |||
907 | /* We can't create new files here */ | ||
908 | openflags &= ~(O_CREAT|O_EXCL); | ||
909 | |||
910 | parent = dget_parent(dentry); | ||
911 | dir = parent->d_inode; | ||
912 | |||
913 | ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); | ||
914 | err = PTR_ERR(ctx); | ||
915 | if (IS_ERR(ctx)) | ||
916 | goto out; | ||
917 | |||
918 | attr.ia_valid = ATTR_OPEN; | ||
919 | if (openflags & O_TRUNC) { | ||
920 | attr.ia_valid |= ATTR_SIZE; | ||
921 | attr.ia_size = 0; | ||
922 | nfs_wb_all(inode); | ||
923 | } | ||
924 | |||
925 | inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); | ||
926 | if (IS_ERR(inode)) { | ||
927 | err = PTR_ERR(inode); | ||
928 | switch (err) { | ||
929 | case -EPERM: | ||
930 | case -EACCES: | ||
931 | case -EDQUOT: | ||
932 | case -ENOSPC: | ||
933 | case -EROFS: | ||
934 | goto out_put_ctx; | ||
935 | default: | ||
936 | goto out_drop; | ||
937 | } | ||
938 | } | ||
939 | iput(inode); | ||
940 | if (inode != dentry->d_inode) | ||
941 | goto out_drop; | ||
942 | |||
943 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
944 | nfs_file_set_open_context(filp, ctx); | ||
945 | err = 0; | ||
946 | |||
947 | out_put_ctx: | ||
948 | put_nfs_open_context(ctx); | ||
949 | out: | ||
950 | dput(parent); | ||
951 | return err; | ||
952 | |||
953 | out_drop: | ||
954 | d_drop(dentry); | ||
955 | err = -EOPENSTALE; | ||
956 | goto out_put_ctx; | ||
888 | } | 957 | } |
889 | 958 | ||
890 | const struct file_operations nfs4_file_operations = { | 959 | const struct file_operations nfs4_file_operations = { |
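
With nfs4_lookup_revalidate() in the dir.c hunk above now simply returning 1 for a cached positive dentry, the real NFSv4 open moves into nfs4_file_open(): it builds the open context, performs the protocol open (including the O_TRUNC size attribute), and if the inode that comes back no longer matches the dentry it drops the dentry and returns -EOPENSTALE so the VFS retries the whole lookup/open, as handled by the do_last() retry shown earlier. A standalone sketch of that "re-check at open time, invalidate and signal a retry on mismatch" idea; every name below is made up:

    #define EOPENSTALE 9999                 /* illustrative stand-in value */

    struct object { unsigned long id; };

    struct cache_entry {
            struct object *obj;
            int valid;
    };

    static struct object *server_open(unsigned long id)
    {
            static struct object fresh = { 2 };     /* the server has moved on */
            (void)id;
            return &fresh;
    }

    int open_cached(struct cache_entry *ce)
    {
            struct object *obj = server_open(ce->obj->id);

            if (obj != ce->obj) {
                    ce->valid = 0;          /* analogous to d_drop() */
                    return -EOPENSTALE;     /* caller redoes the lookup and open */
            }
            return 0;
    }
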
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 204438cc914e..34a10d78b839 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
@@ -11,7 +11,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) | |||
11 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | 11 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; |
12 | 12 | ||
13 | for (f = exp->ex_flavors; f < end; f++) { | 13 | for (f = exp->ex_flavors; f < end; f++) { |
14 | if (f->pseudoflavor == rqstp->rq_flavor) | 14 | if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) |
15 | return f->flags; | 15 | return f->flags; |
16 | } | 16 | } |
17 | return exp->ex_flags; | 17 | return exp->ex_flags; |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index dcb52b884519..ba233499b9a5 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -706,7 +706,7 @@ static struct cache_head *svc_export_alloc(void) | |||
706 | return NULL; | 706 | return NULL; |
707 | } | 707 | } |
708 | 708 | ||
709 | struct cache_detail svc_export_cache_template = { | 709 | static struct cache_detail svc_export_cache_template = { |
710 | .owner = THIS_MODULE, | 710 | .owner = THIS_MODULE, |
711 | .hash_size = EXPORT_HASHMAX, | 711 | .hash_size = EXPORT_HASHMAX, |
712 | .name = "nfsd.export", | 712 | .name = "nfsd.export", |
@@ -904,13 +904,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) | |||
904 | return 0; | 904 | return 0; |
905 | /* ip-address based client; check sec= export option: */ | 905 | /* ip-address based client; check sec= export option: */ |
906 | for (f = exp->ex_flavors; f < end; f++) { | 906 | for (f = exp->ex_flavors; f < end; f++) { |
907 | if (f->pseudoflavor == rqstp->rq_flavor) | 907 | if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) |
908 | return 0; | 908 | return 0; |
909 | } | 909 | } |
910 | /* defaults in absence of sec= options: */ | 910 | /* defaults in absence of sec= options: */ |
911 | if (exp->ex_nflavors == 0) { | 911 | if (exp->ex_nflavors == 0) { |
912 | if (rqstp->rq_flavor == RPC_AUTH_NULL || | 912 | if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || |
913 | rqstp->rq_flavor == RPC_AUTH_UNIX) | 913 | rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX) |
914 | return 0; | 914 | return 0; |
915 | } | 915 | } |
916 | return nfserr_wrongsec; | 916 | return nfserr_wrongsec; |
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 9559ce468732..e6c38159622f 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c | |||
@@ -58,6 +58,7 @@ static int nfsd_inject_set(void *op_ptr, u64 val) | |||
58 | 58 | ||
59 | static int nfsd_inject_get(void *data, u64 *val) | 59 | static int nfsd_inject_get(void *data, u64 *val) |
60 | { | 60 | { |
61 | *val = 0; | ||
61 | return 0; | 62 | return 0; |
62 | } | 63 | } |
63 | 64 | ||
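
nfsd_inject_get() now writes 0 into *val before returning; the debugfs simple-attribute machinery hands whatever the ->get() callback stores straight back to the reader, so leaving it unset exposed an uninitialized value. A tiny user-space demonstration of keeping the output parameter defined:

    #include <stdio.h>

    static int fixed_get(void *data, unsigned long long *val)
    {
            (void)data;
            *val = 0;                       /* always define the output */
            return 0;
    }

    int main(void)
    {
            unsigned long long v;

            if (fixed_get(NULL, &v) == 0)
                    printf("%llu\n", v);    /* prints 0, never stack garbage */
            return 0;
    }
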
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c8e9f637153a..a5fd6b982f27 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -650,9 +650,10 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c | |||
650 | struct rpc_clnt *client; | 650 | struct rpc_clnt *client; |
651 | 651 | ||
652 | if (clp->cl_minorversion == 0) { | 652 | if (clp->cl_minorversion == 0) { |
653 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) | 653 | if (!clp->cl_cred.cr_principal && |
654 | (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) | ||
654 | return -EINVAL; | 655 | return -EINVAL; |
655 | args.client_name = clp->cl_principal; | 656 | args.client_name = clp->cl_cred.cr_principal; |
656 | args.prognumber = conn->cb_prog, | 657 | args.prognumber = conn->cb_prog, |
657 | args.protocol = XPRT_TRANSPORT_TCP; | 658 | args.protocol = XPRT_TRANSPORT_TCP; |
658 | args.authflavor = clp->cl_flavor; | 659 | args.authflavor = clp->cl_flavor; |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 286a7f8f2024..dae36f1dee95 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -605,7 +605,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel | |||
605 | static __be32 | 605 | static __be32 |
606 | do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) | 606 | do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) |
607 | { | 607 | { |
608 | if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS) | 608 | if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) |
609 | if (numeric_name_to_id(rqstp, type, name, namelen, id)) | 609 | if (numeric_name_to_id(rqstp, type, name, namelen, id)) |
610 | return 0; | 610 | return 0; |
611 | /* | 611 | /* |
@@ -618,7 +618,7 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u | |||
618 | static int | 618 | static int |
619 | do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) | 619 | do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) |
620 | { | 620 | { |
621 | if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS) | 621 | if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) |
622 | return sprintf(name, "%u", id); | 622 | return sprintf(name, "%u", id); |
623 | return idmap_id_to_name(rqstp, type, id, name); | 623 | return idmap_id_to_name(rqstp, type, id, name); |
624 | } | 624 | } |
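
The nfsd hunks in auth.c, export.c and nfs4idmap.c, together with the nfs4callback.c change to clp->cl_cred.cr_principal, stop using the loose rqstp->rq_flavor field and read the flavor and principal from the credential embedded in the request, rqstp->rq_cred, so the authentication data travels as one struct. A minimal illustration of that grouping; the field names and the GSS cutoff value are illustrative only:

    struct cred {
            unsigned int    flavor;
            const char      *principal;     /* may be NULL */
    };

    struct request {
            struct cred     cred;
            /* ... other per-request state ... */
    };

    int uses_gss(const struct request *rq)
    {
            return rq->cred.flavor >= 6;    /* RPC_AUTH_GSS-style cutoff, illustrative */
    }
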
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index ed3f9206a0ee..5ff0b7b9fc08 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -570,7 +570,7 @@ static ssize_t | |||
570 | cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | 570 | cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) |
571 | { | 571 | { |
572 | struct cld_upcall *tmp, *cup; | 572 | struct cld_upcall *tmp, *cup; |
573 | struct cld_msg *cmsg = (struct cld_msg *)src; | 573 | struct cld_msg __user *cmsg = (struct cld_msg __user *)src; |
574 | uint32_t xid; | 574 | uint32_t xid; |
575 | struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | 575 | struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, |
576 | nfsd_net_id); | 576 | nfsd_net_id); |
@@ -1029,7 +1029,7 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) | |||
1029 | return ret; | 1029 | return ret; |
1030 | } | 1030 | } |
1031 | 1031 | ||
1032 | struct notifier_block nfsd4_cld_block = { | 1032 | static struct notifier_block nfsd4_cld_block = { |
1033 | .notifier_call = rpc_pipefs_event, | 1033 | .notifier_call = rpc_pipefs_event, |
1034 | }; | 1034 | }; |
1035 | 1035 | ||
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 03f82c0bc35d..8fdc9ec5c5d3 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/sunrpc/clnt.h> | 42 | #include <linux/sunrpc/clnt.h> |
43 | #include "xdr4.h" | 43 | #include "xdr4.h" |
44 | #include "vfs.h" | 44 | #include "vfs.h" |
45 | #include "current_stateid.h" | ||
45 | 46 | ||
46 | #define NFSDDBG_FACILITY NFSDDBG_PROC | 47 | #define NFSDDBG_FACILITY NFSDDBG_PROC |
47 | 48 | ||
@@ -447,37 +448,69 @@ static struct list_head close_lru; | |||
447 | * | 448 | * |
448 | * which we should reject. | 449 | * which we should reject. |
449 | */ | 450 | */ |
450 | static void | 451 | static unsigned int |
451 | set_access(unsigned int *access, unsigned long bmap) { | 452 | bmap_to_share_mode(unsigned long bmap) { |
452 | int i; | 453 | int i; |
454 | unsigned int access = 0; | ||
453 | 455 | ||
454 | *access = 0; | ||
455 | for (i = 1; i < 4; i++) { | 456 | for (i = 1; i < 4; i++) { |
456 | if (test_bit(i, &bmap)) | 457 | if (test_bit(i, &bmap)) |
457 | *access |= i; | 458 | access |= i; |
458 | } | ||
459 | } | ||
460 | |||
461 | static void | ||
462 | set_deny(unsigned int *deny, unsigned long bmap) { | ||
463 | int i; | ||
464 | |||
465 | *deny = 0; | ||
466 | for (i = 0; i < 4; i++) { | ||
467 | if (test_bit(i, &bmap)) | ||
468 | *deny |= i ; | ||
469 | } | 459 | } |
460 | return access; | ||
470 | } | 461 | } |
471 | 462 | ||
472 | static int | 463 | static bool |
473 | test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { | 464 | test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { |
474 | unsigned int access, deny; | 465 | unsigned int access, deny; |
475 | 466 | ||
476 | set_access(&access, stp->st_access_bmap); | 467 | access = bmap_to_share_mode(stp->st_access_bmap); |
477 | set_deny(&deny, stp->st_deny_bmap); | 468 | deny = bmap_to_share_mode(stp->st_deny_bmap); |
478 | if ((access & open->op_share_deny) || (deny & open->op_share_access)) | 469 | if ((access & open->op_share_deny) || (deny & open->op_share_access)) |
479 | return 0; | 470 | return false; |
480 | return 1; | 471 | return true; |
472 | } | ||
473 | |||
474 | /* set share access for a given stateid */ | ||
475 | static inline void | ||
476 | set_access(u32 access, struct nfs4_ol_stateid *stp) | ||
477 | { | ||
478 | __set_bit(access, &stp->st_access_bmap); | ||
479 | } | ||
480 | |||
481 | /* clear share access for a given stateid */ | ||
482 | static inline void | ||
483 | clear_access(u32 access, struct nfs4_ol_stateid *stp) | ||
484 | { | ||
485 | __clear_bit(access, &stp->st_access_bmap); | ||
486 | } | ||
487 | |||
488 | /* test whether a given stateid has access */ | ||
489 | static inline bool | ||
490 | test_access(u32 access, struct nfs4_ol_stateid *stp) | ||
491 | { | ||
492 | return test_bit(access, &stp->st_access_bmap); | ||
493 | } | ||
494 | |||
495 | /* set share deny for a given stateid */ | ||
496 | static inline void | ||
497 | set_deny(u32 access, struct nfs4_ol_stateid *stp) | ||
498 | { | ||
499 | __set_bit(access, &stp->st_deny_bmap); | ||
500 | } | ||
501 | |||
502 | /* clear share deny for a given stateid */ | ||
503 | static inline void | ||
504 | clear_deny(u32 access, struct nfs4_ol_stateid *stp) | ||
505 | { | ||
506 | __clear_bit(access, &stp->st_deny_bmap); | ||
507 | } | ||
508 | |||
509 | /* test whether a given stateid is denying specific access */ | ||
510 | static inline bool | ||
511 | test_deny(u32 access, struct nfs4_ol_stateid *stp) | ||
512 | { | ||
513 | return test_bit(access, &stp->st_deny_bmap); | ||
481 | } | 514 | } |
482 | 515 | ||
483 | static int nfs4_access_to_omode(u32 access) | 516 | static int nfs4_access_to_omode(u32 access) |
@@ -493,6 +526,20 @@ static int nfs4_access_to_omode(u32 access) | |||
493 | BUG(); | 526 | BUG(); |
494 | } | 527 | } |
495 | 528 | ||
529 | /* release all access and file references for a given stateid */ | ||
530 | static void | ||
531 | release_all_access(struct nfs4_ol_stateid *stp) | ||
532 | { | ||
533 | int i; | ||
534 | |||
535 | for (i = 1; i < 4; i++) { | ||
536 | if (test_access(i, stp)) | ||
537 | nfs4_file_put_access(stp->st_file, | ||
538 | nfs4_access_to_omode(i)); | ||
539 | clear_access(i, stp); | ||
540 | } | ||
541 | } | ||
542 | |||
496 | static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) | 543 | static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) |
497 | { | 544 | { |
498 | list_del(&stp->st_perfile); | 545 | list_del(&stp->st_perfile); |
@@ -501,16 +548,7 @@ static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) | |||
501 | 548 | ||
502 | static void close_generic_stateid(struct nfs4_ol_stateid *stp) | 549 | static void close_generic_stateid(struct nfs4_ol_stateid *stp) |
503 | { | 550 | { |
504 | int i; | 551 | release_all_access(stp); |
505 | |||
506 | if (stp->st_access_bmap) { | ||
507 | for (i = 1; i < 4; i++) { | ||
508 | if (test_bit(i, &stp->st_access_bmap)) | ||
509 | nfs4_file_put_access(stp->st_file, | ||
510 | nfs4_access_to_omode(i)); | ||
511 | __clear_bit(i, &stp->st_access_bmap); | ||
512 | } | ||
513 | } | ||
514 | put_nfs4_file(stp->st_file); | 552 | put_nfs4_file(stp->st_file); |
515 | stp->st_file = NULL; | 553 | stp->st_file = NULL; |
516 | } | 554 | } |
@@ -885,7 +923,7 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n | |||
885 | struct nfsd4_session *new; | 923 | struct nfsd4_session *new; |
886 | struct nfsd4_channel_attrs *fchan = &cses->fore_channel; | 924 | struct nfsd4_channel_attrs *fchan = &cses->fore_channel; |
887 | int numslots, slotsize; | 925 | int numslots, slotsize; |
888 | int status; | 926 | __be32 status; |
889 | int idx; | 927 | int idx; |
890 | 928 | ||
891 | /* | 929 | /* |
@@ -984,7 +1022,8 @@ static inline void | |||
984 | renew_client_locked(struct nfs4_client *clp) | 1022 | renew_client_locked(struct nfs4_client *clp) |
985 | { | 1023 | { |
986 | if (is_client_expired(clp)) { | 1024 | if (is_client_expired(clp)) { |
987 | dprintk("%s: client (clientid %08x/%08x) already expired\n", | 1025 | WARN_ON(1); |
1026 | printk("%s: client (clientid %08x/%08x) already expired\n", | ||
988 | __func__, | 1027 | __func__, |
989 | clp->cl_clientid.cl_boot, | 1028 | clp->cl_clientid.cl_boot, |
990 | clp->cl_clientid.cl_id); | 1029 | clp->cl_clientid.cl_id); |
@@ -1049,9 +1088,7 @@ free_client(struct nfs4_client *clp) | |||
1049 | list_del(&ses->se_perclnt); | 1088 | list_del(&ses->se_perclnt); |
1050 | nfsd4_put_session_locked(ses); | 1089 | nfsd4_put_session_locked(ses); |
1051 | } | 1090 | } |
1052 | if (clp->cl_cred.cr_group_info) | 1091 | free_svc_cred(&clp->cl_cred); |
1053 | put_group_info(clp->cl_cred.cr_group_info); | ||
1054 | kfree(clp->cl_principal); | ||
1055 | kfree(clp->cl_name.data); | 1092 | kfree(clp->cl_name.data); |
1056 | kfree(clp); | 1093 | kfree(clp); |
1057 | } | 1094 | } |
@@ -1132,12 +1169,21 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) | |||
1132 | target->cl_clientid.cl_id = source->cl_clientid.cl_id; | 1169 | target->cl_clientid.cl_id = source->cl_clientid.cl_id; |
1133 | } | 1170 | } |
1134 | 1171 | ||
1135 | static void copy_cred(struct svc_cred *target, struct svc_cred *source) | 1172 | static int copy_cred(struct svc_cred *target, struct svc_cred *source) |
1136 | { | 1173 | { |
1174 | if (source->cr_principal) { | ||
1175 | target->cr_principal = | ||
1176 | kstrdup(source->cr_principal, GFP_KERNEL); | ||
1177 | if (target->cr_principal == NULL) | ||
1178 | return -ENOMEM; | ||
1179 | } else | ||
1180 | target->cr_principal = NULL; | ||
1181 | target->cr_flavor = source->cr_flavor; | ||
1137 | target->cr_uid = source->cr_uid; | 1182 | target->cr_uid = source->cr_uid; |
1138 | target->cr_gid = source->cr_gid; | 1183 | target->cr_gid = source->cr_gid; |
1139 | target->cr_group_info = source->cr_group_info; | 1184 | target->cr_group_info = source->cr_group_info; |
1140 | get_group_info(target->cr_group_info); | 1185 | get_group_info(target->cr_group_info); |
1186 | return 0; | ||
1141 | } | 1187 | } |
1142 | 1188 | ||
1143 | static int same_name(const char *n1, const char *n2) | 1189 | static int same_name(const char *n1, const char *n2) |
@@ -1157,11 +1203,31 @@ same_clid(clientid_t *cl1, clientid_t *cl2) | |||
1157 | return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id); | 1203 | return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id); |
1158 | } | 1204 | } |
1159 | 1205 | ||
1160 | /* XXX what about NGROUP */ | 1206 | static bool groups_equal(struct group_info *g1, struct group_info *g2) |
1207 | { | ||
1208 | int i; | ||
1209 | |||
1210 | if (g1->ngroups != g2->ngroups) | ||
1211 | return false; | ||
1212 | for (i=0; i<g1->ngroups; i++) | ||
1213 | if (GROUP_AT(g1, i) != GROUP_AT(g2, i)) | ||
1214 | return false; | ||
1215 | return true; | ||
1216 | } | ||
1217 | |||
1161 | static int | 1218 | static int |
1162 | same_creds(struct svc_cred *cr1, struct svc_cred *cr2) | 1219 | same_creds(struct svc_cred *cr1, struct svc_cred *cr2) |
1163 | { | 1220 | { |
1164 | return cr1->cr_uid == cr2->cr_uid; | 1221 | if ((cr1->cr_flavor != cr2->cr_flavor) |
1222 | || (cr1->cr_uid != cr2->cr_uid) | ||
1223 | || (cr1->cr_gid != cr2->cr_gid) | ||
1224 | || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) | ||
1225 | return false; | ||
1226 | if (cr1->cr_principal == cr2->cr_principal) | ||
1227 | return true; | ||
1228 | if (!cr1->cr_principal || !cr2->cr_principal) | ||
1229 | return false; | ||
1230 | return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); | ||
1165 | } | 1231 | } |
1166 | 1232 | ||
1167 | static void gen_clid(struct nfs4_client *clp) | 1233 | static void gen_clid(struct nfs4_client *clp) |
@@ -1204,25 +1270,20 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
1204 | { | 1270 | { |
1205 | struct nfs4_client *clp; | 1271 | struct nfs4_client *clp; |
1206 | struct sockaddr *sa = svc_addr(rqstp); | 1272 | struct sockaddr *sa = svc_addr(rqstp); |
1207 | char *princ; | 1273 | int ret; |
1208 | 1274 | ||
1209 | clp = alloc_client(name); | 1275 | clp = alloc_client(name); |
1210 | if (clp == NULL) | 1276 | if (clp == NULL) |
1211 | return NULL; | 1277 | return NULL; |
1212 | 1278 | ||
1213 | INIT_LIST_HEAD(&clp->cl_sessions); | 1279 | INIT_LIST_HEAD(&clp->cl_sessions); |
1214 | 1280 | ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); | |
1215 | princ = svc_gss_principal(rqstp); | 1281 | if (ret) { |
1216 | if (princ) { | 1282 | spin_lock(&client_lock); |
1217 | clp->cl_principal = kstrdup(princ, GFP_KERNEL); | 1283 | free_client(clp); |
1218 | if (clp->cl_principal == NULL) { | 1284 | spin_unlock(&client_lock); |
1219 | spin_lock(&client_lock); | 1285 | return NULL; |
1220 | free_client(clp); | ||
1221 | spin_unlock(&client_lock); | ||
1222 | return NULL; | ||
1223 | } | ||
1224 | } | 1286 | } |
1225 | |||
1226 | idr_init(&clp->cl_stateids); | 1287 | idr_init(&clp->cl_stateids); |
1227 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); | 1288 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); |
1228 | atomic_set(&clp->cl_refcount, 0); | 1289 | atomic_set(&clp->cl_refcount, 0); |
@@ -1240,8 +1301,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
1240 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); | 1301 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); |
1241 | copy_verf(clp, verf); | 1302 | copy_verf(clp, verf); |
1242 | rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); | 1303 | rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); |
1243 | clp->cl_flavor = rqstp->rq_flavor; | ||
1244 | copy_cred(&clp->cl_cred, &rqstp->rq_cred); | ||
1245 | gen_confirm(clp); | 1304 | gen_confirm(clp); |
1246 | clp->cl_cb_session = NULL; | 1305 | clp->cl_cb_session = NULL; |
1247 | return clp; | 1306 | return clp; |
@@ -1470,18 +1529,32 @@ nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) | |||
1470 | clid->flags = new->cl_exchange_flags; | 1529 | clid->flags = new->cl_exchange_flags; |
1471 | } | 1530 | } |
1472 | 1531 | ||
1532 | static bool client_has_state(struct nfs4_client *clp) | ||
1533 | { | ||
1534 | /* | ||
1535 | * Note clp->cl_openowners check isn't quite right: there's no | ||
1536 | * need to count owners without stateid's. | ||
1537 | * | ||
1538 | * Also note we should probably be using this in 4.0 case too. | ||
1539 | */ | ||
1540 | return !list_empty(&clp->cl_openowners) | ||
1541 | || !list_empty(&clp->cl_delegations) | ||
1542 | || !list_empty(&clp->cl_sessions); | ||
1543 | } | ||
1544 | |||
1473 | __be32 | 1545 | __be32 |
1474 | nfsd4_exchange_id(struct svc_rqst *rqstp, | 1546 | nfsd4_exchange_id(struct svc_rqst *rqstp, |
1475 | struct nfsd4_compound_state *cstate, | 1547 | struct nfsd4_compound_state *cstate, |
1476 | struct nfsd4_exchange_id *exid) | 1548 | struct nfsd4_exchange_id *exid) |
1477 | { | 1549 | { |
1478 | struct nfs4_client *unconf, *conf, *new; | 1550 | struct nfs4_client *unconf, *conf, *new; |
1479 | int status; | 1551 | __be32 status; |
1480 | unsigned int strhashval; | 1552 | unsigned int strhashval; |
1481 | char dname[HEXDIR_LEN]; | 1553 | char dname[HEXDIR_LEN]; |
1482 | char addr_str[INET6_ADDRSTRLEN]; | 1554 | char addr_str[INET6_ADDRSTRLEN]; |
1483 | nfs4_verifier verf = exid->verifier; | 1555 | nfs4_verifier verf = exid->verifier; |
1484 | struct sockaddr *sa = svc_addr(rqstp); | 1556 | struct sockaddr *sa = svc_addr(rqstp); |
1557 | bool update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A; | ||
1485 | 1558 | ||
1486 | rpc_ntop(sa, addr_str, sizeof(addr_str)); | 1559 | rpc_ntop(sa, addr_str, sizeof(addr_str)); |
1487 | dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " | 1560 | dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " |
@@ -1507,71 +1580,63 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, | |||
1507 | status = nfs4_make_rec_clidname(dname, &exid->clname); | 1580 | status = nfs4_make_rec_clidname(dname, &exid->clname); |
1508 | 1581 | ||
1509 | if (status) | 1582 | if (status) |
1510 | goto error; | 1583 | return status; |
1511 | 1584 | ||
1512 | strhashval = clientstr_hashval(dname); | 1585 | strhashval = clientstr_hashval(dname); |
1513 | 1586 | ||
1587 | /* Cases below refer to rfc 5661 section 18.35.4: */ | ||
1514 | nfs4_lock_state(); | 1588 | nfs4_lock_state(); |
1515 | status = nfs_ok; | ||
1516 | |||
1517 | conf = find_confirmed_client_by_str(dname, strhashval); | 1589 | conf = find_confirmed_client_by_str(dname, strhashval); |
1518 | if (conf) { | 1590 | if (conf) { |
1519 | if (!clp_used_exchangeid(conf)) { | 1591 | bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); |
1520 | status = nfserr_clid_inuse; /* XXX: ? */ | 1592 | bool verfs_match = same_verf(&verf, &conf->cl_verifier); |
1521 | goto out; | 1593 | |
1522 | } | 1594 | if (update) { |
1523 | if (!same_verf(&verf, &conf->cl_verifier)) { | 1595 | if (!clp_used_exchangeid(conf)) { /* buggy client */ |
1524 | /* 18.35.4 case 8 */ | 1596 | status = nfserr_inval; |
1525 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | 1597 | goto out; |
1598 | } | ||
1599 | if (!creds_match) { /* case 9 */ | ||
1600 | status = nfserr_perm; | ||
1601 | goto out; | ||
1602 | } | ||
1603 | if (!verfs_match) { /* case 8 */ | ||
1526 | status = nfserr_not_same; | 1604 | status = nfserr_not_same; |
1527 | goto out; | 1605 | goto out; |
1528 | } | 1606 | } |
1529 | /* Client reboot: destroy old state */ | 1607 | /* case 6 */ |
1530 | expire_client(conf); | 1608 | exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; |
1531 | goto out_new; | 1609 | new = conf; |
1610 | goto out_copy; | ||
1532 | } | 1611 | } |
1533 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { | 1612 | if (!creds_match) { /* case 3 */ |
1534 | /* 18.35.4 case 9 */ | 1613 | if (client_has_state(conf)) { |
1535 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | 1614 | status = nfserr_clid_inuse; |
1536 | status = nfserr_perm; | ||
1537 | goto out; | 1615 | goto out; |
1538 | } | 1616 | } |
1539 | expire_client(conf); | 1617 | expire_client(conf); |
1540 | goto out_new; | 1618 | goto out_new; |
1541 | } | 1619 | } |
1542 | /* | 1620 | if (verfs_match) { /* case 2 */ |
1543 | * Set bit when the owner id and verifier map to an already | 1621 | conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; |
1544 | * confirmed client id (18.35.3). | 1622 | new = conf; |
1545 | */ | 1623 | goto out_copy; |
1546 | exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; | 1624 | } |
1547 | 1625 | /* case 5, client reboot */ | |
1548 | /* | 1626 | goto out_new; |
1549 | * Falling into 18.35.4 case 2, possible router replay. | ||
1550 | * Leave confirmed record intact and return same result. | ||
1551 | */ | ||
1552 | copy_verf(conf, &verf); | ||
1553 | new = conf; | ||
1554 | goto out_copy; | ||
1555 | } | 1627 | } |
1556 | 1628 | ||
1557 | /* 18.35.4 case 7 */ | 1629 | if (update) { /* case 7 */ |
1558 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | ||
1559 | status = nfserr_noent; | 1630 | status = nfserr_noent; |
1560 | goto out; | 1631 | goto out; |
1561 | } | 1632 | } |
1562 | 1633 | ||
1563 | unconf = find_unconfirmed_client_by_str(dname, strhashval); | 1634 | unconf = find_unconfirmed_client_by_str(dname, strhashval); |
1564 | if (unconf) { | 1635 | if (unconf) /* case 4, possible retry or client restart */ |
1565 | /* | ||
1566 | * Possible retry or client restart. Per 18.35.4 case 4, | ||
1567 | * a new unconfirmed record should be generated regardless | ||
1568 | * of whether any properties have changed. | ||
1569 | */ | ||
1570 | expire_client(unconf); | 1636 | expire_client(unconf); |
1571 | } | ||
1572 | 1637 | ||
1638 | /* case 1 (normal case) */ | ||
1573 | out_new: | 1639 | out_new: |
1574 | /* Normal case */ | ||
1575 | new = create_client(exid->clname, dname, rqstp, &verf); | 1640 | new = create_client(exid->clname, dname, rqstp, &verf); |
1576 | if (new == NULL) { | 1641 | if (new == NULL) { |
1577 | status = nfserr_jukebox; | 1642 | status = nfserr_jukebox; |
@@ -1584,7 +1649,7 @@ out_copy: | |||
1584 | exid->clientid.cl_boot = new->cl_clientid.cl_boot; | 1649 | exid->clientid.cl_boot = new->cl_clientid.cl_boot; |
1585 | exid->clientid.cl_id = new->cl_clientid.cl_id; | 1650 | exid->clientid.cl_id = new->cl_clientid.cl_id; |
1586 | 1651 | ||
1587 | exid->seqid = 1; | 1652 | exid->seqid = new->cl_cs_slot.sl_seqid + 1; |
1588 | nfsd4_set_ex_flags(new, exid); | 1653 | nfsd4_set_ex_flags(new, exid); |
1589 | 1654 | ||
1590 | dprintk("nfsd4_exchange_id seqid %d flags %x\n", | 1655 | dprintk("nfsd4_exchange_id seqid %d flags %x\n", |
@@ -1593,12 +1658,10 @@ out_copy: | |||
1593 | 1658 | ||
1594 | out: | 1659 | out: |
1595 | nfs4_unlock_state(); | 1660 | nfs4_unlock_state(); |
1596 | error: | ||
1597 | dprintk("nfsd4_exchange_id returns %d\n", ntohl(status)); | ||
1598 | return status; | 1661 | return status; |
1599 | } | 1662 | } |
1600 | 1663 | ||
1601 | static int | 1664 | static __be32 |
1602 | check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) | 1665 | check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) |
1603 | { | 1666 | { |
1604 | dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, | 1667 | dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, |
@@ -1626,7 +1689,7 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) | |||
1626 | */ | 1689 | */ |
1627 | static void | 1690 | static void |
1628 | nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, | 1691 | nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, |
1629 | struct nfsd4_clid_slot *slot, int nfserr) | 1692 | struct nfsd4_clid_slot *slot, __be32 nfserr) |
1630 | { | 1693 | { |
1631 | slot->sl_status = nfserr; | 1694 | slot->sl_status = nfserr; |
1632 | memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); | 1695 | memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); |
@@ -1657,7 +1720,7 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, | |||
1657 | /* seqid, slotID, slotID, slotID, status */ \ | 1720 | /* seqid, slotID, slotID, slotID, status */ \ |
1658 | 5 ) * sizeof(__be32)) | 1721 | 5 ) * sizeof(__be32)) |
1659 | 1722 | ||
1660 | static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) | 1723 | static bool check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) |
1661 | { | 1724 | { |
1662 | return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ | 1725 | return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ |
1663 | || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; | 1726 | || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; |
@@ -1673,7 +1736,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1673 | struct nfsd4_session *new; | 1736 | struct nfsd4_session *new; |
1674 | struct nfsd4_clid_slot *cs_slot = NULL; | 1737 | struct nfsd4_clid_slot *cs_slot = NULL; |
1675 | bool confirm_me = false; | 1738 | bool confirm_me = false; |
1676 | int status = 0; | 1739 | __be32 status = 0; |
1677 | 1740 | ||
1678 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) | 1741 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) |
1679 | return nfserr_inval; | 1742 | return nfserr_inval; |
@@ -1686,16 +1749,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1686 | cs_slot = &conf->cl_cs_slot; | 1749 | cs_slot = &conf->cl_cs_slot; |
1687 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1750 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1688 | if (status == nfserr_replay_cache) { | 1751 | if (status == nfserr_replay_cache) { |
1689 | dprintk("Got a create_session replay! seqid= %d\n", | ||
1690 | cs_slot->sl_seqid); | ||
1691 | /* Return the cached reply status */ | ||
1692 | status = nfsd4_replay_create_session(cr_ses, cs_slot); | 1752 | status = nfsd4_replay_create_session(cr_ses, cs_slot); |
1693 | goto out; | 1753 | goto out; |
1694 | } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { | 1754 | } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { |
1695 | status = nfserr_seq_misordered; | 1755 | status = nfserr_seq_misordered; |
1696 | dprintk("Sequence misordered!\n"); | ||
1697 | dprintk("Expected seqid= %d but got seqid= %d\n", | ||
1698 | cs_slot->sl_seqid, cr_ses->seqid); | ||
1699 | goto out; | 1756 | goto out; |
1700 | } | 1757 | } |
1701 | } else if (unconf) { | 1758 | } else if (unconf) { |
@@ -1704,7 +1761,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1704 | status = nfserr_clid_inuse; | 1761 | status = nfserr_clid_inuse; |
1705 | goto out; | 1762 | goto out; |
1706 | } | 1763 | } |
1707 | |||
1708 | cs_slot = &unconf->cl_cs_slot; | 1764 | cs_slot = &unconf->cl_cs_slot; |
1709 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); | 1765 | status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); |
1710 | if (status) { | 1766 | if (status) { |
@@ -1712,7 +1768,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1712 | status = nfserr_seq_misordered; | 1768 | status = nfserr_seq_misordered; |
1713 | goto out; | 1769 | goto out; |
1714 | } | 1770 | } |
1715 | |||
1716 | confirm_me = true; | 1771 | confirm_me = true; |
1717 | conf = unconf; | 1772 | conf = unconf; |
1718 | } else { | 1773 | } else { |
@@ -1749,8 +1804,14 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1749 | 1804 | ||
1750 | /* cache solo and embedded create sessions under the state lock */ | 1805 | /* cache solo and embedded create sessions under the state lock */ |
1751 | nfsd4_cache_create_session(cr_ses, cs_slot, status); | 1806 | nfsd4_cache_create_session(cr_ses, cs_slot, status); |
1752 | if (confirm_me) | 1807 | if (confirm_me) { |
1808 | unsigned int hash = clientstr_hashval(unconf->cl_recdir); | ||
1809 | struct nfs4_client *old = | ||
1810 | find_confirmed_client_by_str(conf->cl_recdir, hash); | ||
1811 | if (old) | ||
1812 | expire_client(old); | ||
1753 | move_to_confirmed(conf); | 1813 | move_to_confirmed(conf); |
1814 | } | ||
1754 | out: | 1815 | out: |
1755 | nfs4_unlock_state(); | 1816 | nfs4_unlock_state(); |
1756 | dprintk("%s returns %d\n", __func__, ntohl(status)); | 1817 | dprintk("%s returns %d\n", __func__, ntohl(status)); |
@@ -1818,7 +1879,7 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
1818 | struct nfsd4_destroy_session *sessionid) | 1879 | struct nfsd4_destroy_session *sessionid) |
1819 | { | 1880 | { |
1820 | struct nfsd4_session *ses; | 1881 | struct nfsd4_session *ses; |
1821 | u32 status = nfserr_badsession; | 1882 | __be32 status = nfserr_badsession; |
1822 | 1883 | ||
1823 | /* Notes: | 1884 | /* Notes: |
1824 | * - The confirmed nfs4_client->cl_sessionid holds destroyed sessionid | 1885 |
@@ -1914,7 +1975,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
1914 | struct nfsd4_session *session; | 1975 | struct nfsd4_session *session; |
1915 | struct nfsd4_slot *slot; | 1976 | struct nfsd4_slot *slot; |
1916 | struct nfsd4_conn *conn; | 1977 | struct nfsd4_conn *conn; |
1917 | int status; | 1978 | __be32 status; |
1918 | 1979 | ||
1919 | if (resp->opcnt != 1) | 1980 | if (resp->opcnt != 1) |
1920 | return nfserr_sequence_pos; | 1981 | return nfserr_sequence_pos; |
@@ -2008,18 +2069,11 @@ out: | |||
2008 | return status; | 2069 | return status; |
2009 | } | 2070 | } |
2010 | 2071 | ||
2011 | static inline bool has_resources(struct nfs4_client *clp) | ||
2012 | { | ||
2013 | return !list_empty(&clp->cl_openowners) | ||
2014 | || !list_empty(&clp->cl_delegations) | ||
2015 | || !list_empty(&clp->cl_sessions); | ||
2016 | } | ||
2017 | |||
2018 | __be32 | 2072 | __be32 |
2019 | nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) | 2073 | nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) |
2020 | { | 2074 | { |
2021 | struct nfs4_client *conf, *unconf, *clp; | 2075 | struct nfs4_client *conf, *unconf, *clp; |
2022 | int status = 0; | 2076 | __be32 status = 0; |
2023 | 2077 | ||
2024 | nfs4_lock_state(); | 2078 | nfs4_lock_state(); |
2025 | unconf = find_unconfirmed_client(&dc->clientid); | 2079 | unconf = find_unconfirmed_client(&dc->clientid); |
@@ -2028,7 +2082,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta | |||
2028 | if (conf) { | 2082 | if (conf) { |
2029 | clp = conf; | 2083 | clp = conf; |
2030 | 2084 | ||
2031 | if (!is_client_expired(conf) && has_resources(conf)) { | 2085 | if (!is_client_expired(conf) && client_has_state(conf)) { |
2032 | status = nfserr_clientid_busy; | 2086 | status = nfserr_clientid_busy; |
2033 | goto out; | 2087 | goto out; |
2034 | } | 2088 | } |
@@ -2055,7 +2109,7 @@ out: | |||
2055 | __be32 | 2109 | __be32 |
2056 | nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) | 2110 | nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) |
2057 | { | 2111 | { |
2058 | int status = 0; | 2112 | __be32 status = 0; |
2059 | 2113 | ||
2060 | if (rc->rca_one_fs) { | 2114 | if (rc->rca_one_fs) { |
2061 | if (!cstate->current_fh.fh_dentry) | 2115 | if (!cstate->current_fh.fh_dentry) |
@@ -2106,17 +2160,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2106 | if (status) | 2160 | if (status) |
2107 | return status; | 2161 | return status; |
2108 | 2162 | ||
2109 | /* | ||
2110 | * XXX The Duplicate Request Cache (DRC) has been checked (??) | ||
2111 | * We get here on a DRC miss. | ||
2112 | */ | ||
2113 | |||
2114 | strhashval = clientstr_hashval(dname); | 2163 | strhashval = clientstr_hashval(dname); |
2115 | 2164 | ||
2165 | /* Cases below refer to rfc 3530 section 14.2.33: */ | ||
2116 | nfs4_lock_state(); | 2166 | nfs4_lock_state(); |
2117 | conf = find_confirmed_client_by_str(dname, strhashval); | 2167 | conf = find_confirmed_client_by_str(dname, strhashval); |
2118 | if (conf) { | 2168 | if (conf) { |
2119 | /* RFC 3530 14.2.33 CASE 0: */ | 2169 | /* case 0: */ |
2120 | status = nfserr_clid_inuse; | 2170 | status = nfserr_clid_inuse; |
2121 | if (clp_used_exchangeid(conf)) | 2171 | if (clp_used_exchangeid(conf)) |
2122 | goto out; | 2172 | goto out; |
@@ -2129,63 +2179,18 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2129 | goto out; | 2179 | goto out; |
2130 | } | 2180 | } |
2131 | } | 2181 | } |
2132 | /* | ||
2133 | * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION") | ||
2134 | * has a description of SETCLIENTID request processing consisting | ||
2135 | * of 5 bullet points, labeled as CASE0 - CASE4 below. | ||
2136 | */ | ||
2137 | unconf = find_unconfirmed_client_by_str(dname, strhashval); | 2182 | unconf = find_unconfirmed_client_by_str(dname, strhashval); |
2183 | if (unconf) | ||
2184 | expire_client(unconf); | ||
2138 | status = nfserr_jukebox; | 2185 | status = nfserr_jukebox; |
2139 | if (!conf) { | 2186 | new = create_client(clname, dname, rqstp, &clverifier); |
2140 | /* | 2187 | if (new == NULL) |
2141 | * RFC 3530 14.2.33 CASE 4: | 2188 | goto out; |
2142 | * placed first, because it is the normal case | 2189 | if (conf && same_verf(&conf->cl_verifier, &clverifier)) |
2143 | */ | 2190 | /* case 1: probable callback update */ |
2144 | if (unconf) | ||
2145 | expire_client(unconf); | ||
2146 | new = create_client(clname, dname, rqstp, &clverifier); | ||
2147 | if (new == NULL) | ||
2148 | goto out; | ||
2149 | gen_clid(new); | ||
2150 | } else if (same_verf(&conf->cl_verifier, &clverifier)) { | ||
2151 | /* | ||
2152 | * RFC 3530 14.2.33 CASE 1: | ||
2153 | * probable callback update | ||
2154 | */ | ||
2155 | if (unconf) { | ||
2156 | /* Note this is removing unconfirmed {*x***}, | ||
2157 | * which is stronger than RFC recommended {vxc**}. | ||
2158 | * This has the advantage that there is at most | ||
2159 | * one {*x***} in either list at any time. | ||
2160 | */ | ||
2161 | expire_client(unconf); | ||
2162 | } | ||
2163 | new = create_client(clname, dname, rqstp, &clverifier); | ||
2164 | if (new == NULL) | ||
2165 | goto out; | ||
2166 | copy_clid(new, conf); | 2191 | copy_clid(new, conf); |
2167 | } else if (!unconf) { | 2192 | else /* case 4 (new client) or cases 2, 3 (client reboot): */ |
2168 | /* | ||
2169 | * RFC 3530 14.2.33 CASE 2: | ||
2170 | * probable client reboot; state will be removed if | ||
2171 | * confirmed. | ||
2172 | */ | ||
2173 | new = create_client(clname, dname, rqstp, &clverifier); | ||
2174 | if (new == NULL) | ||
2175 | goto out; | ||
2176 | gen_clid(new); | ||
2177 | } else { | ||
2178 | /* | ||
2179 | * RFC 3530 14.2.33 CASE 3: | ||
2180 | * probable client reboot; state will be removed if | ||
2181 | * confirmed. | ||
2182 | */ | ||
2183 | expire_client(unconf); | ||
2184 | new = create_client(clname, dname, rqstp, &clverifier); | ||
2185 | if (new == NULL) | ||
2186 | goto out; | ||
2187 | gen_clid(new); | 2193 | gen_clid(new); |
2188 | } | ||
2189 | /* | 2194 | /* |
2190 | * XXX: we should probably set this at creation time, and check | 2195 | * XXX: we should probably set this at creation time, and check |
2191 | * for consistent minorversion use throughout: | 2196 | * for consistent minorversion use throughout: |
@@ -2203,17 +2208,11 @@ out: | |||
2203 | } | 2208 | } |
2204 | 2209 | ||
2205 | 2210 | ||
2206 | /* | ||
2207 | * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has | ||
2208 | * a description of SETCLIENTID_CONFIRM request processing consisting of 4 | ||
2209 | * bullets, labeled as CASE1 - CASE4 below. | ||
2210 | */ | ||
2211 | __be32 | 2211 | __be32 |
2212 | nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | 2212 | nfsd4_setclientid_confirm(struct svc_rqst *rqstp, |
2213 | struct nfsd4_compound_state *cstate, | 2213 | struct nfsd4_compound_state *cstate, |
2214 | struct nfsd4_setclientid_confirm *setclientid_confirm) | 2214 | struct nfsd4_setclientid_confirm *setclientid_confirm) |
2215 | { | 2215 | { |
2216 | struct sockaddr *sa = svc_addr(rqstp); | ||
2217 | struct nfs4_client *conf, *unconf; | 2216 | struct nfs4_client *conf, *unconf; |
2218 | nfs4_verifier confirm = setclientid_confirm->sc_confirm; | 2217 | nfs4_verifier confirm = setclientid_confirm->sc_confirm; |
2219 | clientid_t * clid = &setclientid_confirm->sc_clientid; | 2218 | clientid_t * clid = &setclientid_confirm->sc_clientid; |
@@ -2221,84 +2220,44 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
2221 | 2220 | ||
2222 | if (STALE_CLIENTID(clid)) | 2221 | if (STALE_CLIENTID(clid)) |
2223 | return nfserr_stale_clientid; | 2222 | return nfserr_stale_clientid; |
2224 | /* | ||
2225 | * XXX The Duplicate Request Cache (DRC) has been checked (??) | ||
2226 | * We get here on a DRC miss. | ||
2227 | */ | ||
2228 | |||
2229 | nfs4_lock_state(); | 2223 | nfs4_lock_state(); |
2230 | 2224 | ||
2231 | conf = find_confirmed_client(clid); | 2225 | conf = find_confirmed_client(clid); |
2232 | unconf = find_unconfirmed_client(clid); | 2226 | unconf = find_unconfirmed_client(clid); |
2233 | |||
2234 | status = nfserr_clid_inuse; | ||
2235 | if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) | ||
2236 | goto out; | ||
2237 | if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) | ||
2238 | goto out; | ||
2239 | |||
2240 | /* | 2227 | /* |
2241 | * section 14.2.34 of RFC 3530 has a description of | 2228 | * We try hard to give out unique clientid's, so if we get an |
2242 | * SETCLIENTID_CONFIRM request processing consisting | 2229 | * attempt to confirm the same clientid with a different cred, |
2243 | * of 4 bullet points, labeled as CASE1 - CASE4 below. | 2230 | * there's a bug somewhere. Let's charitably assume it's our |
2231 | * bug. | ||
2244 | */ | 2232 | */ |
2245 | if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) { | 2233 | status = nfserr_serverfault; |
2246 | /* | 2234 | if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) |
2247 | * RFC 3530 14.2.34 CASE 1: | 2235 | goto out; |
2248 | * callback update | 2236 | if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) |
2249 | */ | 2237 | goto out; |
2250 | if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) | 2238 | /* cases below refer to rfc 3530 section 14.2.34: */ |
2251 | status = nfserr_clid_inuse; | 2239 | if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { |
2252 | else { | 2240 | if (conf && !unconf) /* case 2: probable retransmit */ |
2253 | nfsd4_change_callback(conf, &unconf->cl_cb_conn); | ||
2254 | nfsd4_probe_callback(conf); | ||
2255 | expire_client(unconf); | ||
2256 | status = nfs_ok; | 2241 | status = nfs_ok; |
2242 | else /* case 4: client hasn't noticed we rebooted yet? */ | ||
2243 | status = nfserr_stale_clientid; | ||
2244 | goto out; | ||
2245 | } | ||
2246 | status = nfs_ok; | ||
2247 | if (conf) { /* case 1: callback update */ | ||
2248 | nfsd4_change_callback(conf, &unconf->cl_cb_conn); | ||
2249 | nfsd4_probe_callback(conf); | ||
2250 | expire_client(unconf); | ||
2251 | } else { /* case 3: normal case; new or rebooted client */ | ||
2252 | unsigned int hash = clientstr_hashval(unconf->cl_recdir); | ||
2257 | 2253 | ||
2254 | conf = find_confirmed_client_by_str(unconf->cl_recdir, hash); | ||
2255 | if (conf) { | ||
2256 | nfsd4_client_record_remove(conf); | ||
2257 | expire_client(conf); | ||
2258 | } | 2258 | } |
2259 | } else if (conf && !unconf) { | 2259 | move_to_confirmed(unconf); |
2260 | /* | 2260 | nfsd4_probe_callback(unconf); |
2261 | * RFC 3530 14.2.34 CASE 2: | ||
2262 | * probable retransmitted request; play it safe and | ||
2263 | * do nothing. | ||
2264 | */ | ||
2265 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) | ||
2266 | status = nfserr_clid_inuse; | ||
2267 | else | ||
2268 | status = nfs_ok; | ||
2269 | } else if (!conf && unconf | ||
2270 | && same_verf(&unconf->cl_confirm, &confirm)) { | ||
2271 | /* | ||
2272 | * RFC 3530 14.2.34 CASE 3: | ||
2273 | * Normal case; new or rebooted client: | ||
2274 | */ | ||
2275 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { | ||
2276 | status = nfserr_clid_inuse; | ||
2277 | } else { | ||
2278 | unsigned int hash = | ||
2279 | clientstr_hashval(unconf->cl_recdir); | ||
2280 | conf = find_confirmed_client_by_str(unconf->cl_recdir, | ||
2281 | hash); | ||
2282 | if (conf) { | ||
2283 | nfsd4_client_record_remove(conf); | ||
2284 | expire_client(conf); | ||
2285 | } | ||
2286 | move_to_confirmed(unconf); | ||
2287 | conf = unconf; | ||
2288 | nfsd4_probe_callback(conf); | ||
2289 | status = nfs_ok; | ||
2290 | } | ||
2291 | } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) | ||
2292 | && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, | ||
2293 | &confirm)))) { | ||
2294 | /* | ||
2295 | * RFC 3530 14.2.34 CASE 4: | ||
2296 | * Client probably hasn't noticed that we rebooted yet. | ||
2297 | */ | ||
2298 | status = nfserr_stale_clientid; | ||
2299 | } else { | ||
2300 | /* check that we have hit one of the cases...*/ | ||
2301 | status = nfserr_clid_inuse; | ||
2302 | } | 2261 | } |
2303 | out: | 2262 | out: |
2304 | nfs4_unlock_state(); | 2263 | nfs4_unlock_state(); |
@@ -2454,8 +2413,8 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, | |||
2454 | stp->st_file = fp; | 2413 | stp->st_file = fp; |
2455 | stp->st_access_bmap = 0; | 2414 | stp->st_access_bmap = 0; |
2456 | stp->st_deny_bmap = 0; | 2415 | stp->st_deny_bmap = 0; |
2457 | __set_bit(open->op_share_access, &stp->st_access_bmap); | 2416 | set_access(open->op_share_access, stp); |
2458 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 2417 | set_deny(open->op_share_deny, stp); |
2459 | stp->st_openstp = NULL; | 2418 | stp->st_openstp = NULL; |
2460 | } | 2419 | } |
2461 | 2420 | ||
@@ -2534,8 +2493,8 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) | |||
2534 | ret = nfserr_locked; | 2493 | ret = nfserr_locked; |
2535 | /* Search for conflicting share reservations */ | 2494 | /* Search for conflicting share reservations */ |
2536 | list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { | 2495 | list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { |
2537 | if (test_bit(deny_type, &stp->st_deny_bmap) || | 2496 | if (test_deny(deny_type, stp) || |
2538 | test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) | 2497 | test_deny(NFS4_SHARE_DENY_BOTH, stp)) |
2539 | goto out; | 2498 | goto out; |
2540 | } | 2499 | } |
2541 | ret = nfs_ok; | 2500 | ret = nfs_ok; |
@@ -2791,7 +2750,7 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c | |||
2791 | bool new_access; | 2750 | bool new_access; |
2792 | __be32 status; | 2751 | __be32 status; |
2793 | 2752 | ||
2794 | new_access = !test_bit(op_share_access, &stp->st_access_bmap); | 2753 | new_access = !test_access(op_share_access, stp); |
2795 | if (new_access) { | 2754 | if (new_access) { |
2796 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); | 2755 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); |
2797 | if (status) | 2756 | if (status) |
@@ -2806,8 +2765,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c | |||
2806 | return status; | 2765 | return status; |
2807 | } | 2766 | } |
2808 | /* remember the open */ | 2767 | /* remember the open */ |
2809 | __set_bit(op_share_access, &stp->st_access_bmap); | 2768 | set_access(op_share_access, stp); |
2810 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 2769 | set_deny(open->op_share_deny, stp); |
2811 | 2770 | ||
2812 | return nfs_ok; | 2771 | return nfs_ok; |
2813 | } | 2772 | } |
@@ -3282,18 +3241,18 @@ STALE_STATEID(stateid_t *stateid) | |||
3282 | } | 3241 | } |
3283 | 3242 | ||
3284 | static inline int | 3243 | static inline int |
3285 | access_permit_read(unsigned long access_bmap) | 3244 | access_permit_read(struct nfs4_ol_stateid *stp) |
3286 | { | 3245 | { |
3287 | return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || | 3246 | return test_access(NFS4_SHARE_ACCESS_READ, stp) || |
3288 | test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) || | 3247 | test_access(NFS4_SHARE_ACCESS_BOTH, stp) || |
3289 | test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap); | 3248 | test_access(NFS4_SHARE_ACCESS_WRITE, stp); |
3290 | } | 3249 | } |
3291 | 3250 | ||
3292 | static inline int | 3251 | static inline int |
3293 | access_permit_write(unsigned long access_bmap) | 3252 | access_permit_write(struct nfs4_ol_stateid *stp) |
3294 | { | 3253 | { |
3295 | return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || | 3254 | return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || |
3296 | test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); | 3255 | test_access(NFS4_SHARE_ACCESS_BOTH, stp); |
3297 | } | 3256 | } |
3298 | 3257 | ||
3299 | static | 3258 | static |
@@ -3304,9 +3263,9 @@ __be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags) | |||
3304 | /* For lock stateid's, we test the parent open, not the lock: */ | 3263 | /* For lock stateid's, we test the parent open, not the lock: */ |
3305 | if (stp->st_openstp) | 3264 | if (stp->st_openstp) |
3306 | stp = stp->st_openstp; | 3265 | stp = stp->st_openstp; |
3307 | if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) | 3266 | if ((flags & WR_STATE) && !access_permit_write(stp)) |
3308 | goto out; | 3267 | goto out; |
3309 | if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) | 3268 | if ((flags & RD_STATE) && !access_permit_read(stp)) |
3310 | goto out; | 3269 | goto out; |
3311 | status = nfs_ok; | 3270 | status = nfs_ok; |
3312 | out: | 3271 | out: |
@@ -3346,7 +3305,7 @@ static bool stateid_generation_after(stateid_t *a, stateid_t *b) | |||
3346 | return (s32)a->si_generation - (s32)b->si_generation > 0; | 3305 | return (s32)a->si_generation - (s32)b->si_generation > 0; |
3347 | } | 3306 | } |
3348 | 3307 | ||
3349 | static int check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) | 3308 | static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) |
3350 | { | 3309 | { |
3351 | /* | 3310 | /* |
3352 | * When sessions are used the stateid generation number is ignored | 3311 | * When sessions are used the stateid generation number is ignored |
@@ -3655,10 +3614,10 @@ out: | |||
3655 | 3614 | ||
3656 | static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) | 3615 | static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access) |
3657 | { | 3616 | { |
3658 | if (!test_bit(access, &stp->st_access_bmap)) | 3617 | if (!test_access(access, stp)) |
3659 | return; | 3618 | return; |
3660 | nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); | 3619 | nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); |
3661 | __clear_bit(access, &stp->st_access_bmap); | 3620 | clear_access(access, stp); |
3662 | } | 3621 | } |
3663 | 3622 | ||
3664 | static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) | 3623 | static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access) |
@@ -3680,12 +3639,12 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac | |||
3680 | } | 3639 | } |
3681 | 3640 | ||
3682 | static void | 3641 | static void |
3683 | reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) | 3642 | reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp) |
3684 | { | 3643 | { |
3685 | int i; | 3644 | int i; |
3686 | for (i = 0; i < 4; i++) { | 3645 | for (i = 0; i < 4; i++) { |
3687 | if ((i & deny) != i) | 3646 | if ((i & deny) != i) |
3688 | __clear_bit(i, bmap); | 3647 | clear_deny(i, stp); |
3689 | } | 3648 | } |
3690 | } | 3649 | } |
3691 | 3650 | ||
@@ -3712,19 +3671,19 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, | |||
3712 | if (status) | 3671 | if (status) |
3713 | goto out; | 3672 | goto out; |
3714 | status = nfserr_inval; | 3673 | status = nfserr_inval; |
3715 | if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { | 3674 | if (!test_access(od->od_share_access, stp)) { |
3716 | dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", | 3675 | dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", |
3717 | stp->st_access_bmap, od->od_share_access); | 3676 | stp->st_access_bmap, od->od_share_access); |
3718 | goto out; | 3677 | goto out; |
3719 | } | 3678 | } |
3720 | if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { | 3679 | if (!test_deny(od->od_share_deny, stp)) { |
3721 | dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", | 3680 | dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", |
3722 | stp->st_deny_bmap, od->od_share_deny); | 3681 | stp->st_deny_bmap, od->od_share_deny); |
3723 | goto out; | 3682 | goto out; |
3724 | } | 3683 | } |
3725 | nfs4_stateid_downgrade(stp, od->od_share_access); | 3684 | nfs4_stateid_downgrade(stp, od->od_share_access); |
3726 | 3685 | ||
3727 | reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); | 3686 | reset_union_bmap_deny(od->od_share_deny, stp); |
3728 | 3687 | ||
3729 | update_stateid(&stp->st_stid.sc_stateid); | 3688 | update_stateid(&stp->st_stid.sc_stateid); |
3730 | memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); | 3689 | memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); |
@@ -4014,13 +3973,13 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) | |||
4014 | struct nfs4_file *fp = lock_stp->st_file; | 3973 | struct nfs4_file *fp = lock_stp->st_file; |
4015 | int oflag = nfs4_access_to_omode(access); | 3974 | int oflag = nfs4_access_to_omode(access); |
4016 | 3975 | ||
4017 | if (test_bit(access, &lock_stp->st_access_bmap)) | 3976 | if (test_access(access, lock_stp)) |
4018 | return; | 3977 | return; |
4019 | nfs4_file_get_access(fp, oflag); | 3978 | nfs4_file_get_access(fp, oflag); |
4020 | __set_bit(access, &lock_stp->st_access_bmap); | 3979 | set_access(access, lock_stp); |
4021 | } | 3980 | } |
4022 | 3981 | ||
4023 | __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) | 3982 | static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) |
4024 | { | 3983 | { |
4025 | struct nfs4_file *fi = ost->st_file; | 3984 | struct nfs4_file *fi = ost->st_file; |
4026 | struct nfs4_openowner *oo = openowner(ost->st_stateowner); | 3985 | struct nfs4_openowner *oo = openowner(ost->st_stateowner); |
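
The nfs4state.c hunks above replace open-coded __set_bit/test_bit operations on st_access_bmap and st_deny_bmap with small named helpers (set_access, test_access, clear_access, bmap_to_share_mode, and the deny counterparts). Below is a minimal user-space sketch of that helper pattern; the reduced struct, the enum values, and main() are illustrative stand-ins rather than the kernel definitions:

#include <stdbool.h>
#include <stdio.h>

/* reduced stand-in for struct nfs4_ol_stateid: just the two share bitmaps */
struct ol_stateid {
	unsigned long st_access_bmap;
	unsigned long st_deny_bmap;
};

/* NFSv4 share values the bitmaps are indexed by: READ=1, WRITE=2, BOTH=3 */
enum { SHARE_READ = 1, SHARE_WRITE = 2, SHARE_BOTH = 3 };

/* set/test share access for a given stateid, mirroring the new helpers */
static void set_access(unsigned int access, struct ol_stateid *stp)
{
	stp->st_access_bmap |= 1UL << access;
}

static bool test_access(unsigned int access, struct ol_stateid *stp)
{
	return stp->st_access_bmap & (1UL << access);
}

/* OR together every share mode recorded in a bitmap */
static unsigned int bmap_to_share_mode(unsigned long bmap)
{
	unsigned int i, access = 0;

	for (i = 1; i < 4; i++)
		if (bmap & (1UL << i))
			access |= i;
	return access;
}

int main(void)
{
	struct ol_stateid stp = { 0, 0 };

	set_access(SHARE_READ, &stp);
	set_access(SHARE_WRITE, &stp);
	printf("read recorded: %d\n", test_access(SHARE_READ, &stp));
	printf("combined share mode: %u\n", bmap_to_share_mode(stp.st_access_bmap));
	return 0;
}

Keeping every reader of the bitmaps behind one accessor is what lets later hunks (release_all_access, nfs4_stateid_downgrade_bit, the lock and open-downgrade paths) change representation or add checks in a single place.
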
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 74c00bc92b9a..4949667c84ea 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -1674,12 +1674,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
1674 | 1674 | ||
1675 | static void write32(__be32 **p, u32 n) | 1675 | static void write32(__be32 **p, u32 n) |
1676 | { | 1676 | { |
1677 | *(*p)++ = n; | 1677 | *(*p)++ = htonl(n); |
1678 | } | 1678 | } |
1679 | 1679 | ||
1680 | static void write64(__be32 **p, u64 n) | 1680 | static void write64(__be32 **p, u64 n) |
1681 | { | 1681 | { |
1682 | write32(p, (u32)(n >> 32)); | 1682 | write32(p, (n >> 32)); |
1683 | write32(p, (u32)n); | 1683 | write32(p, (u32)n); |
1684 | } | 1684 | } |
1685 | 1685 | ||
@@ -1744,15 +1744,16 @@ static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, _ | |||
1744 | } | 1744 | } |
1745 | 1745 | ||
1746 | /* Encode as an array of strings the string given with components | 1746 | /* Encode as an array of strings the string given with components |
1747 | * separated @sep. | 1747 | * separated by @sep, escaped with esc_enter and esc_exit. |
1748 | */ | 1748 | */ |
1749 | static __be32 nfsd4_encode_components(char sep, char *components, | 1749 | static __be32 nfsd4_encode_components_esc(char sep, char *components, |
1750 | __be32 **pp, int *buflen) | 1750 | __be32 **pp, int *buflen, |
1751 | char esc_enter, char esc_exit) | ||
1751 | { | 1752 | { |
1752 | __be32 *p = *pp; | 1753 | __be32 *p = *pp; |
1753 | __be32 *countp = p; | 1754 | __be32 *countp = p; |
1754 | int strlen, count=0; | 1755 | int strlen, count=0; |
1755 | char *str, *end; | 1756 | char *str, *end, *next; |
1756 | 1757 | ||
1757 | dprintk("nfsd4_encode_components(%s)\n", components); | 1758 | dprintk("nfsd4_encode_components(%s)\n", components); |
1758 | if ((*buflen -= 4) < 0) | 1759 | if ((*buflen -= 4) < 0) |
@@ -1760,8 +1761,23 @@ static __be32 nfsd4_encode_components(char sep, char *components, | |||
1760 | WRITE32(0); /* We will fill this in with @count later */ | 1761 | WRITE32(0); /* We will fill this in with @count later */ |
1761 | end = str = components; | 1762 | end = str = components; |
1762 | while (*end) { | 1763 | while (*end) { |
1763 | for (; *end && (*end != sep); end++) | 1764 | bool found_esc = false; |
1764 | ; /* Point to end of component */ | 1765 | |
1766 | /* try to parse as esc_enter, ..., esc_exit, sep */ | ||
1767 | if (*str == esc_enter) { | ||
1768 | for (; *end && (*end != esc_exit); end++) | ||
1769 | /* find esc_exit or end of string */; | ||
1770 | next = end + 1; | ||
1771 | if (*end && (!*next || *next == sep)) { | ||
1772 | str++; | ||
1773 | found_esc = true; | ||
1774 | } | ||
1775 | } | ||
1776 | |||
1777 | if (!found_esc) | ||
1778 | for (; *end && (*end != sep); end++) | ||
1779 | /* find sep or end of string */; | ||
1780 | |||
1765 | strlen = end - str; | 1781 | strlen = end - str; |
1766 | if (strlen) { | 1782 | if (strlen) { |
1767 | if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) | 1783 | if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0) |
@@ -1780,6 +1796,15 @@ static __be32 nfsd4_encode_components(char sep, char *components, | |||
1780 | return 0; | 1796 | return 0; |
1781 | } | 1797 | } |
1782 | 1798 | ||
1799 | /* Encode as an array of strings the string given with components | ||
1800 | * separated @sep. | ||
1801 | */ | ||
1802 | static __be32 nfsd4_encode_components(char sep, char *components, | ||
1803 | __be32 **pp, int *buflen) | ||
1804 | { | ||
1805 | return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0); | ||
1806 | } | ||
1807 | |||
1783 | /* | 1808 | /* |
1784 | * encode a location element of a fs_locations structure | 1809 | * encode a location element of a fs_locations structure |
1785 | */ | 1810 | */ |
@@ -1789,7 +1814,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, | |||
1789 | __be32 status; | 1814 | __be32 status; |
1790 | __be32 *p = *pp; | 1815 | __be32 *p = *pp; |
1791 | 1816 | ||
1792 | status = nfsd4_encode_components(':', location->hosts, &p, buflen); | 1817 | status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen, |
1818 | '[', ']'); | ||
1793 | if (status) | 1819 | if (status) |
1794 | return status; | 1820 | return status; |
1795 | status = nfsd4_encode_components('/', location->path, &p, buflen); | 1821 | status = nfsd4_encode_components('/', location->path, &p, buflen); |
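
nfsd4_encode_components_esc() splits the fs_locations host string on ':' but treats a bracketed token, such as an IPv6 literal, as a single component with the brackets stripped. The sketch below reproduces only that splitting rule in plain C, printing components instead of XDR-encoding them; the sample input string is hypothetical:

#include <stdio.h>
#include <string.h>

/* Split @components on @sep, but keep anything between esc_enter/esc_exit
 * (e.g. '[' and ']' around an IPv6 literal) as one component. */
static void split_components(const char *components, char sep,
			     char esc_enter, char esc_exit)
{
	const char *str = components, *end = components;

	while (*end) {
		int found_esc = 0;

		if (*str == esc_enter) {
			/* scan to the matching esc_exit (or end of string) */
			while (*end && *end != esc_exit)
				end++;
			if (*end && (!end[1] || end[1] == sep)) {
				str++;		/* drop the opening bracket */
				found_esc = 1;
			}
		}
		if (!found_esc)
			while (*end && *end != sep)
				end++;

		if (end > str)
			printf("component: %.*s\n", (int)(end - str), str);

		if (found_esc)
			end++;			/* skip the closing bracket */
		if (*end)
			end++;			/* skip the separator */
		str = end;
	}
}

int main(void)
{
	/* hypothetical fs_locations host string: IPv6 literal plus a hostname */
	split_components("[fe80::1]:server2", ':', '[', ']');
	return 0;
}

With the example input this prints "fe80::1" and "server2"; without the escape handling the IPv6 literal would be broken apart at every ':', which is exactly what the hunk above avoids for location->hosts.
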
@@ -3251,7 +3277,7 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w | |||
3251 | } | 3277 | } |
3252 | 3278 | ||
3253 | static __be32 | 3279 | static __be32 |
3254 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, | 3280 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, |
3255 | struct nfsd4_exchange_id *exid) | 3281 | struct nfsd4_exchange_id *exid) |
3256 | { | 3282 | { |
3257 | __be32 *p; | 3283 | __be32 *p; |
@@ -3306,7 +3332,7 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, | |||
3306 | } | 3332 | } |
3307 | 3333 | ||
3308 | static __be32 | 3334 | static __be32 |
3309 | nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, | 3335 | nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, |
3310 | struct nfsd4_create_session *sess) | 3336 | struct nfsd4_create_session *sess) |
3311 | { | 3337 | { |
3312 | __be32 *p; | 3338 | __be32 *p; |
@@ -3355,14 +3381,14 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, | |||
3355 | } | 3381 | } |
3356 | 3382 | ||
3357 | static __be32 | 3383 | static __be32 |
3358 | nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, | 3384 | nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, __be32 nfserr, |
3359 | struct nfsd4_destroy_session *destroy_session) | 3385 | struct nfsd4_destroy_session *destroy_session) |
3360 | { | 3386 | { |
3361 | return nfserr; | 3387 | return nfserr; |
3362 | } | 3388 | } |
3363 | 3389 | ||
3364 | static __be32 | 3390 | static __be32 |
3365 | nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr, | 3391 | nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, |
3366 | struct nfsd4_free_stateid *free_stateid) | 3392 | struct nfsd4_free_stateid *free_stateid) |
3367 | { | 3393 | { |
3368 | __be32 *p; | 3394 | __be32 *p; |
@@ -3371,13 +3397,13 @@ nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr, | |||
3371 | return nfserr; | 3397 | return nfserr; |
3372 | 3398 | ||
3373 | RESERVE_SPACE(4); | 3399 | RESERVE_SPACE(4); |
3374 | WRITE32(nfserr); | 3400 | *p++ = nfserr; |
3375 | ADJUST_ARGS(); | 3401 | ADJUST_ARGS(); |
3376 | return nfserr; | 3402 | return nfserr; |
3377 | } | 3403 | } |
3378 | 3404 | ||
3379 | static __be32 | 3405 | static __be32 |
3380 | nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | 3406 | nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, |
3381 | struct nfsd4_sequence *seq) | 3407 | struct nfsd4_sequence *seq) |
3382 | { | 3408 | { |
3383 | __be32 *p; | 3409 | __be32 *p; |
@@ -3399,8 +3425,8 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | |||
3399 | return 0; | 3425 | return 0; |
3400 | } | 3426 | } |
3401 | 3427 | ||
3402 | __be32 | 3428 | static __be32 |
3403 | nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, | 3429 | nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, |
3404 | struct nfsd4_test_stateid *test_stateid) | 3430 | struct nfsd4_test_stateid *test_stateid) |
3405 | { | 3431 | { |
3406 | struct nfsd4_test_stateid_id *stateid, *next; | 3432 | struct nfsd4_test_stateid_id *stateid, *next; |
@@ -3503,7 +3529,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
3503 | * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so | 3529 | * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so |
3504 | * will be at least a page and will therefore hold the xdr_buf head. | 3530 | * will be at least a page and will therefore hold the xdr_buf head. |
3505 | */ | 3531 | */ |
3506 | int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) | 3532 | __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) |
3507 | { | 3533 | { |
3508 | struct xdr_buf *xb = &resp->rqstp->rq_res; | 3534 | struct xdr_buf *xb = &resp->rqstp->rq_res; |
3509 | struct nfsd4_session *session = NULL; | 3535 | struct nfsd4_session *session = NULL; |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 72699885ac48..c55298ed5772 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -661,6 +661,7 @@ static ssize_t __write_ports_addfd(char *buf) | |||
661 | { | 661 | { |
662 | char *mesg = buf; | 662 | char *mesg = buf; |
663 | int fd, err; | 663 | int fd, err; |
664 | struct net *net = &init_net; | ||
664 | 665 | ||
665 | err = get_int(&mesg, &fd); | 666 | err = get_int(&mesg, &fd); |
666 | if (err != 0 || fd < 0) | 667 | if (err != 0 || fd < 0) |
@@ -672,6 +673,8 @@ static ssize_t __write_ports_addfd(char *buf) | |||
672 | 673 | ||
673 | err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); | 674 | err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); |
674 | if (err < 0) { | 675 | if (err < 0) { |
676 | if (nfsd_serv->sv_nrthreads == 1) | ||
677 | svc_shutdown_net(nfsd_serv, net); | ||
675 | svc_destroy(nfsd_serv); | 678 | svc_destroy(nfsd_serv); |
676 | return err; | 679 | return err; |
677 | } | 680 | } |
@@ -709,6 +712,7 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
709 | char transport[16]; | 712 | char transport[16]; |
710 | struct svc_xprt *xprt; | 713 | struct svc_xprt *xprt; |
711 | int port, err; | 714 | int port, err; |
715 | struct net *net = &init_net; | ||
712 | 716 | ||
713 | if (sscanf(buf, "%15s %4u", transport, &port) != 2) | 717 | if (sscanf(buf, "%15s %4u", transport, &port) != 2) |
714 | return -EINVAL; | 718 | return -EINVAL; |
@@ -720,12 +724,12 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
720 | if (err != 0) | 724 | if (err != 0) |
721 | return err; | 725 | return err; |
722 | 726 | ||
723 | err = svc_create_xprt(nfsd_serv, transport, &init_net, | 727 | err = svc_create_xprt(nfsd_serv, transport, net, |
724 | PF_INET, port, SVC_SOCK_ANONYMOUS); | 728 | PF_INET, port, SVC_SOCK_ANONYMOUS); |
725 | if (err < 0) | 729 | if (err < 0) |
726 | goto out_err; | 730 | goto out_err; |
727 | 731 | ||
728 | err = svc_create_xprt(nfsd_serv, transport, &init_net, | 732 | err = svc_create_xprt(nfsd_serv, transport, net, |
729 | PF_INET6, port, SVC_SOCK_ANONYMOUS); | 733 | PF_INET6, port, SVC_SOCK_ANONYMOUS); |
730 | if (err < 0 && err != -EAFNOSUPPORT) | 734 | if (err < 0 && err != -EAFNOSUPPORT) |
731 | goto out_close; | 735 | goto out_close; |
@@ -734,12 +738,14 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
734 | nfsd_serv->sv_nrthreads--; | 738 | nfsd_serv->sv_nrthreads--; |
735 | return 0; | 739 | return 0; |
736 | out_close: | 740 | out_close: |
737 | xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port); | 741 | xprt = svc_find_xprt(nfsd_serv, transport, net, PF_INET, port); |
738 | if (xprt != NULL) { | 742 | if (xprt != NULL) { |
739 | svc_close_xprt(xprt); | 743 | svc_close_xprt(xprt); |
740 | svc_xprt_put(xprt); | 744 | svc_xprt_put(xprt); |
741 | } | 745 | } |
742 | out_err: | 746 | out_err: |
747 | if (nfsd_serv->sv_nrthreads == 1) | ||
748 | svc_shutdown_net(nfsd_serv, net); | ||
743 | svc_destroy(nfsd_serv); | 749 | svc_destroy(nfsd_serv); |
744 | return err; | 750 | return err; |
745 | } | 751 | } |
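
The error paths patched in nfsctl.c above, together with the nfssvc.c paths that follow, all add the same rule: whichever caller drops the last reference to nfsd_serv (sv_nrthreads == 1) must shut down the per-net resources before svc_destroy() releases the server. A toy sketch of that last-reference shutdown pattern, using made-up types in place of the sunrpc structures:

#include <stdio.h>
#include <stdlib.h>

struct serv {
	int nrthreads;		/* reference count, as sv_nrthreads is used here */
	int net_active;		/* stand-in for per-net sockets/transports */
};

static void serv_shutdown_net(struct serv *s)
{
	s->net_active = 0;
	printf("per-net resources shut down\n");
}

/* The caller holding the last reference tears down per-namespace state,
 * then every caller drops its reference as before. */
static void serv_put(struct serv *s)
{
	if (s->nrthreads == 1)
		serv_shutdown_net(s);
	if (--s->nrthreads == 0) {
		printf("server freed\n");
		free(s);
	}
}

int main(void)
{
	struct serv *s = calloc(1, sizeof(*s));

	s->nrthreads = 2;	/* e.g. one nfsd thread plus one control-file user */
	s->net_active = 1;
	serv_put(s);		/* not last: per-net state stays up */
	serv_put(s);		/* last reference: shutdown, then free */
	return 0;
}
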
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cb4d51d8cbdb..ee709fc8f58b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/fs_struct.h> | 12 | #include <linux/fs_struct.h> |
13 | #include <linux/swap.h> | 13 | #include <linux/swap.h> |
14 | #include <linux/nsproxy.h> | ||
14 | 15 | ||
15 | #include <linux/sunrpc/stats.h> | 16 | #include <linux/sunrpc/stats.h> |
16 | #include <linux/sunrpc/svcsock.h> | 17 | #include <linux/sunrpc/svcsock.h> |
@@ -330,6 +331,8 @@ static int nfsd_get_default_max_blksize(void) | |||
330 | 331 | ||
331 | int nfsd_create_serv(void) | 332 | int nfsd_create_serv(void) |
332 | { | 333 | { |
334 | int error; | ||
335 | |||
333 | WARN_ON(!mutex_is_locked(&nfsd_mutex)); | 336 | WARN_ON(!mutex_is_locked(&nfsd_mutex)); |
334 | if (nfsd_serv) { | 337 | if (nfsd_serv) { |
335 | svc_get(nfsd_serv); | 338 | svc_get(nfsd_serv); |
@@ -343,6 +346,12 @@ int nfsd_create_serv(void) | |||
343 | if (nfsd_serv == NULL) | 346 | if (nfsd_serv == NULL) |
344 | return -ENOMEM; | 347 | return -ENOMEM; |
345 | 348 | ||
349 | error = svc_bind(nfsd_serv, current->nsproxy->net_ns); | ||
350 | if (error < 0) { | ||
351 | svc_destroy(nfsd_serv); | ||
352 | return error; | ||
353 | } | ||
354 | |||
346 | set_max_drc(); | 355 | set_max_drc(); |
347 | do_gettimeofday(&nfssvc_boot); /* record boot time */ | 356 | do_gettimeofday(&nfssvc_boot); /* record boot time */ |
348 | return 0; | 357 | return 0; |
@@ -373,6 +382,7 @@ int nfsd_set_nrthreads(int n, int *nthreads) | |||
373 | int i = 0; | 382 | int i = 0; |
374 | int tot = 0; | 383 | int tot = 0; |
375 | int err = 0; | 384 | int err = 0; |
385 | struct net *net = &init_net; | ||
376 | 386 | ||
377 | WARN_ON(!mutex_is_locked(&nfsd_mutex)); | 387 | WARN_ON(!mutex_is_locked(&nfsd_mutex)); |
378 | 388 | ||
@@ -417,6 +427,9 @@ int nfsd_set_nrthreads(int n, int *nthreads) | |||
417 | if (err) | 427 | if (err) |
418 | break; | 428 | break; |
419 | } | 429 | } |
430 | |||
431 | if (nfsd_serv->sv_nrthreads == 1) | ||
432 | svc_shutdown_net(nfsd_serv, net); | ||
420 | svc_destroy(nfsd_serv); | 433 | svc_destroy(nfsd_serv); |
421 | 434 | ||
422 | return err; | 435 | return err; |
@@ -432,6 +445,7 @@ nfsd_svc(unsigned short port, int nrservs) | |||
432 | { | 445 | { |
433 | int error; | 446 | int error; |
434 | bool nfsd_up_before; | 447 | bool nfsd_up_before; |
448 | struct net *net = &init_net; | ||
435 | 449 | ||
436 | mutex_lock(&nfsd_mutex); | 450 | mutex_lock(&nfsd_mutex); |
437 | dprintk("nfsd: creating service\n"); | 451 | dprintk("nfsd: creating service\n"); |
@@ -464,6 +478,8 @@ out_shutdown: | |||
464 | if (error < 0 && !nfsd_up_before) | 478 | if (error < 0 && !nfsd_up_before) |
465 | nfsd_shutdown(); | 479 | nfsd_shutdown(); |
466 | out_destroy: | 480 | out_destroy: |
481 | if (nfsd_serv->sv_nrthreads == 1) | ||
482 | svc_shutdown_net(nfsd_serv, net); | ||
467 | svc_destroy(nfsd_serv); /* Release server */ | 483 | svc_destroy(nfsd_serv); /* Release server */ |
468 | out: | 484 | out: |
469 | mutex_unlock(&nfsd_mutex); | 485 | mutex_unlock(&nfsd_mutex); |
@@ -547,6 +563,9 @@ nfsd(void *vrqstp) | |||
547 | nfsdstats.th_cnt --; | 563 | nfsdstats.th_cnt --; |
548 | 564 | ||
549 | out: | 565 | out: |
566 | if (rqstp->rq_server->sv_nrthreads == 1) | ||
567 | svc_shutdown_net(rqstp->rq_server, &init_net); | ||
568 | |||
550 | /* Release the thread */ | 569 | /* Release the thread */ |
551 | svc_exit_thread(rqstp); | 570 | svc_exit_thread(rqstp); |
552 | 571 | ||
@@ -659,8 +678,12 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) | |||
659 | int nfsd_pool_stats_release(struct inode *inode, struct file *file) | 678 | int nfsd_pool_stats_release(struct inode *inode, struct file *file) |
660 | { | 679 | { |
661 | int ret = seq_release(inode, file); | 680 | int ret = seq_release(inode, file); |
681 | struct net *net = &init_net; | ||
682 | |||
662 | mutex_lock(&nfsd_mutex); | 683 | mutex_lock(&nfsd_mutex); |
663 | /* this function really, really should have been called svc_put() */ | 684 | /* this function really, really should have been called svc_put() */ |
685 | if (nfsd_serv->sv_nrthreads == 1) | ||
686 | svc_shutdown_net(nfsd_serv, net); | ||
664 | svc_destroy(nfsd_serv); | 687 | svc_destroy(nfsd_serv); |
665 | mutex_unlock(&nfsd_mutex); | 688 | mutex_unlock(&nfsd_mutex); |
666 | return ret; | 689 | return ret; |
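The nfsd_create_serv() hunk above binds the freshly created service to the caller's network namespace (current->nsproxy->net_ns) before any transports exist, and unwinds with a plain svc_destroy() if binding fails. A minimal sketch of that ordering, assuming svc_bind() is what attaches the per-net state that the later svc_shutdown_net() calls release (the helper name is illustrative):

/* Illustrative helper: create-then-bind, unwinding on failure. */
static int example_bind_serv(struct svc_serv *serv, struct net *net)
{
	int error;

	if (serv == NULL)
		return -ENOMEM;

	error = svc_bind(serv, net);	/* attach per-net resources */
	if (error < 0) {
		svc_destroy(serv);	/* nothing per-net to shut down yet */
		return error;
	}
	return 0;
}
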
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 89ab137d379a..849091e16ea6 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -232,7 +232,6 @@ struct nfs4_client { | |||
232 | time_t cl_time; /* time of last lease renewal */ | 232 | time_t cl_time; /* time of last lease renewal */ |
233 | struct sockaddr_storage cl_addr; /* client ipaddress */ | 233 | struct sockaddr_storage cl_addr; /* client ipaddress */ |
234 | u32 cl_flavor; /* setclientid pseudoflavor */ | 234 | u32 cl_flavor; /* setclientid pseudoflavor */ |
235 | char *cl_principal; /* setclientid principal name */ | ||
236 | struct svc_cred cl_cred; /* setclientid principal */ | 235 | struct svc_cred cl_cred; /* setclientid principal */ |
237 | clientid_t cl_clientid; /* generated by server */ | 236 | clientid_t cl_clientid; /* generated by server */ |
238 | nfs4_verifier cl_confirm; /* generated by server */ | 237 | nfs4_verifier cl_confirm; /* generated by server */ |
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 1b3501598ab5..acd127d4ee82 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -60,7 +60,7 @@ struct nfsd4_compound_state { | |||
60 | __be32 *datap; | 60 | __be32 *datap; |
61 | size_t iovlen; | 61 | size_t iovlen; |
62 | u32 minorversion; | 62 | u32 minorversion; |
63 | u32 status; | 63 | __be32 status; |
64 | stateid_t current_stateid; | 64 | stateid_t current_stateid; |
65 | stateid_t save_stateid; | 65 | stateid_t save_stateid; |
66 | /* to indicate current and saved state id presents */ | 66 | /* to indicate current and saved state id presents */ |
@@ -364,7 +364,7 @@ struct nfsd4_test_stateid_id { | |||
364 | }; | 364 | }; |
365 | 365 | ||
366 | struct nfsd4_test_stateid { | 366 | struct nfsd4_test_stateid { |
367 | __be32 ts_num_ids; | 367 | u32 ts_num_ids; |
368 | struct list_head ts_stateid_list; | 368 | struct list_head ts_stateid_list; |
369 | }; | 369 | }; |
370 | 370 | ||
@@ -549,7 +549,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, | |||
549 | struct nfsd4_compoundargs *); | 549 | struct nfsd4_compoundargs *); |
550 | int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, | 550 | int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, |
551 | struct nfsd4_compoundres *); | 551 | struct nfsd4_compoundres *); |
552 | int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); | 552 | __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); |
553 | void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); | 553 | void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); |
554 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); | 554 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); |
555 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | 555 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, |
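The xdr4.h hunks flip types between u32 and __be32 so that on-the-wire NFSv4 status words (cs->status, the nfsd4_check_resp_size() return) stay annotated as big-endian while host-side counters such as ts_num_ids stay plain integers; sparse can then flag any mix-up. The userspace snippet below only illustrates why the distinction matters; it uses htonl()/ntohl() in place of the kernel's cpu_to_be32()/be32_to_cpu(), and the values and variable names are made up.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t host_count = 3;		/* like ts_num_ids: host order */
	uint32_t wire_status = htonl(10036);	/* like cs->status: wire order */

	/* Comparing or doing arithmetic on a wire-order word silently goes
	 * wrong on little-endian machines; convert it first. */
	printf("raw wire word:  0x%08x\n", (unsigned int)wire_status);
	printf("decoded status: %u, count: %u\n",
	       (unsigned int)ntohl(wire_status), (unsigned int)host_count);
	return 0;
}
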
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 0bb2c2010b95..b72847988b78 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c | |||
@@ -508,31 +508,29 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, | |||
508 | return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); | 508 | return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); |
509 | } | 509 | } |
510 | 510 | ||
511 | static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, | 511 | static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp, |
512 | int connectable) | 512 | struct inode *parent) |
513 | { | 513 | { |
514 | struct nilfs_fid *fid = (struct nilfs_fid *)fh; | 514 | struct nilfs_fid *fid = (struct nilfs_fid *)fh; |
515 | struct inode *inode = dentry->d_inode; | ||
516 | struct nilfs_root *root = NILFS_I(inode)->i_root; | 515 | struct nilfs_root *root = NILFS_I(inode)->i_root; |
517 | int type; | 516 | int type; |
518 | 517 | ||
519 | if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || | 518 | if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { |
520 | (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) | 519 | *lenp = NILFS_FID_SIZE_CONNECTABLE; |
520 | return 255; | ||
521 | } | ||
522 | if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { | ||
523 | *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; | ||
521 | return 255; | 524 | return 255; |
525 | } | ||
522 | 526 | ||
523 | fid->cno = root->cno; | 527 | fid->cno = root->cno; |
524 | fid->ino = inode->i_ino; | 528 | fid->ino = inode->i_ino; |
525 | fid->gen = inode->i_generation; | 529 | fid->gen = inode->i_generation; |
526 | 530 | ||
527 | if (connectable && !S_ISDIR(inode->i_mode)) { | 531 | if (parent) { |
528 | struct inode *parent; | ||
529 | |||
530 | spin_lock(&dentry->d_lock); | ||
531 | parent = dentry->d_parent->d_inode; | ||
532 | fid->parent_ino = parent->i_ino; | 532 | fid->parent_ino = parent->i_ino; |
533 | fid->parent_gen = parent->i_generation; | 533 | fid->parent_gen = parent->i_generation; |
534 | spin_unlock(&dentry->d_lock); | ||
535 | |||
536 | type = FILEID_NILFS_WITH_PARENT; | 534 | type = FILEID_NILFS_WITH_PARENT; |
537 | *lenp = NILFS_FID_SIZE_CONNECTABLE; | 535 | *lenp = NILFS_FID_SIZE_CONNECTABLE; |
538 | } else { | 536 | } else { |
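The nilfs hunk is one instance of an export_operations change that recurs throughout this series: ->encode_fh() now receives the inode and, when a connectable handle is wanted, the parent inode directly, instead of a dentry plus a "connectable" flag, so filesystems no longer walk d_parent under d_lock. A standalone userspace sketch of the resulting shape, with invented type and constant names:

#include <stdint.h>

/* Invented, simplified stand-ins for the kernel structures. */
struct demo_inode { uint64_t i_ino; uint32_t i_generation; };

enum { FID_LEN_PLAIN = 3, FID_LEN_WITH_PARENT = 6, FID_TOO_SMALL = 255 };

static int demo_encode_fh(struct demo_inode *inode, uint32_t *fh, int *lenp,
			  struct demo_inode *parent)
{
	if (*lenp < (parent ? FID_LEN_WITH_PARENT : FID_LEN_PLAIN)) {
		*lenp = parent ? FID_LEN_WITH_PARENT : FID_LEN_PLAIN;
		return FID_TOO_SMALL;	/* tell the caller the buffer is short */
	}
	fh[0] = (uint32_t)(inode->i_ino >> 32);
	fh[1] = (uint32_t)inode->i_ino;
	fh[2] = inode->i_generation;
	*lenp = FID_LEN_PLAIN;
	if (parent) {
		fh[3] = (uint32_t)(parent->i_ino >> 32);
		fh[4] = (uint32_t)parent->i_ino;
		fh[5] = parent->i_generation;
		*lenp = FID_LEN_WITH_PARENT;
	}
	return parent ? 2 : 1;		/* fileid type, as in the ocfs2 hunk */
}
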
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ccb14d3fc0de..b39c5c161adb 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -123,7 +123,7 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) | |||
123 | } | 123 | } |
124 | EXPORT_SYMBOL_GPL(__fsnotify_parent); | 124 | EXPORT_SYMBOL_GPL(__fsnotify_parent); |
125 | 125 | ||
126 | static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | 126 | static int send_to_group(struct inode *to_tell, |
127 | struct fsnotify_mark *inode_mark, | 127 | struct fsnotify_mark *inode_mark, |
128 | struct fsnotify_mark *vfsmount_mark, | 128 | struct fsnotify_mark *vfsmount_mark, |
129 | __u32 mask, void *data, | 129 | __u32 mask, void *data, |
@@ -168,10 +168,10 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | |||
168 | vfsmount_test_mask &= ~inode_mark->ignored_mask; | 168 | vfsmount_test_mask &= ~inode_mark->ignored_mask; |
169 | } | 169 | } |
170 | 170 | ||
171 | pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p" | 171 | pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" |
172 | " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" | 172 | " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" |
173 | " data=%p data_is=%d cookie=%d event=%p\n", | 173 | " data=%p data_is=%d cookie=%d event=%p\n", |
174 | __func__, group, to_tell, mnt, mask, inode_mark, | 174 | __func__, group, to_tell, mask, inode_mark, |
175 | inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, | 175 | inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, |
176 | data_is, cookie, *event); | 176 | data_is, cookie, *event); |
177 | 177 | ||
@@ -258,16 +258,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
258 | 258 | ||
259 | if (inode_group > vfsmount_group) { | 259 | if (inode_group > vfsmount_group) { |
260 | /* handle inode */ | 260 | /* handle inode */ |
261 | ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, | 261 | ret = send_to_group(to_tell, inode_mark, NULL, mask, data, |
262 | data_is, cookie, file_name, &event); | 262 | data_is, cookie, file_name, &event); |
263 | /* we didn't use the vfsmount_mark */ | 263 | /* we didn't use the vfsmount_mark */ |
264 | vfsmount_group = NULL; | 264 | vfsmount_group = NULL; |
265 | } else if (vfsmount_group > inode_group) { | 265 | } else if (vfsmount_group > inode_group) { |
266 | ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data, | 266 | ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, |
267 | data_is, cookie, file_name, &event); | 267 | data_is, cookie, file_name, &event); |
268 | inode_group = NULL; | 268 | inode_group = NULL; |
269 | } else { | 269 | } else { |
270 | ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark, | 270 | ret = send_to_group(to_tell, inode_mark, vfsmount_mark, |
271 | mask, data, data_is, cookie, file_name, | 271 | mask, data, data_is, cookie, file_name, |
272 | &event); | 272 | &event); |
273 | } | 273 | } |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 8639169221c7..7389d2d5e51d 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -2096,7 +2096,9 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, | |||
2096 | err = file_remove_suid(file); | 2096 | err = file_remove_suid(file); |
2097 | if (err) | 2097 | if (err) |
2098 | goto out; | 2098 | goto out; |
2099 | file_update_time(file); | 2099 | err = file_update_time(file); |
2100 | if (err) | ||
2101 | goto out; | ||
2100 | written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, | 2102 | written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, |
2101 | count); | 2103 | count); |
2102 | out: | 2104 | out: |
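The ntfs hunk reflects another cross-tree change in this series: file_update_time() now returns an error instead of void, and write paths are expected to check it before dirtying pages. A sketch of the call-site pattern; the surrounding helper is hypothetical.

/* Hypothetical write-prologue helper showing the new call-site pattern. */
static ssize_t example_write_prologue(struct file *file)
{
	ssize_t err;

	err = file_remove_suid(file);
	if (err)
		return err;

	err = file_update_time(file);	/* may now fail, e.g. if the fs
					 * cannot start a transaction */
	if (err)
		return err;

	return 0;	/* proceed with the buffered write */
}
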
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index c7ee03c22226..0725e6054650 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c | |||
@@ -422,45 +422,46 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, | |||
422 | struct ocfs2_blockcheck_stats *stats) | 422 | struct ocfs2_blockcheck_stats *stats) |
423 | { | 423 | { |
424 | int rc = 0; | 424 | int rc = 0; |
425 | struct ocfs2_block_check check; | 425 | u32 bc_crc32e; |
426 | u16 bc_ecc; | ||
426 | u32 crc, ecc; | 427 | u32 crc, ecc; |
427 | 428 | ||
428 | ocfs2_blockcheck_inc_check(stats); | 429 | ocfs2_blockcheck_inc_check(stats); |
429 | 430 | ||
430 | check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); | 431 | bc_crc32e = le32_to_cpu(bc->bc_crc32e); |
431 | check.bc_ecc = le16_to_cpu(bc->bc_ecc); | 432 | bc_ecc = le16_to_cpu(bc->bc_ecc); |
432 | 433 | ||
433 | memset(bc, 0, sizeof(struct ocfs2_block_check)); | 434 | memset(bc, 0, sizeof(struct ocfs2_block_check)); |
434 | 435 | ||
435 | /* Fast path - if the crc32 validates, we're good to go */ | 436 | /* Fast path - if the crc32 validates, we're good to go */ |
436 | crc = crc32_le(~0, data, blocksize); | 437 | crc = crc32_le(~0, data, blocksize); |
437 | if (crc == check.bc_crc32e) | 438 | if (crc == bc_crc32e) |
438 | goto out; | 439 | goto out; |
439 | 440 | ||
440 | ocfs2_blockcheck_inc_failure(stats); | 441 | ocfs2_blockcheck_inc_failure(stats); |
441 | mlog(ML_ERROR, | 442 | mlog(ML_ERROR, |
442 | "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", | 443 | "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", |
443 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 444 | (unsigned int)bc_crc32e, (unsigned int)crc); |
444 | 445 | ||
445 | /* Ok, try ECC fixups */ | 446 | /* Ok, try ECC fixups */ |
446 | ecc = ocfs2_hamming_encode_block(data, blocksize); | 447 | ecc = ocfs2_hamming_encode_block(data, blocksize); |
447 | ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc); | 448 | ocfs2_hamming_fix_block(data, blocksize, ecc ^ bc_ecc); |
448 | 449 | ||
449 | /* And check the crc32 again */ | 450 | /* And check the crc32 again */ |
450 | crc = crc32_le(~0, data, blocksize); | 451 | crc = crc32_le(~0, data, blocksize); |
451 | if (crc == check.bc_crc32e) { | 452 | if (crc == bc_crc32e) { |
452 | ocfs2_blockcheck_inc_recover(stats); | 453 | ocfs2_blockcheck_inc_recover(stats); |
453 | goto out; | 454 | goto out; |
454 | } | 455 | } |
455 | 456 | ||
456 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", | 457 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", |
457 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 458 | (unsigned int)bc_crc32e, (unsigned int)crc); |
458 | 459 | ||
459 | rc = -EIO; | 460 | rc = -EIO; |
460 | 461 | ||
461 | out: | 462 | out: |
462 | bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); | 463 | bc->bc_crc32e = cpu_to_le32(bc_crc32e); |
463 | bc->bc_ecc = cpu_to_le16(check.bc_ecc); | 464 | bc->bc_ecc = cpu_to_le16(bc_ecc); |
464 | 465 | ||
465 | return rc; | 466 | return rc; |
466 | } | 467 | } |
@@ -528,7 +529,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, | |||
528 | struct ocfs2_blockcheck_stats *stats) | 529 | struct ocfs2_blockcheck_stats *stats) |
529 | { | 530 | { |
530 | int i, rc = 0; | 531 | int i, rc = 0; |
531 | struct ocfs2_block_check check; | 532 | u32 bc_crc32e; |
533 | u16 bc_ecc; | ||
532 | u32 crc, ecc, fix; | 534 | u32 crc, ecc, fix; |
533 | 535 | ||
534 | BUG_ON(nr < 0); | 536 | BUG_ON(nr < 0); |
@@ -538,21 +540,21 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, | |||
538 | 540 | ||
539 | ocfs2_blockcheck_inc_check(stats); | 541 | ocfs2_blockcheck_inc_check(stats); |
540 | 542 | ||
541 | check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); | 543 | bc_crc32e = le32_to_cpu(bc->bc_crc32e); |
542 | check.bc_ecc = le16_to_cpu(bc->bc_ecc); | 544 | bc_ecc = le16_to_cpu(bc->bc_ecc); |
543 | 545 | ||
544 | memset(bc, 0, sizeof(struct ocfs2_block_check)); | 546 | memset(bc, 0, sizeof(struct ocfs2_block_check)); |
545 | 547 | ||
546 | /* Fast path - if the crc32 validates, we're good to go */ | 548 | /* Fast path - if the crc32 validates, we're good to go */ |
547 | for (i = 0, crc = ~0; i < nr; i++) | 549 | for (i = 0, crc = ~0; i < nr; i++) |
548 | crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); | 550 | crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); |
549 | if (crc == check.bc_crc32e) | 551 | if (crc == bc_crc32e) |
550 | goto out; | 552 | goto out; |
551 | 553 | ||
552 | ocfs2_blockcheck_inc_failure(stats); | 554 | ocfs2_blockcheck_inc_failure(stats); |
553 | mlog(ML_ERROR, | 555 | mlog(ML_ERROR, |
554 | "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", | 556 | "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", |
555 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 557 | (unsigned int)bc_crc32e, (unsigned int)crc); |
556 | 558 | ||
557 | /* Ok, try ECC fixups */ | 559 | /* Ok, try ECC fixups */ |
558 | for (i = 0, ecc = 0; i < nr; i++) { | 560 | for (i = 0, ecc = 0; i < nr; i++) { |
@@ -565,7 +567,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, | |||
565 | bhs[i]->b_size * 8, | 567 | bhs[i]->b_size * 8, |
566 | bhs[i]->b_size * 8 * i); | 568 | bhs[i]->b_size * 8 * i); |
567 | } | 569 | } |
568 | fix = ecc ^ check.bc_ecc; | 570 | fix = ecc ^ bc_ecc; |
569 | for (i = 0; i < nr; i++) { | 571 | for (i = 0; i < nr; i++) { |
570 | /* | 572 | /* |
571 | * Try the fix against each buffer. It will only affect | 573 | * Try the fix against each buffer. It will only affect |
@@ -578,19 +580,19 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, | |||
578 | /* And check the crc32 again */ | 580 | /* And check the crc32 again */ |
579 | for (i = 0, crc = ~0; i < nr; i++) | 581 | for (i = 0, crc = ~0; i < nr; i++) |
580 | crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); | 582 | crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); |
581 | if (crc == check.bc_crc32e) { | 583 | if (crc == bc_crc32e) { |
582 | ocfs2_blockcheck_inc_recover(stats); | 584 | ocfs2_blockcheck_inc_recover(stats); |
583 | goto out; | 585 | goto out; |
584 | } | 586 | } |
585 | 587 | ||
586 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", | 588 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", |
587 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 589 | (unsigned int)bc_crc32e, (unsigned int)crc); |
588 | 590 | ||
589 | rc = -EIO; | 591 | rc = -EIO; |
590 | 592 | ||
591 | out: | 593 | out: |
592 | bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); | 594 | bc->bc_crc32e = cpu_to_le32(bc_crc32e); |
593 | bc->bc_ecc = cpu_to_le16(check.bc_ecc); | 595 | bc->bc_ecc = cpu_to_le16(bc_ecc); |
594 | 596 | ||
595 | return rc; | 597 | return rc; |
596 | } | 598 | } |
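The blockcheck hunks stop reusing a struct ocfs2_block_check, whose fields are little-endian on disk, as scratch space for CPU-order values, and keep the decoded checksum and ECC in plain locals instead; that is both clearer and sparse-clean. The check/fix/recheck flow itself is unchanged. A compressed sketch, reusing only calls that appear in the hunks above (not a drop-in replacement):

static int example_validate(void *data, size_t blocksize,
			    struct ocfs2_block_check *bc)
{
	u32 bc_crc32e = le32_to_cpu(bc->bc_crc32e);	/* decode once */
	u16 bc_ecc = le16_to_cpu(bc->bc_ecc);
	u32 crc, ecc;
	int rc = 0;

	memset(bc, 0, sizeof(*bc));	/* the checksum covers a zeroed bc */

	crc = crc32_le(~0, data, blocksize);
	if (crc != bc_crc32e) {
		/* try a single-bit repair, then re-check */
		ecc = ocfs2_hamming_encode_block(data, blocksize);
		ocfs2_hamming_fix_block(data, blocksize, ecc ^ bc_ecc);
		crc = crc32_le(~0, data, blocksize);
		if (crc != bc_crc32e)
			rc = -EIO;
	}

	bc->bc_crc32e = cpu_to_le32(bc_crc32e);	/* restore on-disk fields */
	bc->bc_ecc = cpu_to_le16(bc_ecc);
	return rc;
}
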
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 3a3ed4bb794b..fbec0be62326 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c | |||
@@ -293,7 +293,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, | |||
293 | struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; | 293 | struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; |
294 | char *name; | 294 | char *name; |
295 | struct list_head *iter, *head=NULL; | 295 | struct list_head *iter, *head=NULL; |
296 | u64 cookie; | 296 | __be64 cookie; |
297 | u32 flags; | 297 | u32 flags; |
298 | u8 node; | 298 | u8 node; |
299 | 299 | ||
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index a5952ceecba5..de854cca12a2 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -679,7 +679,7 @@ struct dlm_query_join_packet { | |||
679 | }; | 679 | }; |
680 | 680 | ||
681 | union dlm_query_join_response { | 681 | union dlm_query_join_response { |
682 | u32 intval; | 682 | __be32 intval; |
683 | struct dlm_query_join_packet packet; | 683 | struct dlm_query_join_packet packet; |
684 | }; | 684 | }; |
685 | 685 | ||
@@ -755,8 +755,8 @@ struct dlm_query_region { | |||
755 | struct dlm_node_info { | 755 | struct dlm_node_info { |
756 | u8 ni_nodenum; | 756 | u8 ni_nodenum; |
757 | u8 pad1; | 757 | u8 pad1; |
758 | u16 ni_ipv4_port; | 758 | __be16 ni_ipv4_port; |
759 | u32 ni_ipv4_address; | 759 | __be32 ni_ipv4_address; |
760 | }; | 760 | }; |
761 | 761 | ||
762 | struct dlm_query_nodeinfo { | 762 | struct dlm_query_nodeinfo { |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 92f2ead0fab6..9e89d70df337 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -818,7 +818,7 @@ static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet, | |||
818 | union dlm_query_join_response response; | 818 | union dlm_query_join_response response; |
819 | 819 | ||
820 | response.packet = *packet; | 820 | response.packet = *packet; |
821 | *wire = cpu_to_be32(response.intval); | 821 | *wire = be32_to_cpu(response.intval); |
822 | } | 822 | } |
823 | 823 | ||
824 | static void dlm_query_join_wire_to_packet(u32 wire, | 824 | static void dlm_query_join_wire_to_packet(u32 wire, |
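The dlmcommon.h and dlmdomain.c hunks re-annotate the join-response union member and the node-info port/address fields as big-endian (__be32/__be16) and adjust dlm_query_join_packet_to_wire() to convert with be32_to_cpu(), so the struct-bytes-to-integer punning stays consistent across architectures and is visible to sparse. A roughly analogous, runnable userspace demonstration; htonl()/ntohl() stand in for the kernel helpers and the packet layout is invented:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* Invented 4-byte packet, laid out in wire (big-endian word) order. */
struct demo_packet {
	uint8_t code;
	uint8_t pad;
	uint8_t major;
	uint8_t minor;
};

union demo_response {
	uint32_t intval;		/* the packet bytes, viewed as one word */
	struct demo_packet packet;
};

int main(void)
{
	union demo_response r = { .packet = { .code = 1, .major = 2, .minor = 3 } };

	/* r.intval holds the packet bytes in memory order; treat it as a
	 * big-endian word and convert explicitly, otherwise the integer
	 * value differs between little- and big-endian hosts. */
	uint32_t host_view = ntohl(r.intval);

	printf("wire word as host integer: 0x%08x\n", (unsigned int)host_view);
	return 0;
}
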
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 745db42528d5..322216a5f0dd 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -177,21 +177,23 @@ bail: | |||
177 | return parent; | 177 | return parent; |
178 | } | 178 | } |
179 | 179 | ||
180 | static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, | 180 | static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len, |
181 | int connectable) | 181 | struct inode *parent) |
182 | { | 182 | { |
183 | struct inode *inode = dentry->d_inode; | ||
184 | int len = *max_len; | 183 | int len = *max_len; |
185 | int type = 1; | 184 | int type = 1; |
186 | u64 blkno; | 185 | u64 blkno; |
187 | u32 generation; | 186 | u32 generation; |
188 | __le32 *fh = (__force __le32 *) fh_in; | 187 | __le32 *fh = (__force __le32 *) fh_in; |
189 | 188 | ||
189 | #ifdef TRACE_HOOKS_ARE_NOT_BRAINDEAD_IN_YOUR_OPINION | ||
190 | #error "You go ahead and fix that mess, then. Somehow" | ||
190 | trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len, | 191 | trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len, |
191 | dentry->d_name.name, | 192 | dentry->d_name.name, |
192 | fh, len, connectable); | 193 | fh, len, connectable); |
194 | #endif | ||
193 | 195 | ||
194 | if (connectable && (len < 6)) { | 196 | if (parent && (len < 6)) { |
195 | *max_len = 6; | 197 | *max_len = 6; |
196 | type = 255; | 198 | type = 255; |
197 | goto bail; | 199 | goto bail; |
@@ -211,12 +213,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, | |||
211 | fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff)); | 213 | fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff)); |
212 | fh[2] = cpu_to_le32(generation); | 214 | fh[2] = cpu_to_le32(generation); |
213 | 215 | ||
214 | if (connectable && !S_ISDIR(inode->i_mode)) { | 216 | if (parent) { |
215 | struct inode *parent; | ||
216 | |||
217 | spin_lock(&dentry->d_lock); | ||
218 | |||
219 | parent = dentry->d_parent->d_inode; | ||
220 | blkno = OCFS2_I(parent)->ip_blkno; | 217 | blkno = OCFS2_I(parent)->ip_blkno; |
221 | generation = parent->i_generation; | 218 | generation = parent->i_generation; |
222 | 219 | ||
@@ -224,8 +221,6 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, | |||
224 | fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff)); | 221 | fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff)); |
225 | fh[5] = cpu_to_le32(generation); | 222 | fh[5] = cpu_to_le32(generation); |
226 | 223 | ||
227 | spin_unlock(&dentry->d_lock); | ||
228 | |||
229 | len = 6; | 224 | len = 6; |
230 | type = 2; | 225 | type = 2; |
231 | 226 | ||
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 735514ca400f..d89e08a81eda 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -273,11 +273,13 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
273 | inode->i_gid = le32_to_cpu(fe->i_gid); | 273 | inode->i_gid = le32_to_cpu(fe->i_gid); |
274 | 274 | ||
275 | /* Fast symlinks will have i_size but no allocated clusters. */ | 275 | /* Fast symlinks will have i_size but no allocated clusters. */ |
276 | if (S_ISLNK(inode->i_mode) && !fe->i_clusters) | 276 | if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { |
277 | inode->i_blocks = 0; | 277 | inode->i_blocks = 0; |
278 | else | 278 | inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; |
279 | } else { | ||
279 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 280 | inode->i_blocks = ocfs2_inode_sector_count(inode); |
280 | inode->i_mapping->a_ops = &ocfs2_aops; | 281 | inode->i_mapping->a_ops = &ocfs2_aops; |
282 | } | ||
281 | inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); | 283 | inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); |
282 | inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); | 284 | inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); |
283 | inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); | 285 | inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); |
@@ -331,10 +333,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
331 | OCFS2_I(inode)->ip_dir_lock_gen = 1; | 333 | OCFS2_I(inode)->ip_dir_lock_gen = 1; |
332 | break; | 334 | break; |
333 | case S_IFLNK: | 335 | case S_IFLNK: |
334 | if (ocfs2_inode_is_fast_symlink(inode)) | 336 | inode->i_op = &ocfs2_symlink_inode_operations; |
335 | inode->i_op = &ocfs2_fast_symlink_inode_operations; | ||
336 | else | ||
337 | inode->i_op = &ocfs2_symlink_inode_operations; | ||
338 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 337 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
339 | break; | 338 | break; |
340 | default: | 339 | default: |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index a1a1bfd652c9..d96f7f81d8dd 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -864,7 +864,7 @@ int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, | |||
864 | if (status) | 864 | if (status) |
865 | break; | 865 | break; |
866 | 866 | ||
867 | reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; | 867 | reqp = (struct ocfs2_info_request __user *)(unsigned long)req_addr; |
868 | if (!reqp) { | 868 | if (!reqp) { |
869 | status = -EINVAL; | 869 | status = -EINVAL; |
870 | goto bail; | 870 | goto bail; |
@@ -888,9 +888,11 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
888 | struct ocfs2_space_resv sr; | 888 | struct ocfs2_space_resv sr; |
889 | struct ocfs2_new_group_input input; | 889 | struct ocfs2_new_group_input input; |
890 | struct reflink_arguments args; | 890 | struct reflink_arguments args; |
891 | const char *old_path, *new_path; | 891 | const char __user *old_path; |
892 | const char __user *new_path; | ||
892 | bool preserve; | 893 | bool preserve; |
893 | struct ocfs2_info info; | 894 | struct ocfs2_info info; |
895 | void __user *argp = (void __user *)arg; | ||
894 | 896 | ||
895 | switch (cmd) { | 897 | switch (cmd) { |
896 | case OCFS2_IOC_GETFLAGS: | 898 | case OCFS2_IOC_GETFLAGS: |
@@ -937,17 +939,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
937 | 939 | ||
938 | return ocfs2_group_add(inode, &input); | 940 | return ocfs2_group_add(inode, &input); |
939 | case OCFS2_IOC_REFLINK: | 941 | case OCFS2_IOC_REFLINK: |
940 | if (copy_from_user(&args, (struct reflink_arguments *)arg, | 942 | if (copy_from_user(&args, argp, sizeof(args))) |
941 | sizeof(args))) | ||
942 | return -EFAULT; | 943 | return -EFAULT; |
943 | old_path = (const char *)(unsigned long)args.old_path; | 944 | old_path = (const char __user *)(unsigned long)args.old_path; |
944 | new_path = (const char *)(unsigned long)args.new_path; | 945 | new_path = (const char __user *)(unsigned long)args.new_path; |
945 | preserve = (args.preserve != 0); | 946 | preserve = (args.preserve != 0); |
946 | 947 | ||
947 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); | 948 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); |
948 | case OCFS2_IOC_INFO: | 949 | case OCFS2_IOC_INFO: |
949 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | 950 | if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) |
950 | sizeof(struct ocfs2_info))) | ||
951 | return -EFAULT; | 951 | return -EFAULT; |
952 | 952 | ||
953 | return ocfs2_info_handle(inode, &info, 0); | 953 | return ocfs2_info_handle(inode, &info, 0); |
@@ -960,22 +960,20 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
960 | if (!capable(CAP_SYS_ADMIN)) | 960 | if (!capable(CAP_SYS_ADMIN)) |
961 | return -EPERM; | 961 | return -EPERM; |
962 | 962 | ||
963 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 963 | if (copy_from_user(&range, argp, sizeof(range))) |
964 | sizeof(range))) | ||
965 | return -EFAULT; | 964 | return -EFAULT; |
966 | 965 | ||
967 | ret = ocfs2_trim_fs(sb, &range); | 966 | ret = ocfs2_trim_fs(sb, &range); |
968 | if (ret < 0) | 967 | if (ret < 0) |
969 | return ret; | 968 | return ret; |
970 | 969 | ||
971 | if (copy_to_user((struct fstrim_range *)arg, &range, | 970 | if (copy_to_user(argp, &range, sizeof(range))) |
972 | sizeof(range))) | ||
973 | return -EFAULT; | 971 | return -EFAULT; |
974 | 972 | ||
975 | return 0; | 973 | return 0; |
976 | } | 974 | } |
977 | case OCFS2_IOC_MOVE_EXT: | 975 | case OCFS2_IOC_MOVE_EXT: |
978 | return ocfs2_ioctl_move_extents(filp, (void __user *)arg); | 976 | return ocfs2_ioctl_move_extents(filp, argp); |
979 | default: | 977 | default: |
980 | return -ENOTTY; | 978 | return -ENOTTY; |
981 | } | 979 | } |
@@ -988,6 +986,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
988 | struct reflink_arguments args; | 986 | struct reflink_arguments args; |
989 | struct inode *inode = file->f_path.dentry->d_inode; | 987 | struct inode *inode = file->f_path.dentry->d_inode; |
990 | struct ocfs2_info info; | 988 | struct ocfs2_info info; |
989 | void __user *argp = (void __user *)arg; | ||
991 | 990 | ||
992 | switch (cmd) { | 991 | switch (cmd) { |
993 | case OCFS2_IOC32_GETFLAGS: | 992 | case OCFS2_IOC32_GETFLAGS: |
@@ -1006,16 +1005,14 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
1006 | case FITRIM: | 1005 | case FITRIM: |
1007 | break; | 1006 | break; |
1008 | case OCFS2_IOC_REFLINK: | 1007 | case OCFS2_IOC_REFLINK: |
1009 | if (copy_from_user(&args, (struct reflink_arguments *)arg, | 1008 | if (copy_from_user(&args, argp, sizeof(args))) |
1010 | sizeof(args))) | ||
1011 | return -EFAULT; | 1009 | return -EFAULT; |
1012 | preserve = (args.preserve != 0); | 1010 | preserve = (args.preserve != 0); |
1013 | 1011 | ||
1014 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), | 1012 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), |
1015 | compat_ptr(args.new_path), preserve); | 1013 | compat_ptr(args.new_path), preserve); |
1016 | case OCFS2_IOC_INFO: | 1014 | case OCFS2_IOC_INFO: |
1017 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | 1015 | if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) |
1018 | sizeof(struct ocfs2_info))) | ||
1019 | return -EFAULT; | 1016 | return -EFAULT; |
1020 | 1017 | ||
1021 | return ocfs2_info_handle(inode, &info, 1); | 1018 | return ocfs2_info_handle(inode, &info, 1); |
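The ocfs2_ioctl()/ocfs2_compat_ioctl() hunks are mostly about __user annotations: the raw unsigned long argument is cast once to a void __user *argp, and every copy_from_user()/copy_to_user() goes through it, which removes the repeated per-case casts and lets sparse verify that user pointers are never dereferenced directly. A minimal sketch of that ioctl shape; the command number and structure are hypothetical.

/* Hypothetical ioctl demonstrating the single-cast __user pattern. */
struct demo_args { u64 start; u64 len; };
#define DEMO_IOC_TRIM	_IOWR('f', 1, struct demo_args)

static long demo_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct demo_args args;

	switch (cmd) {
	case DEMO_IOC_TRIM:
		if (copy_from_user(&args, argp, sizeof(args)))
			return -EFAULT;
		/* ... act on args ... */
		if (copy_to_user(argp, &args, sizeof(args)))
			return -EFAULT;
		return 0;
	default:
		return -ENOTTY;
	}
}
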
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index b1e3fce72ea4..6083432f667e 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -1082,8 +1082,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) | |||
1082 | context->file = filp; | 1082 | context->file = filp; |
1083 | 1083 | ||
1084 | if (argp) { | 1084 | if (argp) { |
1085 | if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, | 1085 | if (copy_from_user(&range, argp, sizeof(range))) { |
1086 | sizeof(range))) { | ||
1087 | status = -EFAULT; | 1086 | status = -EFAULT; |
1088 | goto out; | 1087 | goto out; |
1089 | } | 1088 | } |
@@ -1138,8 +1137,7 @@ out: | |||
1138 | * length and new_offset even if failure happens somewhere. | 1137 | * length and new_offset even if failure happens somewhere. |
1139 | */ | 1138 | */ |
1140 | if (argp) { | 1139 | if (argp) { |
1141 | if (copy_to_user((struct ocfs2_move_extents *)argp, &range, | 1140 | if (copy_to_user(argp, &range, sizeof(range))) |
1142 | sizeof(range))) | ||
1143 | status = -EFAULT; | 1141 | status = -EFAULT; |
1144 | } | 1142 | } |
1145 | 1143 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a9856e3eaaf0..9f39c640cddf 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -1724,15 +1724,16 @@ static int ocfs2_symlink(struct inode *dir, | |||
1724 | fe = (struct ocfs2_dinode *) new_fe_bh->b_data; | 1724 | fe = (struct ocfs2_dinode *) new_fe_bh->b_data; |
1725 | inode->i_rdev = 0; | 1725 | inode->i_rdev = 0; |
1726 | newsize = l - 1; | 1726 | newsize = l - 1; |
1727 | inode->i_op = &ocfs2_symlink_inode_operations; | ||
1727 | if (l > ocfs2_fast_symlink_chars(sb)) { | 1728 | if (l > ocfs2_fast_symlink_chars(sb)) { |
1728 | u32 offset = 0; | 1729 | u32 offset = 0; |
1729 | 1730 | ||
1730 | inode->i_op = &ocfs2_symlink_inode_operations; | ||
1731 | status = dquot_alloc_space_nodirty(inode, | 1731 | status = dquot_alloc_space_nodirty(inode, |
1732 | ocfs2_clusters_to_bytes(osb->sb, 1)); | 1732 | ocfs2_clusters_to_bytes(osb->sb, 1)); |
1733 | if (status) | 1733 | if (status) |
1734 | goto bail; | 1734 | goto bail; |
1735 | did_quota = 1; | 1735 | did_quota = 1; |
1736 | inode->i_mapping->a_ops = &ocfs2_aops; | ||
1736 | status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, | 1737 | status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, |
1737 | new_fe_bh, | 1738 | new_fe_bh, |
1738 | handle, data_ac, NULL, | 1739 | handle, data_ac, NULL, |
@@ -1750,7 +1751,7 @@ static int ocfs2_symlink(struct inode *dir, | |||
1750 | i_size_write(inode, newsize); | 1751 | i_size_write(inode, newsize); |
1751 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 1752 | inode->i_blocks = ocfs2_inode_sector_count(inode); |
1752 | } else { | 1753 | } else { |
1753 | inode->i_op = &ocfs2_fast_symlink_inode_operations; | 1754 | inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; |
1754 | memcpy((char *) fe->id2.i_symlink, symname, l); | 1755 | memcpy((char *) fe->id2.i_symlink, symname, l); |
1755 | i_size_write(inode, newsize); | 1756 | i_size_write(inode, newsize); |
1756 | inode->i_blocks = 0; | 1757 | inode->i_blocks = 0; |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 5d22872e2bb3..f1fbb4b552ad 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -54,101 +54,40 @@ | |||
54 | #include "buffer_head_io.h" | 54 | #include "buffer_head_io.h" |
55 | 55 | ||
56 | 56 | ||
57 | static char *ocfs2_fast_symlink_getlink(struct inode *inode, | 57 | static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) |
58 | struct buffer_head **bh) | ||
59 | { | 58 | { |
60 | int status; | 59 | struct inode *inode = page->mapping->host; |
61 | char *link = NULL; | 60 | struct buffer_head *bh; |
61 | int status = ocfs2_read_inode_block(inode, &bh); | ||
62 | struct ocfs2_dinode *fe; | 62 | struct ocfs2_dinode *fe; |
63 | const char *link; | ||
64 | void *kaddr; | ||
65 | size_t len; | ||
63 | 66 | ||
64 | status = ocfs2_read_inode_block(inode, bh); | ||
65 | if (status < 0) { | 67 | if (status < 0) { |
66 | mlog_errno(status); | 68 | mlog_errno(status); |
67 | link = ERR_PTR(status); | 69 | return status; |
68 | goto bail; | ||
69 | } | 70 | } |
70 | 71 | ||
71 | fe = (struct ocfs2_dinode *) (*bh)->b_data; | 72 | fe = (struct ocfs2_dinode *) bh->b_data; |
72 | link = (char *) fe->id2.i_symlink; | 73 | link = (char *) fe->id2.i_symlink; |
73 | bail: | 74 | /* will be less than a page size */ |
74 | 75 | len = strnlen(link, ocfs2_fast_symlink_chars(inode->i_sb)); | |
75 | return link; | 76 | kaddr = kmap_atomic(page); |
76 | } | 77 | memcpy(kaddr, link, len + 1); |
77 | 78 | kunmap_atomic(kaddr); | |
78 | static int ocfs2_readlink(struct dentry *dentry, | 79 | SetPageUptodate(page); |
79 | char __user *buffer, | 80 | unlock_page(page); |
80 | int buflen) | ||
81 | { | ||
82 | int ret; | ||
83 | char *link; | ||
84 | struct buffer_head *bh = NULL; | ||
85 | struct inode *inode = dentry->d_inode; | ||
86 | |||
87 | link = ocfs2_fast_symlink_getlink(inode, &bh); | ||
88 | if (IS_ERR(link)) { | ||
89 | ret = PTR_ERR(link); | ||
90 | goto out; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Without vfsmount we can't update atime now, | ||
95 | * but we will update atime here ultimately. | ||
96 | */ | ||
97 | ret = vfs_readlink(dentry, buffer, buflen, link); | ||
98 | |||
99 | brelse(bh); | 81 | brelse(bh); |
100 | out: | 82 | return 0; |
101 | if (ret < 0) | ||
102 | mlog_errno(ret); | ||
103 | return ret; | ||
104 | } | 83 | } |
105 | 84 | ||
106 | static void *ocfs2_fast_follow_link(struct dentry *dentry, | 85 | const struct address_space_operations ocfs2_fast_symlink_aops = { |
107 | struct nameidata *nd) | 86 | .readpage = ocfs2_fast_symlink_readpage, |
108 | { | 87 | }; |
109 | int status = 0; | ||
110 | int len; | ||
111 | char *target, *link = ERR_PTR(-ENOMEM); | ||
112 | struct inode *inode = dentry->d_inode; | ||
113 | struct buffer_head *bh = NULL; | ||
114 | |||
115 | BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); | ||
116 | target = ocfs2_fast_symlink_getlink(inode, &bh); | ||
117 | if (IS_ERR(target)) { | ||
118 | status = PTR_ERR(target); | ||
119 | mlog_errno(status); | ||
120 | goto bail; | ||
121 | } | ||
122 | |||
123 | /* Fast symlinks can't be large */ | ||
124 | len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); | ||
125 | link = kzalloc(len + 1, GFP_NOFS); | ||
126 | if (!link) { | ||
127 | status = -ENOMEM; | ||
128 | mlog_errno(status); | ||
129 | goto bail; | ||
130 | } | ||
131 | |||
132 | memcpy(link, target, len); | ||
133 | |||
134 | bail: | ||
135 | nd_set_link(nd, status ? ERR_PTR(status) : link); | ||
136 | brelse(bh); | ||
137 | |||
138 | if (status) | ||
139 | mlog_errno(status); | ||
140 | return NULL; | ||
141 | } | ||
142 | |||
143 | static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) | ||
144 | { | ||
145 | char *link = nd_get_link(nd); | ||
146 | if (!IS_ERR(link)) | ||
147 | kfree(link); | ||
148 | } | ||
149 | 88 | ||
150 | const struct inode_operations ocfs2_symlink_inode_operations = { | 89 | const struct inode_operations ocfs2_symlink_inode_operations = { |
151 | .readlink = page_readlink, | 90 | .readlink = generic_readlink, |
152 | .follow_link = page_follow_link_light, | 91 | .follow_link = page_follow_link_light, |
153 | .put_link = page_put_link, | 92 | .put_link = page_put_link, |
154 | .getattr = ocfs2_getattr, | 93 | .getattr = ocfs2_getattr, |
@@ -159,15 +98,3 @@ const struct inode_operations ocfs2_symlink_inode_operations = { | |||
159 | .removexattr = generic_removexattr, | 98 | .removexattr = generic_removexattr, |
160 | .fiemap = ocfs2_fiemap, | 99 | .fiemap = ocfs2_fiemap, |
161 | }; | 100 | }; |
162 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { | ||
163 | .readlink = ocfs2_readlink, | ||
164 | .follow_link = ocfs2_fast_follow_link, | ||
165 | .put_link = ocfs2_fast_put_link, | ||
166 | .getattr = ocfs2_getattr, | ||
167 | .setattr = ocfs2_setattr, | ||
168 | .setxattr = generic_setxattr, | ||
169 | .getxattr = generic_getxattr, | ||
170 | .listxattr = ocfs2_listxattr, | ||
171 | .removexattr = generic_removexattr, | ||
172 | .fiemap = ocfs2_fiemap, | ||
173 | }; | ||
diff --git a/fs/ocfs2/symlink.h b/fs/ocfs2/symlink.h index 65a6c9c6ad51..71ee4245e919 100644 --- a/fs/ocfs2/symlink.h +++ b/fs/ocfs2/symlink.h | |||
@@ -27,7 +27,7 @@ | |||
27 | #define OCFS2_SYMLINK_H | 27 | #define OCFS2_SYMLINK_H |
28 | 28 | ||
29 | extern const struct inode_operations ocfs2_symlink_inode_operations; | 29 | extern const struct inode_operations ocfs2_symlink_inode_operations; |
30 | extern const struct inode_operations ocfs2_fast_symlink_inode_operations; | 30 | extern const struct address_space_operations ocfs2_fast_symlink_aops; |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Test whether an inode is a fast symlink. | 33 | * Test whether an inode is a fast symlink. |
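Taken together, the ocfs2 symlink.c/symlink.h, namei.c and inode.c hunks above convert fast symlinks from a private follow_link implementation to the generic page-cache helpers: the inline target is served by a tiny ->readpage, and one inode_operations built on generic_readlink/page_follow_link_light/page_put_link covers both fast and extent-backed symlinks. A condensed sketch of the two pieces such a conversion needs; demo_inline_target() is an invented stand-in for however the filesystem locates the inline target string.

/* 1. Serve the inline target through the page cache. */
static int demo_fast_symlink_readpage(struct file *unused, struct page *page)
{
	struct inode *inode = page->mapping->host;
	const char *link;
	void *kaddr;
	size_t len;

	link = demo_inline_target(inode);	/* assumed fs-private accessor */
	len = strnlen(link, PAGE_SIZE - 1);	/* inline targets fit a page */

	kaddr = kmap_atomic(page);
	memcpy(kaddr, link, len + 1);
	kunmap_atomic(kaddr);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

static const struct address_space_operations demo_fast_symlink_aops = {
	.readpage	= demo_fast_symlink_readpage,
};

/* 2. A single inode_operations built on the generic helpers. */
static const struct inode_operations demo_symlink_iops = {
	.readlink	= generic_readlink,
	.follow_link	= page_follow_link_light,
	.put_link	= page_put_link,
};

/* 3. At inode-init time the filesystem picks a_ops: inline target ->
 *    demo_fast_symlink_aops, extent-backed target -> its regular aops. */
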
diff --git a/fs/open.c b/fs/open.c | |||
@@ -654,10 +654,23 @@ static inline int __get_file_write_access(struct inode *inode, | |||
654 | return error; | 654 | return error; |
655 | } | 655 | } |
656 | 656 | ||
657 | static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | 657 | int open_check_o_direct(struct file *f) |
658 | struct file *f, | 658 | { |
659 | int (*open)(struct inode *, struct file *), | 659 | /* NB: we're sure to have correct a_ops only after f_op->open */ |
660 | const struct cred *cred) | 660 | if (f->f_flags & O_DIRECT) { |
661 | if (!f->f_mapping->a_ops || | ||
662 | ((!f->f_mapping->a_ops->direct_IO) && | ||
663 | (!f->f_mapping->a_ops->get_xip_mem))) { | ||
664 | return -EINVAL; | ||
665 | } | ||
666 | } | ||
667 | return 0; | ||
668 | } | ||
669 | |||
670 | static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, | ||
671 | struct file *f, | ||
672 | int (*open)(struct inode *, struct file *), | ||
673 | const struct cred *cred) | ||
661 | { | 674 | { |
662 | static const struct file_operations empty_fops = {}; | 675 | static const struct file_operations empty_fops = {}; |
663 | struct inode *inode; | 676 | struct inode *inode; |
@@ -713,16 +726,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
713 | 726 | ||
714 | file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); | 727 | file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); |
715 | 728 | ||
716 | /* NB: we're sure to have correct a_ops only after f_op->open */ | ||
717 | if (f->f_flags & O_DIRECT) { | ||
718 | if (!f->f_mapping->a_ops || | ||
719 | ((!f->f_mapping->a_ops->direct_IO) && | ||
720 | (!f->f_mapping->a_ops->get_xip_mem))) { | ||
721 | fput(f); | ||
722 | f = ERR_PTR(-EINVAL); | ||
723 | } | ||
724 | } | ||
725 | |||
726 | return f; | 729 | return f; |
727 | 730 | ||
728 | cleanup_all: | 731 | cleanup_all: |
@@ -744,12 +747,29 @@ cleanup_all: | |||
744 | f->f_path.dentry = NULL; | 747 | f->f_path.dentry = NULL; |
745 | f->f_path.mnt = NULL; | 748 | f->f_path.mnt = NULL; |
746 | cleanup_file: | 749 | cleanup_file: |
747 | put_filp(f); | ||
748 | dput(dentry); | 750 | dput(dentry); |
749 | mntput(mnt); | 751 | mntput(mnt); |
750 | return ERR_PTR(error); | 752 | return ERR_PTR(error); |
751 | } | 753 | } |
752 | 754 | ||
755 | static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | ||
756 | struct file *f, | ||
757 | int (*open)(struct inode *, struct file *), | ||
758 | const struct cred *cred) | ||
759 | { | ||
760 | struct file *res = do_dentry_open(dentry, mnt, f, open, cred); | ||
761 | if (!IS_ERR(res)) { | ||
762 | int error = open_check_o_direct(f); | ||
763 | if (error) { | ||
764 | fput(res); | ||
765 | res = ERR_PTR(error); | ||
766 | } | ||
767 | } else { | ||
768 | put_filp(f); | ||
769 | } | ||
770 | return res; | ||
771 | } | ||
772 | |||
753 | /** | 773 | /** |
754 | * lookup_instantiate_filp - instantiates the open intent filp | 774 | * lookup_instantiate_filp - instantiates the open intent filp |
755 | * @nd: pointer to nameidata | 775 | * @nd: pointer to nameidata |
@@ -804,13 +824,31 @@ struct file *nameidata_to_filp(struct nameidata *nd) | |||
804 | 824 | ||
805 | /* Pick up the filp from the open intent */ | 825 | /* Pick up the filp from the open intent */ |
806 | filp = nd->intent.open.file; | 826 | filp = nd->intent.open.file; |
807 | nd->intent.open.file = NULL; | ||
808 | 827 | ||
809 | /* Has the filesystem initialised the file for us? */ | 828 | /* Has the filesystem initialised the file for us? */ |
810 | if (filp->f_path.dentry == NULL) { | 829 | if (filp->f_path.dentry != NULL) { |
830 | nd->intent.open.file = NULL; | ||
831 | } else { | ||
832 | struct file *res; | ||
833 | |||
811 | path_get(&nd->path); | 834 | path_get(&nd->path); |
812 | filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, | 835 | res = do_dentry_open(nd->path.dentry, nd->path.mnt, |
813 | NULL, cred); | 836 | filp, NULL, cred); |
837 | if (!IS_ERR(res)) { | ||
838 | int error; | ||
839 | |||
840 | nd->intent.open.file = NULL; | ||
841 | BUG_ON(res != filp); | ||
842 | |||
843 | error = open_check_o_direct(filp); | ||
844 | if (error) { | ||
845 | fput(filp); | ||
846 | filp = ERR_PTR(error); | ||
847 | } | ||
848 | } else { | ||
849 | /* Allow nd->intent.open.file to be recycled */ | ||
850 | filp = res; | ||
851 | } | ||
814 | } | 852 | } |
815 | return filp; | 853 | return filp; |
816 | } | 854 | } |
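The fs/open.c hunks split the old __dentry_open() into do_dentry_open() plus open_check_o_direct(), since, as the comment kept by the patch says, f_mapping->a_ops is only known to be correct after ->open() has run, and the two callers need different cleanup when the check fails (put_filp() for a file that never went live, fput() for one that did). The fragment below only illustrates why the O_DIRECT check cannot run earlier; the driver and its private mapping are invented.

/* Invented character-device ->open() that redirects the file to a
 * device-private mapping; only after this returns do f_mapping->a_ops
 * (and therefore the ->direct_IO check) mean anything. */
static struct address_space *demo_private_mapping;	/* set up at probe time */

static int demo_open(struct inode *inode, struct file *filp)
{
	filp->f_mapping = demo_private_mapping;
	return 0;
}
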
diff --git a/fs/pipe.c b/fs/pipe.c | |||
@@ -654,8 +654,11 @@ out: | |||
654 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); | 654 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); |
655 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 655 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
656 | } | 656 | } |
657 | if (ret > 0) | 657 | if (ret > 0) { |
658 | file_update_time(filp); | 658 | int err = file_update_time(filp); |
659 | if (err) | ||
660 | ret = err; | ||
661 | } | ||
659 | return ret; | 662 | return ret; |
660 | } | 663 | } |
661 | 664 | ||
diff --git a/fs/pnode.c b/fs/pnode.c index ab5fa9e1a79a..bed378db0758 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | |||
257 | prev_src_mnt = child; | 257 | prev_src_mnt = child; |
258 | } | 258 | } |
259 | out: | 259 | out: |
260 | br_write_lock(vfsmount_lock); | 260 | br_write_lock(&vfsmount_lock); |
261 | while (!list_empty(&tmp_list)) { | 261 | while (!list_empty(&tmp_list)) { |
262 | child = list_first_entry(&tmp_list, struct mount, mnt_hash); | 262 | child = list_first_entry(&tmp_list, struct mount, mnt_hash); |
263 | umount_tree(child, 0, &umount_list); | 263 | umount_tree(child, 0, &umount_list); |
264 | } | 264 | } |
265 | br_write_unlock(vfsmount_lock); | 265 | br_write_unlock(&vfsmount_lock); |
266 | release_mounts(&umount_list); | 266 | release_mounts(&umount_list); |
267 | return ret; | 267 | return ret; |
268 | } | 268 | } |
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 12412852d88a..5e289a7cbad1 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c | |||
@@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) | |||
23 | 23 | ||
24 | poll_wait(file, &p->ns->poll, wait); | 24 | poll_wait(file, &p->ns->poll, wait); |
25 | 25 | ||
26 | br_read_lock(vfsmount_lock); | 26 | br_read_lock(&vfsmount_lock); |
27 | if (p->m.poll_event != ns->event) { | 27 | if (p->m.poll_event != ns->event) { |
28 | p->m.poll_event = ns->event; | 28 | p->m.poll_event = ns->event; |
29 | res |= POLLERR | POLLPRI; | 29 | res |= POLLERR | POLLPRI; |
30 | } | 30 | } |
31 | br_read_unlock(vfsmount_lock); | 31 | br_read_unlock(&vfsmount_lock); |
32 | 32 | ||
33 | return res; | 33 | return res; |
34 | } | 34 | } |
diff --git a/fs/readdir.c b/fs/readdir.c index cc0a8227cddf..39e3370d79cf 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -108,11 +108,11 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, | |||
108 | int error; | 108 | int error; |
109 | struct file * file; | 109 | struct file * file; |
110 | struct readdir_callback buf; | 110 | struct readdir_callback buf; |
111 | int fput_needed; | ||
111 | 112 | ||
112 | error = -EBADF; | 113 | file = fget_light(fd, &fput_needed); |
113 | file = fget(fd); | ||
114 | if (!file) | 114 | if (!file) |
115 | goto out; | 115 | return -EBADF; |
116 | 116 | ||
117 | buf.result = 0; | 117 | buf.result = 0; |
118 | buf.dirent = dirent; | 118 | buf.dirent = dirent; |
@@ -121,8 +121,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, | |||
121 | if (buf.result) | 121 | if (buf.result) |
122 | error = buf.result; | 122 | error = buf.result; |
123 | 123 | ||
124 | fput(file); | 124 | fput_light(file, fput_needed); |
125 | out: | ||
126 | return error; | 125 | return error; |
127 | } | 126 | } |
128 | 127 | ||
@@ -195,16 +194,15 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
195 | struct file * file; | 194 | struct file * file; |
196 | struct linux_dirent __user * lastdirent; | 195 | struct linux_dirent __user * lastdirent; |
197 | struct getdents_callback buf; | 196 | struct getdents_callback buf; |
197 | int fput_needed; | ||
198 | int error; | 198 | int error; |
199 | 199 | ||
200 | error = -EFAULT; | ||
201 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 200 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
202 | goto out; | 201 | return -EFAULT; |
203 | 202 | ||
204 | error = -EBADF; | 203 | file = fget_light(fd, &fput_needed); |
205 | file = fget(fd); | ||
206 | if (!file) | 204 | if (!file) |
207 | goto out; | 205 | return -EBADF; |
208 | 206 | ||
209 | buf.current_dir = dirent; | 207 | buf.current_dir = dirent; |
210 | buf.previous = NULL; | 208 | buf.previous = NULL; |
@@ -221,8 +219,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
221 | else | 219 | else |
222 | error = count - buf.count; | 220 | error = count - buf.count; |
223 | } | 221 | } |
224 | fput(file); | 222 | fput_light(file, fput_needed); |
225 | out: | ||
226 | return error; | 223 | return error; |
227 | } | 224 | } |
228 | 225 | ||
@@ -278,16 +275,15 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
278 | struct file * file; | 275 | struct file * file; |
279 | struct linux_dirent64 __user * lastdirent; | 276 | struct linux_dirent64 __user * lastdirent; |
280 | struct getdents_callback64 buf; | 277 | struct getdents_callback64 buf; |
278 | int fput_needed; | ||
281 | int error; | 279 | int error; |
282 | 280 | ||
283 | error = -EFAULT; | ||
284 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 281 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
285 | goto out; | 282 | return -EFAULT; |
286 | 283 | ||
287 | error = -EBADF; | 284 | file = fget_light(fd, &fput_needed); |
288 | file = fget(fd); | ||
289 | if (!file) | 285 | if (!file) |
290 | goto out; | 286 | return -EBADF; |
291 | 287 | ||
292 | buf.current_dir = dirent; | 288 | buf.current_dir = dirent; |
293 | buf.previous = NULL; | 289 | buf.previous = NULL; |
@@ -305,7 +301,6 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
305 | else | 301 | else |
306 | error = count - buf.count; | 302 | error = count - buf.count; |
307 | } | 303 | } |
308 | fput(file); | 304 | fput_light(file, fput_needed); |
309 | out: | ||
310 | return error; | 305 | return error; |
311 | } | 306 | } |
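The readdir/getdents hunks switch from fget()/fput() to fget_light()/fput_light(), which skip the atomic reference count when the fd table is not shared with another thread; the fput_needed flag records whether a real reference was actually taken. The pattern, sketched for a hypothetical syscall body:

/* Sketch of the fget_light()/fput_light() pattern used above. */
static long demo_syscall(unsigned int fd)
{
	struct file *file;
	int fput_needed;
	long error = 0;

	file = fget_light(fd, &fput_needed);
	if (!file)
		return -EBADF;

	/* ... use file; the pointer must not outlive this function ... */

	fput_light(file, fput_needed);	/* drops a reference only if one was taken */
	return error;
}
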
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 59d06871a850..a6d4268fb6c1 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -1592,13 +1592,12 @@ struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
1592 | (fh_type == 6) ? fid->raw[5] : 0); | 1592 | (fh_type == 6) ? fid->raw[5] : 0); |
1593 | } | 1593 | } |
1594 | 1594 | ||
1595 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 1595 | int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, |
1596 | int need_parent) | 1596 | struct inode *parent) |
1597 | { | 1597 | { |
1598 | struct inode *inode = dentry->d_inode; | ||
1599 | int maxlen = *lenp; | 1598 | int maxlen = *lenp; |
1600 | 1599 | ||
1601 | if (need_parent && (maxlen < 5)) { | 1600 | if (parent && (maxlen < 5)) { |
1602 | *lenp = 5; | 1601 | *lenp = 5; |
1603 | return 255; | 1602 | return 255; |
1604 | } else if (maxlen < 3) { | 1603 | } else if (maxlen < 3) { |
@@ -1610,20 +1609,15 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | |||
1610 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1609 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
1611 | data[2] = inode->i_generation; | 1610 | data[2] = inode->i_generation; |
1612 | *lenp = 3; | 1611 | *lenp = 3; |
1613 | /* no room for directory info? return what we've stored so far */ | 1612 | if (parent) { |
1614 | if (maxlen < 5 || !need_parent) | 1613 | data[3] = parent->i_ino; |
1615 | return 3; | 1614 | data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id); |
1616 | 1615 | *lenp = 5; | |
1617 | spin_lock(&dentry->d_lock); | 1616 | if (maxlen >= 6) { |
1618 | inode = dentry->d_parent->d_inode; | 1617 | data[5] = parent->i_generation; |
1619 | data[3] = inode->i_ino; | 1618 | *lenp = 6; |
1620 | data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1619 | } |
1621 | *lenp = 5; | 1620 | } |
1622 | if (maxlen >= 6) { | ||
1623 | data[5] = inode->i_generation; | ||
1624 | *lenp = 6; | ||
1625 | } | ||
1626 | spin_unlock(&dentry->d_lock); | ||
1627 | return *lenp; | 1621 | return *lenp; |
1628 | } | 1622 | } |
1629 | 1623 | ||
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index b1a08573fe14..afcadcc03e8a 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -1923,6 +1923,8 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1923 | * the workqueue job (flush_async_commit) needs this lock | 1923 | * the workqueue job (flush_async_commit) needs this lock |
1924 | */ | 1924 | */ |
1925 | reiserfs_write_unlock(sb); | 1925 | reiserfs_write_unlock(sb); |
1926 | |||
1927 | cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); | ||
1926 | flush_workqueue(commit_wq); | 1928 | flush_workqueue(commit_wq); |
1927 | 1929 | ||
1928 | if (!reiserfs_mounted_fs_count) { | 1930 | if (!reiserfs_mounted_fs_count) { |
@@ -3231,8 +3233,6 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, | |||
3231 | th->t_trans_id, journal->j_trans_id); | 3233 | th->t_trans_id, journal->j_trans_id); |
3232 | } | 3234 | } |
3233 | 3235 | ||
3234 | sb->s_dirt = 1; | ||
3235 | |||
3236 | prepared = test_clear_buffer_journal_prepared(bh); | 3236 | prepared = test_clear_buffer_journal_prepared(bh); |
3237 | clear_buffer_journal_restore_dirty(bh); | 3237 | clear_buffer_journal_restore_dirty(bh); |
3238 | /* already in this transaction, we are done */ | 3238 | /* already in this transaction, we are done */ |
@@ -3316,6 +3316,7 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, | |||
3316 | journal->j_first = cn; | 3316 | journal->j_first = cn; |
3317 | journal->j_last = cn; | 3317 | journal->j_last = cn; |
3318 | } | 3318 | } |
3319 | reiserfs_schedule_old_flush(sb); | ||
3319 | return 0; | 3320 | return 0; |
3320 | } | 3321 | } |
3321 | 3322 | ||
@@ -3492,7 +3493,7 @@ static void flush_async_commits(struct work_struct *work) | |||
3492 | ** flushes any old transactions to disk | 3493 | ** flushes any old transactions to disk |
3493 | ** ends the current transaction if it is too old | 3494 | ** ends the current transaction if it is too old |
3494 | */ | 3495 | */ |
3495 | int reiserfs_flush_old_commits(struct super_block *sb) | 3496 | void reiserfs_flush_old_commits(struct super_block *sb) |
3496 | { | 3497 | { |
3497 | time_t now; | 3498 | time_t now; |
3498 | struct reiserfs_transaction_handle th; | 3499 | struct reiserfs_transaction_handle th; |
@@ -3502,9 +3503,8 @@ int reiserfs_flush_old_commits(struct super_block *sb) | |||
3502 | /* safety check so we don't flush while we are replaying the log during | 3503 | /* safety check so we don't flush while we are replaying the log during |
3503 | * mount | 3504 | * mount |
3504 | */ | 3505 | */ |
3505 | if (list_empty(&journal->j_journal_list)) { | 3506 | if (list_empty(&journal->j_journal_list)) |
3506 | return 0; | 3507 | return; |
3507 | } | ||
3508 | 3508 | ||
3509 | /* check the current transaction. If there are no writers, and it is | 3509 | /* check the current transaction. If there are no writers, and it is |
3510 | * too old, finish it, and force the commit blocks to disk | 3510 | * too old, finish it, and force the commit blocks to disk |
@@ -3526,7 +3526,6 @@ int reiserfs_flush_old_commits(struct super_block *sb) | |||
3526 | do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); | 3526 | do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); |
3527 | } | 3527 | } |
3528 | } | 3528 | } |
3529 | return sb->s_dirt; | ||
3530 | } | 3529 | } |
3531 | 3530 | ||
3532 | /* | 3531 | /* |
@@ -3955,7 +3954,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3955 | ** it tells us if we should continue with the journal_end, or just return | 3954 | ** it tells us if we should continue with the journal_end, or just return |
3956 | */ | 3955 | */ |
3957 | if (!check_journal_end(th, sb, nblocks, flags)) { | 3956 | if (!check_journal_end(th, sb, nblocks, flags)) { |
3958 | sb->s_dirt = 1; | 3957 | reiserfs_schedule_old_flush(sb); |
3959 | wake_queued_writers(sb); | 3958 | wake_queued_writers(sb); |
3960 | reiserfs_async_progress_wait(sb); | 3959 | reiserfs_async_progress_wait(sb); |
3961 | goto out; | 3960 | goto out; |
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index a59d27126338..33215f57ea06 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
@@ -480,6 +480,11 @@ struct reiserfs_sb_info { | |||
480 | struct dentry *priv_root; /* root of /.reiserfs_priv */ | 480 | struct dentry *priv_root; /* root of /.reiserfs_priv */ |
481 | struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ | 481 | struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ |
482 | int j_errno; | 482 | int j_errno; |
483 | |||
484 | int work_queued; /* non-zero if delayed work is queued */ | ||
485 | struct delayed_work old_work; /* old transactions flush delayed work */ | ||
486 | spinlock_t old_work_lock; /* protects old_work and work_queued */ | ||
487 | |||
483 | #ifdef CONFIG_QUOTA | 488 | #ifdef CONFIG_QUOTA |
484 | char *s_qf_names[MAXQUOTAS]; | 489 | char *s_qf_names[MAXQUOTAS]; |
485 | int s_jquota_fmt; | 490 | int s_jquota_fmt; |
@@ -2452,7 +2457,7 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct | |||
2452 | int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); | 2457 | int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); |
2453 | int reiserfs_commit_page(struct inode *inode, struct page *page, | 2458 | int reiserfs_commit_page(struct inode *inode, struct page *page, |
2454 | unsigned from, unsigned to); | 2459 | unsigned from, unsigned to); |
2455 | int reiserfs_flush_old_commits(struct super_block *); | 2460 | void reiserfs_flush_old_commits(struct super_block *); |
2456 | int reiserfs_commit_for_inode(struct inode *); | 2461 | int reiserfs_commit_for_inode(struct inode *); |
2457 | int reiserfs_inode_needs_commit(struct inode *); | 2462 | int reiserfs_inode_needs_commit(struct inode *); |
2458 | void reiserfs_update_inode_transaction(struct inode *); | 2463 | void reiserfs_update_inode_transaction(struct inode *); |
@@ -2487,6 +2492,7 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); | |||
2487 | int reiserfs_allocate_list_bitmaps(struct super_block *s, | 2492 | int reiserfs_allocate_list_bitmaps(struct super_block *s, |
2488 | struct reiserfs_list_bitmap *, unsigned int); | 2493 | struct reiserfs_list_bitmap *, unsigned int); |
2489 | 2494 | ||
2495 | void reiserfs_schedule_old_flush(struct super_block *s); | ||
2490 | void add_save_link(struct reiserfs_transaction_handle *th, | 2496 | void add_save_link(struct reiserfs_transaction_handle *th, |
2491 | struct inode *inode, int truncate); | 2497 | struct inode *inode, int truncate); |
2492 | int remove_save_link(struct inode *inode, int truncate); | 2498 | int remove_save_link(struct inode *inode, int truncate); |
@@ -2611,8 +2617,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | |||
2611 | int fh_len, int fh_type); | 2617 | int fh_len, int fh_type); |
2612 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, | 2618 | struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, |
2613 | int fh_len, int fh_type); | 2619 | int fh_len, int fh_type); |
2614 | int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | 2620 | int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, |
2615 | int connectable); | 2621 | struct inode *parent); |
2616 | 2622 | ||
2617 | int reiserfs_truncate_file(struct inode *, int update_timestamps); | 2623 | int reiserfs_truncate_file(struct inode *, int update_timestamps); |
2618 | void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, | 2624 | void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, |
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 9a17f63c3fd7..3ce02cff5e90 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c | |||
@@ -200,7 +200,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
200 | (bmap_nr_new - bmap_nr))); | 200 | (bmap_nr_new - bmap_nr))); |
201 | PUT_SB_BLOCK_COUNT(s, block_count_new); | 201 | PUT_SB_BLOCK_COUNT(s, block_count_new); |
202 | PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); | 202 | PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); |
203 | s->s_dirt = 1; | ||
204 | 203 | ||
205 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); | 204 | journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); |
206 | 205 | ||
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c07b7d709447..651ce767b55d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -72,20 +72,58 @@ static int reiserfs_sync_fs(struct super_block *s, int wait) | |||
72 | if (!journal_begin(&th, s, 1)) | 72 | if (!journal_begin(&th, s, 1)) |
73 | if (!journal_end_sync(&th, s, 1)) | 73 | if (!journal_end_sync(&th, s, 1)) |
74 | reiserfs_flush_old_commits(s); | 74 | reiserfs_flush_old_commits(s); |
75 | s->s_dirt = 0; /* Even if it's not true. | ||
76 | * We'll loop forever in sync_supers otherwise */ | ||
77 | reiserfs_write_unlock(s); | 75 | reiserfs_write_unlock(s); |
78 | return 0; | 76 | return 0; |
79 | } | 77 | } |
80 | 78 | ||
81 | static void reiserfs_write_super(struct super_block *s) | 79 | static void flush_old_commits(struct work_struct *work) |
82 | { | 80 | { |
81 | struct reiserfs_sb_info *sbi; | ||
82 | struct super_block *s; | ||
83 | |||
84 | sbi = container_of(work, struct reiserfs_sb_info, old_work.work); | ||
85 | s = sbi->s_journal->j_work_sb; | ||
86 | |||
87 | spin_lock(&sbi->old_work_lock); | ||
88 | sbi->work_queued = 0; | ||
89 | spin_unlock(&sbi->old_work_lock); | ||
90 | |||
83 | reiserfs_sync_fs(s, 1); | 91 | reiserfs_sync_fs(s, 1); |
84 | } | 92 | } |
85 | 93 | ||
94 | void reiserfs_schedule_old_flush(struct super_block *s) | ||
95 | { | ||
96 | struct reiserfs_sb_info *sbi = REISERFS_SB(s); | ||
97 | unsigned long delay; | ||
98 | |||
99 | if (s->s_flags & MS_RDONLY) | ||
100 | return; | ||
101 | |||
102 | spin_lock(&sbi->old_work_lock); | ||
103 | if (!sbi->work_queued) { | ||
104 | delay = msecs_to_jiffies(dirty_writeback_interval * 10); | ||
105 | queue_delayed_work(system_long_wq, &sbi->old_work, delay); | ||
106 | sbi->work_queued = 1; | ||
107 | } | ||
108 | spin_unlock(&sbi->old_work_lock); | ||
109 | } | ||
110 | |||
111 | static void cancel_old_flush(struct super_block *s) | ||
112 | { | ||
113 | struct reiserfs_sb_info *sbi = REISERFS_SB(s); | ||
114 | |||
115 | cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); | ||
116 | spin_lock(&sbi->old_work_lock); | ||
117 | sbi->work_queued = 0; | ||
118 | spin_unlock(&sbi->old_work_lock); | ||
119 | } | ||
120 | |||
86 | static int reiserfs_freeze(struct super_block *s) | 121 | static int reiserfs_freeze(struct super_block *s) |
87 | { | 122 | { |
88 | struct reiserfs_transaction_handle th; | 123 | struct reiserfs_transaction_handle th; |
124 | |||
125 | cancel_old_flush(s); | ||
126 | |||
89 | reiserfs_write_lock(s); | 127 | reiserfs_write_lock(s); |
90 | if (!(s->s_flags & MS_RDONLY)) { | 128 | if (!(s->s_flags & MS_RDONLY)) { |
91 | int err = journal_begin(&th, s, 1); | 129 | int err = journal_begin(&th, s, 1); |
@@ -99,7 +137,6 @@ static int reiserfs_freeze(struct super_block *s) | |||
99 | journal_end_sync(&th, s, 1); | 137 | journal_end_sync(&th, s, 1); |
100 | } | 138 | } |
101 | } | 139 | } |
102 | s->s_dirt = 0; | ||
103 | reiserfs_write_unlock(s); | 140 | reiserfs_write_unlock(s); |
104 | return 0; | 141 | return 0; |
105 | } | 142 | } |
@@ -483,9 +520,6 @@ static void reiserfs_put_super(struct super_block *s) | |||
483 | 520 | ||
484 | reiserfs_write_lock(s); | 521 | reiserfs_write_lock(s); |
485 | 522 | ||
486 | if (s->s_dirt) | ||
487 | reiserfs_write_super(s); | ||
488 | |||
489 | /* change file system state to current state if it was mounted with read-write permissions */ | 523 | /* change file system state to current state if it was mounted with read-write permissions */ |
490 | if (!(s->s_flags & MS_RDONLY)) { | 524 | if (!(s->s_flags & MS_RDONLY)) { |
491 | if (!journal_begin(&th, s, 10)) { | 525 | if (!journal_begin(&th, s, 10)) { |
@@ -692,7 +726,6 @@ static const struct super_operations reiserfs_sops = { | |||
692 | .dirty_inode = reiserfs_dirty_inode, | 726 | .dirty_inode = reiserfs_dirty_inode, |
693 | .evict_inode = reiserfs_evict_inode, | 727 | .evict_inode = reiserfs_evict_inode, |
694 | .put_super = reiserfs_put_super, | 728 | .put_super = reiserfs_put_super, |
695 | .write_super = reiserfs_write_super, | ||
696 | .sync_fs = reiserfs_sync_fs, | 729 | .sync_fs = reiserfs_sync_fs, |
697 | .freeze_fs = reiserfs_freeze, | 730 | .freeze_fs = reiserfs_freeze, |
698 | .unfreeze_fs = reiserfs_unfreeze, | 731 | .unfreeze_fs = reiserfs_unfreeze, |
@@ -1400,7 +1433,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1400 | err = journal_end(&th, s, 10); | 1433 | err = journal_end(&th, s, 10); |
1401 | if (err) | 1434 | if (err) |
1402 | goto out_err; | 1435 | goto out_err; |
1403 | s->s_dirt = 0; | ||
1404 | 1436 | ||
1405 | if (!(*mount_flags & MS_RDONLY)) { | 1437 | if (!(*mount_flags & MS_RDONLY)) { |
1406 | dquot_resume(s, -1); | 1438 | dquot_resume(s, -1); |
@@ -1730,19 +1762,21 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1730 | return -ENOMEM; | 1762 | return -ENOMEM; |
1731 | s->s_fs_info = sbi; | 1763 | s->s_fs_info = sbi; |
1732 | /* Set default values for options: non-aggressive tails, RO on errors */ | 1764 | /* Set default values for options: non-aggressive tails, RO on errors */ |
1733 | REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); | 1765 | sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); |
1734 | REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); | 1766 | sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); |
1735 | REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); | 1767 | sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); |
1736 | /* no preallocation minimum, be smart in | 1768 | /* no preallocation minimum, be smart in |
1737 | reiserfs_file_write instead */ | 1769 | reiserfs_file_write instead */ |
1738 | REISERFS_SB(s)->s_alloc_options.preallocmin = 0; | 1770 | sbi->s_alloc_options.preallocmin = 0; |
1739 | /* Preallocate by 16 blocks (17-1) at once */ | 1771 | /* Preallocate by 16 blocks (17-1) at once */ |
1740 | REISERFS_SB(s)->s_alloc_options.preallocsize = 17; | 1772 | sbi->s_alloc_options.preallocsize = 17; |
1741 | /* setup default block allocator options */ | 1773 | /* setup default block allocator options */ |
1742 | reiserfs_init_alloc_options(s); | 1774 | reiserfs_init_alloc_options(s); |
1743 | 1775 | ||
1744 | mutex_init(&REISERFS_SB(s)->lock); | 1776 | spin_lock_init(&sbi->old_work_lock); |
1745 | REISERFS_SB(s)->lock_depth = -1; | 1777 | INIT_DELAYED_WORK(&sbi->old_work, flush_old_commits); |
1778 | mutex_init(&sbi->lock); | ||
1779 | sbi->lock_depth = -1; | ||
1746 | 1780 | ||
1747 | jdev_name = NULL; | 1781 | jdev_name = NULL; |
1748 | if (reiserfs_parse_options | 1782 | if (reiserfs_parse_options |
@@ -1751,8 +1785,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1751 | goto error_unlocked; | 1785 | goto error_unlocked; |
1752 | } | 1786 | } |
1753 | if (jdev_name && jdev_name[0]) { | 1787 | if (jdev_name && jdev_name[0]) { |
1754 | REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); | 1788 | sbi->s_jdev = kstrdup(jdev_name, GFP_KERNEL); |
1755 | if (!REISERFS_SB(s)->s_jdev) { | 1789 | if (!sbi->s_jdev) { |
1756 | SWARN(silent, s, "", "Cannot allocate memory for " | 1790 | SWARN(silent, s, "", "Cannot allocate memory for " |
1757 | "journal device name"); | 1791 | "journal device name"); |
1758 | goto error; | 1792 | goto error; |
@@ -1810,7 +1844,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1810 | /* make data=ordered the default */ | 1844 | /* make data=ordered the default */ |
1811 | if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && | 1845 | if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && |
1812 | !reiserfs_data_writeback(s)) { | 1846 | !reiserfs_data_writeback(s)) { |
1813 | REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); | 1847 | sbi->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); |
1814 | } | 1848 | } |
1815 | 1849 | ||
1816 | if (reiserfs_data_log(s)) { | 1850 | if (reiserfs_data_log(s)) { |
@@ -2003,6 +2037,8 @@ error_unlocked: | |||
2003 | reiserfs_write_unlock(s); | 2037 | reiserfs_write_unlock(s); |
2004 | } | 2038 | } |
2005 | 2039 | ||
2040 | cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); | ||
2041 | |||
2006 | reiserfs_free_bitmap_cache(s); | 2042 | reiserfs_free_bitmap_cache(s); |
2007 | if (SB_BUFFER_WITH_SB(s)) | 2043 | if (SB_BUFFER_WITH_SB(s)) |
2008 | brelse(SB_BUFFER_WITH_SB(s)); | 2044 | brelse(SB_BUFFER_WITH_SB(s)); |
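The reiserfs changes above drop the ->write_super/s_dirt mechanism in favour of a per-superblock delayed work item: journal_mark_dirty() and check_journal_end() now call reiserfs_schedule_old_flush(), which arms the work only when none is pending, and the work handler clears work_queued before syncing the filesystem. A minimal sketch of that pattern for a hypothetical filesystem follows; the myfs_* names and myfs_sync_fs() helper are illustrative, not part of the patch.

/* Assumes <linux/workqueue.h>, <linux/spinlock.h> and <linux/writeback.h>
 * (for dirty_writeback_interval); myfs_sync_fs() is a hypothetical helper. */
struct myfs_sb_info {
	struct super_block *sb;
	spinlock_t work_lock;		/* protects work_queued */
	int work_queued;		/* non-zero if flush work is pending */
	struct delayed_work flush_work;
};

static void myfs_flush_old_commits(struct work_struct *work)
{
	struct myfs_sb_info *sbi =
		container_of(work, struct myfs_sb_info, flush_work.work);

	/* Clear the flag first so a new flush can be scheduled while we run. */
	spin_lock(&sbi->work_lock);
	sbi->work_queued = 0;
	spin_unlock(&sbi->work_lock);

	myfs_sync_fs(sbi->sb, 1);
}

static void myfs_schedule_old_flush(struct myfs_sb_info *sbi)
{
	unsigned long delay = msecs_to_jiffies(dirty_writeback_interval * 10);

	spin_lock(&sbi->work_lock);
	if (!sbi->work_queued) {
		queue_delayed_work(system_long_wq, &sbi->flush_work, delay);
		sbi->work_queued = 1;
	}
	spin_unlock(&sbi->work_lock);
}

Mount code pairs this with spin_lock_init()/INIT_DELAYED_WORK(), and the unmount, freeze and error paths with cancel_delayed_work_sync(), exactly as the super.c hunks above do.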
diff --git a/fs/signalfd.c b/fs/signalfd.c index 7ae2a574cb25..9f35a37173de 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c | |||
@@ -269,12 +269,13 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, | |||
269 | if (ufd < 0) | 269 | if (ufd < 0) |
270 | kfree(ctx); | 270 | kfree(ctx); |
271 | } else { | 271 | } else { |
272 | struct file *file = fget(ufd); | 272 | int fput_needed; |
273 | struct file *file = fget_light(ufd, &fput_needed); | ||
273 | if (!file) | 274 | if (!file) |
274 | return -EBADF; | 275 | return -EBADF; |
275 | ctx = file->private_data; | 276 | ctx = file->private_data; |
276 | if (file->f_op != &signalfd_fops) { | 277 | if (file->f_op != &signalfd_fops) { |
277 | fput(file); | 278 | fput_light(file, fput_needed); |
278 | return -EINVAL; | 279 | return -EINVAL; |
279 | } | 280 | } |
280 | spin_lock_irq(&current->sighand->siglock); | 281 | spin_lock_irq(&current->sighand->siglock);
@@ -282,7 +283,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, | |||
282 | spin_unlock_irq(&current->sighand->siglock); | 283 | spin_unlock_irq(&current->sighand->siglock);
283 | 284 | ||
284 | wake_up(&current->sighand->signalfd_wqh); | 285 | wake_up(&current->sighand->signalfd_wqh);
285 | fput(file); | 286 | fput_light(file, fput_needed); |
286 | } | 287 | } |
287 | 288 | ||
288 | return ufd; | 289 | return ufd; |
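Several conversions in this series (signalfd above, and statfs, fsync, utimes and the f*xattr syscalls below) switch from fget()/fput() to fget_light()/fput_light(). When the file table is not shared, fget_light() returns the file without bumping the reference count and sets fput_needed to 0, so the matching fput_light() becomes a no-op; this is only safe because the file is looked up and released within the same syscall. A hedged sketch of the calling convention, where do_something() stands in for the real per-file work:

/* Sketch of the fget_light()/fput_light() pattern used by these syscalls. */
static long example_fd_op(unsigned int fd)
{
	int fput_needed;
	struct file *file;
	long ret = -EBADF;

	file = fget_light(fd, &fput_needed);
	if (!file)
		return ret;

	ret = do_something(file);	/* hypothetical per-file work */

	/* Drops a reference only if fget_light() actually took one. */
	fput_light(file, fput_needed);
	return ret;
}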
diff --git a/fs/splice.c b/fs/splice.c index 406ef2b792c2..c9f1318a3b82 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1003,8 +1003,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1003 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); | 1003 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
1004 | ret = file_remove_suid(out); | 1004 | ret = file_remove_suid(out); |
1005 | if (!ret) { | 1005 | if (!ret) { |
1006 | file_update_time(out); | 1006 | ret = file_update_time(out); |
1007 | ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); | 1007 | if (!ret) |
1008 | ret = splice_from_pipe_feed(pipe, &sd, | ||
1009 | pipe_to_file); | ||
1008 | } | 1010 | } |
1009 | mutex_unlock(&inode->i_mutex); | 1011 | mutex_unlock(&inode->i_mutex); |
1010 | } while (ret > 0); | 1012 | } while (ret > 0); |
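file_update_time() now returns an int, so write paths such as generic_file_splice_write() above (and xfs_file.c below) check the result instead of silently continuing after a failed timestamp update. A sketch of the resulting prologue, with an illustrative caller name:

/* Sketch of a write-path prologue once file_update_time() can fail. */
static int example_write_checks(struct file *file)
{
	int err;

	err = file_remove_suid(file);
	if (err)
		return err;

	/* May fail, e.g. if write access to the mount cannot be obtained. */
	return file_update_time(file);
}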
diff --git a/fs/statfs.c b/fs/statfs.c index 43e6b6fe4e85..95ad5c0e586c 100644 --- a/fs/statfs.c +++ b/fs/statfs.c | |||
@@ -87,11 +87,12 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) | |||
87 | 87 | ||
88 | int fd_statfs(int fd, struct kstatfs *st) | 88 | int fd_statfs(int fd, struct kstatfs *st) |
89 | { | 89 | { |
90 | struct file *file = fget(fd); | 90 | int fput_needed; |
91 | struct file *file = fget_light(fd, &fput_needed); | ||
91 | int error = -EBADF; | 92 | int error = -EBADF; |
92 | if (file) { | 93 | if (file) { |
93 | error = vfs_statfs(&file->f_path, st); | 94 | error = vfs_statfs(&file->f_path, st); |
94 | fput(file); | 95 | fput_light(file, fput_needed); |
95 | } | 96 | } |
96 | return error; | 97 | return error; |
97 | } | 98 | } |
@@ -188,11 +188,12 @@ static int do_fsync(unsigned int fd, int datasync) | |||
188 | { | 188 | { |
189 | struct file *file; | 189 | struct file *file; |
190 | int ret = -EBADF; | 190 | int ret = -EBADF; |
191 | int fput_needed; | ||
191 | 192 | ||
192 | file = fget(fd); | 193 | file = fget_light(fd, &fput_needed); |
193 | if (file) { | 194 | if (file) { |
194 | ret = vfs_fsync(file, datasync); | 195 | ret = vfs_fsync(file, datasync); |
195 | fput(file); | 196 | fput_light(file, fput_needed); |
196 | } | 197 | } |
197 | return ret; | 198 | return ret; |
198 | } | 199 | } |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 62a2727f4ecf..a6d42efc76d2 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -1127,16 +1127,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1127 | struct ubifs_inode *ui = ubifs_inode(inode); | 1127 | struct ubifs_inode *ui = ubifs_inode(inode); |
1128 | 1128 | ||
1129 | mutex_lock(&ui->ui_mutex); | 1129 | mutex_lock(&ui->ui_mutex); |
1130 | stat->dev = inode->i_sb->s_dev; | 1130 | generic_fillattr(inode, stat); |
1131 | stat->ino = inode->i_ino; | ||
1132 | stat->mode = inode->i_mode; | ||
1133 | stat->nlink = inode->i_nlink; | ||
1134 | stat->uid = inode->i_uid; | ||
1135 | stat->gid = inode->i_gid; | ||
1136 | stat->rdev = inode->i_rdev; | ||
1137 | stat->atime = inode->i_atime; | ||
1138 | stat->mtime = inode->i_mtime; | ||
1139 | stat->ctime = inode->i_ctime; | ||
1140 | stat->blksize = UBIFS_BLOCK_SIZE; | 1131 | stat->blksize = UBIFS_BLOCK_SIZE; |
1141 | stat->size = ui->ui_size; | 1132 | stat->size = ui->ui_size; |
1142 | 1133 | ||
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index a165c66e3eef..18024178ac4c 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -1260,16 +1260,15 @@ static struct dentry *udf_fh_to_parent(struct super_block *sb, | |||
1260 | fid->udf.parent_partref, | 1260 | fid->udf.parent_partref, |
1261 | fid->udf.parent_generation); | 1261 | fid->udf.parent_generation); |
1262 | } | 1262 | } |
1263 | static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, | 1263 | static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp, |
1264 | int connectable) | 1264 | struct inode *parent) |
1265 | { | 1265 | { |
1266 | int len = *lenp; | 1266 | int len = *lenp; |
1267 | struct inode *inode = de->d_inode; | ||
1268 | struct kernel_lb_addr location = UDF_I(inode)->i_location; | 1267 | struct kernel_lb_addr location = UDF_I(inode)->i_location; |
1269 | struct fid *fid = (struct fid *)fh; | 1268 | struct fid *fid = (struct fid *)fh; |
1270 | int type = FILEID_UDF_WITHOUT_PARENT; | 1269 | int type = FILEID_UDF_WITHOUT_PARENT; |
1271 | 1270 | ||
1272 | if (connectable && (len < 5)) { | 1271 | if (parent && (len < 5)) { |
1273 | *lenp = 5; | 1272 | *lenp = 5; |
1274 | return 255; | 1273 | return 255; |
1275 | } else if (len < 3) { | 1274 | } else if (len < 3) { |
@@ -1282,14 +1281,11 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, | |||
1282 | fid->udf.partref = location.partitionReferenceNum; | 1281 | fid->udf.partref = location.partitionReferenceNum; |
1283 | fid->udf.generation = inode->i_generation; | 1282 | fid->udf.generation = inode->i_generation; |
1284 | 1283 | ||
1285 | if (connectable && !S_ISDIR(inode->i_mode)) { | 1284 | if (parent) { |
1286 | spin_lock(&de->d_lock); | 1285 | location = UDF_I(parent)->i_location; |
1287 | inode = de->d_parent->d_inode; | ||
1288 | location = UDF_I(inode)->i_location; | ||
1289 | fid->udf.parent_block = location.logicalBlockNum; | 1286 | fid->udf.parent_block = location.logicalBlockNum; |
1290 | fid->udf.parent_partref = location.partitionReferenceNum; | 1287 | fid->udf.parent_partref = location.partitionReferenceNum; |
1291 | fid->udf.parent_generation = inode->i_generation; | 1288 | fid->udf.parent_generation = inode->i_generation; |
1292 | spin_unlock(&de->d_lock); | ||
1293 | *lenp = 5; | 1289 | *lenp = 5; |
1294 | type = FILEID_UDF_WITH_PARENT; | 1290 | type = FILEID_UDF_WITH_PARENT; |
1295 | } | 1291 | } |
diff --git a/fs/utimes.c b/fs/utimes.c index ba653f3dc1bc..fa4dbe451e27 100644 --- a/fs/utimes.c +++ b/fs/utimes.c | |||
@@ -140,18 +140,19 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, | |||
140 | goto out; | 140 | goto out; |
141 | 141 | ||
142 | if (filename == NULL && dfd != AT_FDCWD) { | 142 | if (filename == NULL && dfd != AT_FDCWD) { |
143 | int fput_needed; | ||
143 | struct file *file; | 144 | struct file *file; |
144 | 145 | ||
145 | if (flags & AT_SYMLINK_NOFOLLOW) | 146 | if (flags & AT_SYMLINK_NOFOLLOW) |
146 | goto out; | 147 | goto out; |
147 | 148 | ||
148 | file = fget(dfd); | 149 | file = fget_light(dfd, &fput_needed); |
149 | error = -EBADF; | 150 | error = -EBADF; |
150 | if (!file) | 151 | if (!file) |
151 | goto out; | 152 | goto out; |
152 | 153 | ||
153 | error = utimes_common(&file->f_path, times); | 154 | error = utimes_common(&file->f_path, times); |
154 | fput(file); | 155 | fput_light(file, fput_needed); |
155 | } else { | 156 | } else { |
156 | struct path path; | 157 | struct path path; |
157 | int lookup_flags = 0; | 158 | int lookup_flags = 0; |
diff --git a/fs/xattr.c b/fs/xattr.c index 3c8c1cc333c7..1d7ac3790458 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -399,11 +399,12 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, | |||
399 | SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, | 399 | SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, |
400 | const void __user *,value, size_t, size, int, flags) | 400 | const void __user *,value, size_t, size, int, flags) |
401 | { | 401 | { |
402 | int fput_needed; | ||
402 | struct file *f; | 403 | struct file *f; |
403 | struct dentry *dentry; | 404 | struct dentry *dentry; |
404 | int error = -EBADF; | 405 | int error = -EBADF; |
405 | 406 | ||
406 | f = fget(fd); | 407 | f = fget_light(fd, &fput_needed); |
407 | if (!f) | 408 | if (!f) |
408 | return error; | 409 | return error; |
409 | dentry = f->f_path.dentry; | 410 | dentry = f->f_path.dentry; |
@@ -413,7 +414,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, | |||
413 | error = setxattr(dentry, name, value, size, flags); | 414 | error = setxattr(dentry, name, value, size, flags); |
414 | mnt_drop_write_file(f); | 415 | mnt_drop_write_file(f); |
415 | } | 416 | } |
416 | fput(f); | 417 | fput_light(f, fput_needed); |
417 | return error; | 418 | return error; |
418 | } | 419 | } |
419 | 420 | ||
@@ -486,15 +487,16 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, | |||
486 | SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, | 487 | SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, |
487 | void __user *, value, size_t, size) | 488 | void __user *, value, size_t, size) |
488 | { | 489 | { |
490 | int fput_needed; | ||
489 | struct file *f; | 491 | struct file *f; |
490 | ssize_t error = -EBADF; | 492 | ssize_t error = -EBADF; |
491 | 493 | ||
492 | f = fget(fd); | 494 | f = fget_light(fd, &fput_needed); |
493 | if (!f) | 495 | if (!f) |
494 | return error; | 496 | return error; |
495 | audit_inode(NULL, f->f_path.dentry); | 497 | audit_inode(NULL, f->f_path.dentry); |
496 | error = getxattr(f->f_path.dentry, name, value, size); | 498 | error = getxattr(f->f_path.dentry, name, value, size); |
497 | fput(f); | 499 | fput_light(f, fput_needed); |
498 | return error; | 500 | return error; |
499 | } | 501 | } |
500 | 502 | ||
@@ -566,15 +568,16 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, | |||
566 | 568 | ||
567 | SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) | 569 | SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) |
568 | { | 570 | { |
571 | int fput_needed; | ||
569 | struct file *f; | 572 | struct file *f; |
570 | ssize_t error = -EBADF; | 573 | ssize_t error = -EBADF; |
571 | 574 | ||
572 | f = fget(fd); | 575 | f = fget_light(fd, &fput_needed); |
573 | if (!f) | 576 | if (!f) |
574 | return error; | 577 | return error; |
575 | audit_inode(NULL, f->f_path.dentry); | 578 | audit_inode(NULL, f->f_path.dentry); |
576 | error = listxattr(f->f_path.dentry, list, size); | 579 | error = listxattr(f->f_path.dentry, list, size); |
577 | fput(f); | 580 | fput_light(f, fput_needed); |
578 | return error; | 581 | return error; |
579 | } | 582 | } |
580 | 583 | ||
@@ -634,11 +637,12 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, | |||
634 | 637 | ||
635 | SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) | 638 | SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) |
636 | { | 639 | { |
640 | int fput_needed; | ||
637 | struct file *f; | 641 | struct file *f; |
638 | struct dentry *dentry; | 642 | struct dentry *dentry; |
639 | int error = -EBADF; | 643 | int error = -EBADF; |
640 | 644 | ||
641 | f = fget(fd); | 645 | f = fget_light(fd, &fput_needed); |
642 | if (!f) | 646 | if (!f) |
643 | return error; | 647 | return error; |
644 | dentry = f->f_path.dentry; | 648 | dentry = f->f_path.dentry; |
@@ -648,7 +652,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) | |||
648 | error = removexattr(dentry, name); | 652 | error = removexattr(dentry, name); |
649 | mnt_drop_write_file(f); | 653 | mnt_drop_write_file(f); |
650 | } | 654 | } |
651 | fput(f); | 655 | fput_light(f, fput_needed); |
652 | return error; | 656 | return error; |
653 | } | 657 | } |
654 | 658 | ||
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a907de565db3..4a7286c1dc80 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c | |||
@@ -46,7 +46,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) | |||
46 | } | 46 | } |
47 | 47 | ||
48 | void * | 48 | void * |
49 | kmem_alloc(size_t size, unsigned int __nocast flags) | 49 | kmem_alloc(size_t size, xfs_km_flags_t flags) |
50 | { | 50 | { |
51 | int retries = 0; | 51 | int retries = 0; |
52 | gfp_t lflags = kmem_flags_convert(flags); | 52 | gfp_t lflags = kmem_flags_convert(flags); |
@@ -65,7 +65,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) | |||
65 | } | 65 | } |
66 | 66 | ||
67 | void * | 67 | void * |
68 | kmem_zalloc(size_t size, unsigned int __nocast flags) | 68 | kmem_zalloc(size_t size, xfs_km_flags_t flags) |
69 | { | 69 | { |
70 | void *ptr; | 70 | void *ptr; |
71 | 71 | ||
@@ -87,7 +87,7 @@ kmem_free(const void *ptr) | |||
87 | 87 | ||
88 | void * | 88 | void * |
89 | kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, | 89 | kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, |
90 | unsigned int __nocast flags) | 90 | xfs_km_flags_t flags) |
91 | { | 91 | { |
92 | void *new; | 92 | void *new; |
93 | 93 | ||
@@ -102,7 +102,7 @@ kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, | |||
102 | } | 102 | } |
103 | 103 | ||
104 | void * | 104 | void * |
105 | kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) | 105 | kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) |
106 | { | 106 | { |
107 | int retries = 0; | 107 | int retries = 0; |
108 | gfp_t lflags = kmem_flags_convert(flags); | 108 | gfp_t lflags = kmem_flags_convert(flags); |
@@ -121,7 +121,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) | |||
121 | } | 121 | } |
122 | 122 | ||
123 | void * | 123 | void * |
124 | kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) | 124 | kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) |
125 | { | 125 | { |
126 | void *ptr; | 126 | void *ptr; |
127 | 127 | ||
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index ab7c53fe346e..b2f2620f9a87 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h | |||
@@ -27,10 +27,11 @@ | |||
27 | * General memory allocation interfaces | 27 | * General memory allocation interfaces |
28 | */ | 28 | */ |
29 | 29 | ||
30 | #define KM_SLEEP 0x0001u | 30 | typedef unsigned __bitwise xfs_km_flags_t; |
31 | #define KM_NOSLEEP 0x0002u | 31 | #define KM_SLEEP ((__force xfs_km_flags_t)0x0001u) |
32 | #define KM_NOFS 0x0004u | 32 | #define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u) |
33 | #define KM_MAYFAIL 0x0008u | 33 | #define KM_NOFS ((__force xfs_km_flags_t)0x0004u) |
34 | #define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) | ||
34 | 35 | ||
35 | /* | 36 | /* |
36 | * We use a special process flag to avoid recursive callbacks into | 37 | * We use a special process flag to avoid recursive callbacks into |
@@ -38,7 +39,7 @@ | |||
38 | * warnings, so we explicitly skip any generic ones (silly of us). | 39 | * warnings, so we explicitly skip any generic ones (silly of us). |
39 | */ | 40 | */ |
40 | static inline gfp_t | 41 | static inline gfp_t |
41 | kmem_flags_convert(unsigned int __nocast flags) | 42 | kmem_flags_convert(xfs_km_flags_t flags) |
42 | { | 43 | { |
43 | gfp_t lflags; | 44 | gfp_t lflags; |
44 | 45 | ||
@@ -54,9 +55,9 @@ kmem_flags_convert(unsigned int __nocast flags) | |||
54 | return lflags; | 55 | return lflags; |
55 | } | 56 | } |
56 | 57 | ||
57 | extern void *kmem_alloc(size_t, unsigned int __nocast); | 58 | extern void *kmem_alloc(size_t, xfs_km_flags_t); |
58 | extern void *kmem_zalloc(size_t, unsigned int __nocast); | 59 | extern void *kmem_zalloc(size_t, xfs_km_flags_t); |
59 | extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); | 60 | extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); |
60 | extern void kmem_free(const void *); | 61 | extern void kmem_free(const void *); |
61 | 62 | ||
62 | static inline void *kmem_zalloc_large(size_t size) | 63 | static inline void *kmem_zalloc_large(size_t size) |
@@ -107,7 +108,7 @@ kmem_zone_destroy(kmem_zone_t *zone) | |||
107 | kmem_cache_destroy(zone); | 108 | kmem_cache_destroy(zone); |
108 | } | 109 | } |
109 | 110 | ||
110 | extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); | 111 | extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); |
111 | extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); | 112 | extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t); |
112 | 113 | ||
113 | #endif /* __XFS_SUPPORT_KMEM_H__ */ | 114 | #endif /* __XFS_SUPPORT_KMEM_H__ */ |
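Turning the KM_* allocation flags into a __bitwise typedef lets sparse (make C=1) flag any call site that passes a plain integer, a GFP mask, or flags from a different family where xfs_km_flags_t is expected, the same annotation trick used for gfp_t itself. A minimal illustration with made-up flag names:

/* Minimal __bitwise flag-type illustration (the example_* names are invented). */
typedef unsigned __bitwise example_flags_t;

#define EX_CAN_SLEEP	((__force example_flags_t)0x01u)
#define EX_MAY_FAIL	((__force example_flags_t)0x02u)

static inline gfp_t example_flags_to_gfp(example_flags_t flags)
{
	gfp_t gfp = GFP_NOIO;

	/* Bit tests stay legal; mixing in a bare int would warn under sparse. */
	if (flags & EX_CAN_SLEEP)
		gfp = GFP_KERNEL;
	if (flags & EX_MAY_FAIL)
		gfp |= __GFP_NORETRY;
	return gfp;
}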
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 2d25d19c4ea1..42679223a0fd 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c | |||
@@ -52,19 +52,18 @@ static int xfs_fileid_length(int fileid_type) | |||
52 | 52 | ||
53 | STATIC int | 53 | STATIC int |
54 | xfs_fs_encode_fh( | 54 | xfs_fs_encode_fh( |
55 | struct dentry *dentry, | 55 | struct inode *inode, |
56 | __u32 *fh, | 56 | __u32 *fh, |
57 | int *max_len, | 57 | int *max_len, |
58 | int connectable) | 58 | struct inode *parent) |
59 | { | 59 | { |
60 | struct fid *fid = (struct fid *)fh; | 60 | struct fid *fid = (struct fid *)fh; |
61 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; | 61 | struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; |
62 | struct inode *inode = dentry->d_inode; | ||
63 | int fileid_type; | 62 | int fileid_type; |
64 | int len; | 63 | int len; |
65 | 64 | ||
66 | /* Directories don't need their parent encoded, they have ".." */ | 65 | /* Directories don't need their parent encoded, they have ".." */ |
67 | if (S_ISDIR(inode->i_mode) || !connectable) | 66 | if (!parent) |
68 | fileid_type = FILEID_INO32_GEN; | 67 | fileid_type = FILEID_INO32_GEN; |
69 | else | 68 | else |
70 | fileid_type = FILEID_INO32_GEN_PARENT; | 69 | fileid_type = FILEID_INO32_GEN_PARENT; |
@@ -96,20 +95,16 @@ xfs_fs_encode_fh( | |||
96 | 95 | ||
97 | switch (fileid_type) { | 96 | switch (fileid_type) { |
98 | case FILEID_INO32_GEN_PARENT: | 97 | case FILEID_INO32_GEN_PARENT: |
99 | spin_lock(&dentry->d_lock); | 98 | fid->i32.parent_ino = XFS_I(parent)->i_ino; |
100 | fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; | 99 | fid->i32.parent_gen = parent->i_generation; |
101 | fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; | ||
102 | spin_unlock(&dentry->d_lock); | ||
103 | /*FALLTHRU*/ | 100 | /*FALLTHRU*/ |
104 | case FILEID_INO32_GEN: | 101 | case FILEID_INO32_GEN: |
105 | fid->i32.ino = XFS_I(inode)->i_ino; | 102 | fid->i32.ino = XFS_I(inode)->i_ino; |
106 | fid->i32.gen = inode->i_generation; | 103 | fid->i32.gen = inode->i_generation; |
107 | break; | 104 | break; |
108 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: | 105 | case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: |
109 | spin_lock(&dentry->d_lock); | 106 | fid64->parent_ino = XFS_I(parent)->i_ino; |
110 | fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; | 107 | fid64->parent_gen = parent->i_generation; |
111 | fid64->parent_gen = dentry->d_parent->d_inode->i_generation; | ||
112 | spin_unlock(&dentry->d_lock); | ||
113 | /*FALLTHRU*/ | 108 | /*FALLTHRU*/ |
114 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: | 109 | case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: |
115 | fid64->ino = XFS_I(inode)->i_ino; | 110 | fid64->ino = XFS_I(inode)->i_ino; |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8d214b87f6bb..9f7ec15a6522 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -586,8 +586,11 @@ restart: | |||
586 | * lock above. Eventually we should look into a way to avoid | 586 | * lock above. Eventually we should look into a way to avoid |
587 | * the pointless lock roundtrip. | 587 | * the pointless lock roundtrip. |
588 | */ | 588 | */ |
589 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) | 589 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) { |
590 | file_update_time(file); | 590 | error = file_update_time(file); |
591 | if (error) | ||
592 | return error; | ||
593 | } | ||
591 | 594 | ||
592 | /* | 595 | /* |
593 | * If we're writing the file then make sure to clear the setuid and | 596 | * If we're writing the file then make sure to clear the setuid and |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6b965bf450e4..f30d9807dc48 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -3152,7 +3152,7 @@ xlog_ticket_alloc( | |||
3152 | int cnt, | 3152 | int cnt, |
3153 | char client, | 3153 | char client, |
3154 | bool permanent, | 3154 | bool permanent, |
3155 | int alloc_flags) | 3155 | xfs_km_flags_t alloc_flags) |
3156 | { | 3156 | { |
3157 | struct xlog_ticket *tic; | 3157 | struct xlog_ticket *tic; |
3158 | uint num_headers; | 3158 | uint num_headers; |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 735ff1ee53da..5bc33261f5be 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -555,7 +555,7 @@ extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); | |||
555 | extern kmem_zone_t *xfs_log_ticket_zone; | 555 | extern kmem_zone_t *xfs_log_ticket_zone; |
556 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, | 556 | struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, |
557 | int count, char client, bool permanent, | 557 | int count, char client, bool permanent, |
558 | int alloc_flags); | 558 | xfs_km_flags_t alloc_flags); |
559 | 559 | ||
560 | 560 | ||
561 | static inline void | 561 | static inline void |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index cdf896fcbfa4..fdf324508c5e 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -584,7 +584,7 @@ xfs_trans_t * | |||
584 | _xfs_trans_alloc( | 584 | _xfs_trans_alloc( |
585 | xfs_mount_t *mp, | 585 | xfs_mount_t *mp, |
586 | uint type, | 586 | uint type, |
587 | uint memflags) | 587 | xfs_km_flags_t memflags) |
588 | { | 588 | { |
589 | xfs_trans_t *tp; | 589 | xfs_trans_t *tp; |
590 | 590 | ||
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7ab99e1898c8..7c37b533aa8e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -443,7 +443,7 @@ typedef struct xfs_trans { | |||
443 | * XFS transaction mechanism exported interfaces. | 443 | * XFS transaction mechanism exported interfaces. |
444 | */ | 444 | */ |
445 | xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); | 445 | xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); |
446 | xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); | 446 | xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); |
447 | xfs_trans_t *xfs_trans_dup(xfs_trans_t *); | 447 | xfs_trans_t *xfs_trans_dup(xfs_trans_t *); |
448 | int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, | 448 | int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, |
449 | uint, uint); | 449 | uint, uint); |
diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 91d44bd4dde3..fe74fccf18db 100644 --- a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h | |||
@@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t; | |||
23 | typedef unsigned int __kernel_mode_t; | 23 | typedef unsigned int __kernel_mode_t; |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | #ifndef __kernel_nlink_t | ||
27 | typedef __kernel_ulong_t __kernel_nlink_t; | ||
28 | #endif | ||
29 | |||
30 | #ifndef __kernel_pid_t | 26 | #ifndef __kernel_pid_t |
31 | typedef int __kernel_pid_t; | 27 | typedef int __kernel_pid_t; |
32 | #endif | 28 | #endif |
diff --git a/include/linux/errno.h b/include/linux/errno.h index 2d09bfa5c262..e0de516374da 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #define ENOIOCTLCMD 515 /* No ioctl command */ | 17 | #define ENOIOCTLCMD 515 /* No ioctl command */ |
18 | #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ | 18 | #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ |
19 | #define EPROBE_DEFER 517 /* Driver requests probe retry */ | 19 | #define EPROBE_DEFER 517 /* Driver requests probe retry */ |
20 | #define EOPENSTALE 518 /* open found a stale dentry */ | ||
20 | 21 | ||
21 | /* Defined for the NFSv3 protocol */ | 22 | /* Defined for the NFSv3 protocol */ |
22 | #define EBADHANDLE 521 /* Illegal NFS file handle */ | 23 | #define EBADHANDLE 521 /* Illegal NFS file handle */ |
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3a4cef5322dc..12291a7ee275 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h | |||
@@ -165,8 +165,8 @@ struct fid { | |||
165 | */ | 165 | */ |
166 | 166 | ||
167 | struct export_operations { | 167 | struct export_operations { |
168 | int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, | 168 | int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, |
169 | int connectable); | 169 | struct inode *parent); |
170 | struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, | 170 | struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, |
171 | int fh_len, int fh_type); | 171 | int fh_len, int fh_type); |
172 | struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, | 172 | struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, |
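Under the new ->encode_fh() prototype the VFS hands the filesystem the inode and, when a connectable handle was requested, an already-resolved and referenced parent inode, so implementations no longer dereference or lock the dentry tree themselves; compare the udf and xfs conversions above. A skeleton under the new signature follows; the myfs_fid layout and names are hypothetical.

/* Skeleton ->encode_fh() for the inode/parent prototype (illustrative only). */
struct myfs_fid {
	u32 ino;
	u32 gen;
	u32 parent_ino;
};

static int myfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
			  struct inode *parent)
{
	struct myfs_fid *fid = (struct myfs_fid *)fh;
	int len = parent ? 3 : 2;	/* handle length in 32-bit words */

	if (*max_len < len) {
		*max_len = len;
		return 255;		/* "buffer too small", as udf does above */
	}

	fid->ino = inode->i_ino;
	fid->gen = inode->i_generation;
	if (parent)
		fid->parent_ino = parent->i_ino;
	*max_len = len;

	return parent ? FILEID_INO32_GEN_PARENT : FILEID_INO32_GEN;
}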
diff --git a/include/linux/fs.h b/include/linux/fs.h index 40887afaaca7..51978ed43e97 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1692,6 +1692,7 @@ struct inode_operations { | |||
1692 | int (*removexattr) (struct dentry *, const char *); | 1692 | int (*removexattr) (struct dentry *, const char *); |
1693 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, | 1693 | int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, |
1694 | u64 len); | 1694 | u64 len); |
1695 | int (*update_time)(struct inode *, struct timespec *, int); | ||
1695 | } ____cacheline_aligned; | 1696 | } ____cacheline_aligned; |
1696 | 1697 | ||
1697 | struct seq_file; | 1698 | struct seq_file; |
@@ -1850,6 +1851,13 @@ static inline void inode_inc_iversion(struct inode *inode) | |||
1850 | spin_unlock(&inode->i_lock); | 1851 | spin_unlock(&inode->i_lock); |
1851 | } | 1852 | } |
1852 | 1853 | ||
1854 | enum file_time_flags { | ||
1855 | S_ATIME = 1, | ||
1856 | S_MTIME = 2, | ||
1857 | S_CTIME = 4, | ||
1858 | S_VERSION = 8, | ||
1859 | }; | ||
1860 | |||
1853 | extern void touch_atime(struct path *); | 1861 | extern void touch_atime(struct path *); |
1854 | static inline void file_accessed(struct file *file) | 1862 | static inline void file_accessed(struct file *file) |
1855 | { | 1863 | { |
@@ -2583,7 +2591,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); | |||
2583 | extern int inode_newsize_ok(const struct inode *, loff_t offset); | 2591 | extern int inode_newsize_ok(const struct inode *, loff_t offset); |
2584 | extern void setattr_copy(struct inode *inode, const struct iattr *attr); | 2592 | extern void setattr_copy(struct inode *inode, const struct iattr *attr); |
2585 | 2593 | ||
2586 | extern void file_update_time(struct file *file); | 2594 | extern int file_update_time(struct file *file); |
2587 | 2595 | ||
2588 | extern int generic_show_options(struct seq_file *m, struct dentry *root); | 2596 | extern int generic_show_options(struct seq_file *m, struct dentry *root); |
2589 | extern void save_mount_options(struct super_block *sb, char *options); | 2597 | extern void save_mount_options(struct super_block *sb, char *options); |
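The new ->update_time() hook lets a filesystem take over atime/mtime/ctime and i_version updates, for example to run them inside a transaction; when the hook is absent the VFS applies the times itself and calls mark_inode_dirty_sync(). The flags argument is a mask of the file_time_flags values added above. A sketch of an implementation that simply mirrors the default behaviour:

/* Sketch of a trivial ->update_time(); a journalling filesystem would
 * wrap this in a transaction rather than just dirtying the inode. */
static int myfs_update_time(struct inode *inode, struct timespec *time,
			    int flags)
{
	if (flags & S_ATIME)
		inode->i_atime = *time;
	if (flags & S_CTIME)
		inode->i_ctime = *time;
	if (flags & S_MTIME)
		inode->i_mtime = *time;
	if (flags & S_VERSION)
		inode_inc_iversion(inode);

	mark_inode_dirty_sync(inode);
	return 0;
}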
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 91d0e0a34ef3..63d966d5c2ea 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h | |||
@@ -60,7 +60,7 @@ | |||
60 | #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ | 60 | #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ |
61 | FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ | 61 | FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ |
62 | FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ | 62 | FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ |
63 | FS_DELETE) | 63 | FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) |
64 | 64 | ||
65 | #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) | 65 | #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) |
66 | 66 | ||
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 912c30a8ddb1..f334c7fab967 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/mutex.h> | 31 | #include <linux/mutex.h> |
32 | #include <linux/timer.h> | 32 | #include <linux/timer.h> |
33 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
34 | #include <crypto/hash.h> | ||
34 | #endif | 35 | #endif |
35 | 36 | ||
36 | #define journal_oom_retry 1 | 37 | #define journal_oom_retry 1 |
@@ -147,12 +148,24 @@ typedef struct journal_header_s | |||
147 | #define JBD2_CRC32_CHKSUM 1 | 148 | #define JBD2_CRC32_CHKSUM 1 |
148 | #define JBD2_MD5_CHKSUM 2 | 149 | #define JBD2_MD5_CHKSUM 2 |
149 | #define JBD2_SHA1_CHKSUM 3 | 150 | #define JBD2_SHA1_CHKSUM 3 |
151 | #define JBD2_CRC32C_CHKSUM 4 | ||
150 | 152 | ||
151 | #define JBD2_CRC32_CHKSUM_SIZE 4 | 153 | #define JBD2_CRC32_CHKSUM_SIZE 4 |
152 | 154 | ||
153 | #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) | 155 | #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) |
154 | /* | 156 | /* |
155 | * Commit block header for storing transactional checksums: | 157 | * Commit block header for storing transactional checksums: |
158 | * | ||
159 | * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum* | ||
160 | * fields are used to store a checksum of the descriptor and data blocks. | ||
161 | * | ||
162 | * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum | ||
163 | * field is used to store crc32c(uuid+commit_block). Each journal metadata | ||
164 | * block gets its own checksum, and data block checksums are stored in | ||
165 | * journal_block_tag (in the descriptor). The other h_chksum* fields are | ||
166 | * not used. | ||
167 | * | ||
168 | * Checksum v1 and v2 are mutually exclusive features. | ||
156 | */ | 169 | */ |
157 | struct commit_header { | 170 | struct commit_header { |
158 | __be32 h_magic; | 171 | __be32 h_magic; |
@@ -175,13 +188,19 @@ struct commit_header { | |||
175 | typedef struct journal_block_tag_s | 188 | typedef struct journal_block_tag_s |
176 | { | 189 | { |
177 | __be32 t_blocknr; /* The on-disk block number */ | 190 | __be32 t_blocknr; /* The on-disk block number */ |
178 | __be32 t_flags; /* See below */ | 191 | __be16 t_checksum; /* truncated crc32c(uuid+seq+block) */ |
192 | __be16 t_flags; /* See below */ | ||
179 | __be32 t_blocknr_high; /* most-significant high 32bits. */ | 193 | __be32 t_blocknr_high; /* most-significant high 32bits. */ |
180 | } journal_block_tag_t; | 194 | } journal_block_tag_t; |
181 | 195 | ||
182 | #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) | 196 | #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) |
183 | #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) | 197 | #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) |
184 | 198 | ||
199 | /* Tail of descriptor block, for checksumming */ | ||
200 | struct jbd2_journal_block_tail { | ||
201 | __be32 t_checksum; /* crc32c(uuid+descr_block) */ | ||
202 | }; | ||
203 | |||
185 | /* | 204 | /* |
186 | * The revoke descriptor: used on disk to describe a series of blocks to | 205 | * The revoke descriptor: used on disk to describe a series of blocks to |
187 | * be revoked from the log | 206 | * be revoked from the log |
@@ -192,6 +211,10 @@ typedef struct jbd2_journal_revoke_header_s | |||
192 | __be32 r_count; /* Count of bytes used in the block */ | 211 | __be32 r_count; /* Count of bytes used in the block */ |
193 | } jbd2_journal_revoke_header_t; | 212 | } jbd2_journal_revoke_header_t; |
194 | 213 | ||
214 | /* Tail of revoke block, for checksumming */ | ||
215 | struct jbd2_journal_revoke_tail { | ||
216 | __be32 r_checksum; /* crc32c(uuid+revoke_block) */ | ||
217 | }; | ||
195 | 218 | ||
196 | /* Definitions for the journal tag flags word: */ | 219 | /* Definitions for the journal tag flags word: */ |
197 | #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ | 220 | #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ |
@@ -241,7 +264,10 @@ typedef struct journal_superblock_s | |||
241 | __be32 s_max_trans_data; /* Limit of data blocks per trans. */ | 264 | __be32 s_max_trans_data; /* Limit of data blocks per trans. */ |
242 | 265 | ||
243 | /* 0x0050 */ | 266 | /* 0x0050 */ |
244 | __u32 s_padding[44]; | 267 | __u8 s_checksum_type; /* checksum type */ |
268 | __u8 s_padding2[3]; | ||
269 | __u32 s_padding[42]; | ||
270 | __be32 s_checksum; /* crc32c(superblock) */ | ||
245 | 271 | ||
246 | /* 0x0100 */ | 272 | /* 0x0100 */ |
247 | __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ | 273 | __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ |
@@ -263,13 +289,15 @@ typedef struct journal_superblock_s | |||
263 | #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 | 289 | #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 |
264 | #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 | 290 | #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 |
265 | #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 | 291 | #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 |
292 | #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 | ||
266 | 293 | ||
267 | /* Features known to this kernel version: */ | 294 | /* Features known to this kernel version: */ |
268 | #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM | 295 | #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM |
269 | #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 | 296 | #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 |
270 | #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ | 297 | #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ |
271 | JBD2_FEATURE_INCOMPAT_64BIT | \ | 298 | JBD2_FEATURE_INCOMPAT_64BIT | \ |
272 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) | 299 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ |
300 | JBD2_FEATURE_INCOMPAT_CSUM_V2) | ||
273 | 301 | ||
274 | #ifdef __KERNEL__ | 302 | #ifdef __KERNEL__ |
275 | 303 | ||
@@ -939,6 +967,12 @@ struct journal_s | |||
939 | * superblock pointer here | 967 | * superblock pointer here |
940 | */ | 968 | */ |
941 | void *j_private; | 969 | void *j_private; |
970 | |||
971 | /* Reference to checksum algorithm driver via cryptoapi */ | ||
972 | struct crypto_shash *j_chksum_driver; | ||
973 | |||
974 | /* Precomputed journal UUID checksum for seeding other checksums */ | ||
975 | __u32 j_csum_seed; | ||
942 | }; | 976 | }; |
943 | 977 | ||
944 | /* | 978 | /* |
@@ -1268,6 +1302,25 @@ static inline int jbd_space_needed(journal_t *journal) | |||
1268 | 1302 | ||
1269 | extern int jbd_blocks_per_page(struct inode *inode); | 1303 | extern int jbd_blocks_per_page(struct inode *inode); |
1270 | 1304 | ||
1305 | static inline u32 jbd2_chksum(journal_t *journal, u32 crc, | ||
1306 | const void *address, unsigned int length) | ||
1307 | { | ||
1308 | struct { | ||
1309 | struct shash_desc shash; | ||
1310 | char ctx[crypto_shash_descsize(journal->j_chksum_driver)]; | ||
1311 | } desc; | ||
1312 | int err; | ||
1313 | |||
1314 | desc.shash.tfm = journal->j_chksum_driver; | ||
1315 | desc.shash.flags = 0; | ||
1316 | *(u32 *)desc.ctx = crc; | ||
1317 | |||
1318 | err = crypto_shash_update(&desc.shash, address, length); | ||
1319 | BUG_ON(err); | ||
1320 | |||
1321 | return *(u32 *)desc.ctx; | ||
1322 | } | ||
1323 | |||
1271 | #ifdef __KERNEL__ | 1324 | #ifdef __KERNEL__ |
1272 | 1325 | ||
1273 | #define buffer_trace_init(bh) do {} while (0) | 1326 | #define buffer_trace_init(bh) do {} while (0) |
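The jbd2 checksum-v2 additions above give the journal a crc32c shash transform (j_chksum_driver), a checksum seed precomputed over the journal UUID (j_csum_seed), and the jbd2_chksum() helper that folds further bytes into a running crc. A per-block checksum would then typically be derived as in the sketch below; jbd2_block_csum() is an illustrative name, not a function added by this patch.

/* Sketch: crc32c over (uuid + block contents) using the helpers above. */
static u32 jbd2_block_csum(journal_t *journal, struct buffer_head *bh)
{
	/* j_csum_seed is assumed to already cover the journal UUID. */
	return jbd2_chksum(journal, journal->j_csum_seed,
			   bh->b_data, bh->b_size);
}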
diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h index 6230f8556a4e..6133679bc4c0 100644 --- a/include/linux/jbd_common.h +++ b/include/linux/jbd_common.h | |||
@@ -12,6 +12,7 @@ enum jbd_state_bits { | |||
12 | BH_State, /* Pins most journal_head state */ | 12 | BH_State, /* Pins most journal_head state */ |
13 | BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ | 13 | BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ |
14 | BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ | 14 | BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ |
15 | BH_Verified, /* Metadata block has been verified ok */ | ||
15 | BH_JBDPrivateStart, /* First bit available for private use by FS */ | 16 | BH_JBDPrivateStart, /* First bit available for private use by FS */ |
16 | }; | 17 | }; |
17 | 18 | ||
@@ -24,6 +25,7 @@ TAS_BUFFER_FNS(Revoked, revoked) | |||
24 | BUFFER_FNS(RevokeValid, revokevalid) | 25 | BUFFER_FNS(RevokeValid, revokevalid) |
25 | TAS_BUFFER_FNS(RevokeValid, revokevalid) | 26 | TAS_BUFFER_FNS(RevokeValid, revokevalid) |
26 | BUFFER_FNS(Freed, freed) | 27 | BUFFER_FNS(Freed, freed) |
28 | BUFFER_FNS(Verified, verified) | ||
27 | 29 | ||
28 | static inline struct buffer_head *jh2bh(struct journal_head *jh) | 30 | static inline struct buffer_head *jh2bh(struct journal_head *jh) |
29 | { | 31 | { |
diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 87f402ccec55..f01e5f6d1f07 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h | |||
@@ -23,28 +23,17 @@ | |||
23 | #include <linux/lockdep.h> | 23 | #include <linux/lockdep.h> |
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/notifier.h> | ||
26 | 27 | ||
27 | /* can make br locks by using local lock for read side, global lock for write */ | 28 | /* can make br locks by using local lock for read side, global lock for write */ |
28 | #define br_lock_init(name) name##_lock_init() | 29 | #define br_lock_init(name) lg_lock_init(name, #name) |
29 | #define br_read_lock(name) name##_local_lock() | 30 | #define br_read_lock(name) lg_local_lock(name) |
30 | #define br_read_unlock(name) name##_local_unlock() | 31 | #define br_read_unlock(name) lg_local_unlock(name) |
31 | #define br_write_lock(name) name##_global_lock_online() | 32 | #define br_write_lock(name) lg_global_lock(name) |
32 | #define br_write_unlock(name) name##_global_unlock_online() | 33 | #define br_write_unlock(name) lg_global_unlock(name) |
33 | 34 | ||
34 | #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) | ||
35 | #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) | 35 | #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) |
36 | 36 | ||
37 | |||
38 | #define lg_lock_init(name) name##_lock_init() | ||
39 | #define lg_local_lock(name) name##_local_lock() | ||
40 | #define lg_local_unlock(name) name##_local_unlock() | ||
41 | #define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) | ||
42 | #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) | ||
43 | #define lg_global_lock(name) name##_global_lock() | ||
44 | #define lg_global_unlock(name) name##_global_unlock() | ||
45 | #define lg_global_lock_online(name) name##_global_lock_online() | ||
46 | #define lg_global_unlock_online(name) name##_global_unlock_online() | ||
47 | |||
48 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 37 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
49 | #define LOCKDEP_INIT_MAP lockdep_init_map | 38 | #define LOCKDEP_INIT_MAP lockdep_init_map |
50 | 39 | ||
@@ -59,142 +48,26 @@ | |||
59 | #define DEFINE_LGLOCK_LOCKDEP(name) | 48 | #define DEFINE_LGLOCK_LOCKDEP(name) |
60 | #endif | 49 | #endif |
61 | 50 | ||
62 | 51 | struct lglock { | |
63 | #define DECLARE_LGLOCK(name) \ | 52 | arch_spinlock_t __percpu *lock; |
64 | extern void name##_lock_init(void); \ | 53 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
65 | extern void name##_local_lock(void); \ | 54 | struct lock_class_key lock_key; |
66 | extern void name##_local_unlock(void); \ | 55 | struct lockdep_map lock_dep_map; |
67 | extern void name##_local_lock_cpu(int cpu); \ | 56 | #endif |
68 | extern void name##_local_unlock_cpu(int cpu); \ | 57 | }; |
69 | extern void name##_global_lock(void); \ | ||
70 | extern void name##_global_unlock(void); \ | ||
71 | extern void name##_global_lock_online(void); \ | ||
72 | extern void name##_global_unlock_online(void); \ | ||
73 | 58 | ||
74 | #define DEFINE_LGLOCK(name) \ | 59 | #define DEFINE_LGLOCK(name) \ |
75 | \ | 60 | DEFINE_LGLOCK_LOCKDEP(name); \ |
76 | DEFINE_SPINLOCK(name##_cpu_lock); \ | 61 | DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ |
77 | cpumask_t name##_cpus __read_mostly; \ | 62 | = __ARCH_SPIN_LOCK_UNLOCKED; \ |
78 | DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ | 63 | struct lglock name = { .lock = &name ## _lock } |
79 | DEFINE_LGLOCK_LOCKDEP(name); \ | 64 | |
80 | \ | 65 | void lg_lock_init(struct lglock *lg, char *name); |
81 | static int \ | 66 | void lg_local_lock(struct lglock *lg); |
82 | name##_lg_cpu_callback(struct notifier_block *nb, \ | 67 | void lg_local_unlock(struct lglock *lg); |
83 | unsigned long action, void *hcpu) \ | 68 | void lg_local_lock_cpu(struct lglock *lg, int cpu); |
84 | { \ | 69 | void lg_local_unlock_cpu(struct lglock *lg, int cpu); |
85 | switch (action & ~CPU_TASKS_FROZEN) { \ | 70 | void lg_global_lock(struct lglock *lg); |
86 | case CPU_UP_PREPARE: \ | 71 | void lg_global_unlock(struct lglock *lg); |
87 | spin_lock(&name##_cpu_lock); \ | 72 | |
88 | cpu_set((unsigned long)hcpu, name##_cpus); \ | ||
89 | spin_unlock(&name##_cpu_lock); \ | ||
90 | break; \ | ||
91 | case CPU_UP_CANCELED: case CPU_DEAD: \ | ||
92 | spin_lock(&name##_cpu_lock); \ | ||
93 | cpu_clear((unsigned long)hcpu, name##_cpus); \ | ||
94 | spin_unlock(&name##_cpu_lock); \ | ||
95 | } \ | ||
96 | return NOTIFY_OK; \ | ||
97 | } \ | ||
98 | static struct notifier_block name##_lg_cpu_notifier = { \ | ||
99 | .notifier_call = name##_lg_cpu_callback, \ | ||
100 | }; \ | ||
101 | void name##_lock_init(void) { \ | ||
102 | int i; \ | ||
103 | LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ | ||
104 | for_each_possible_cpu(i) { \ | ||
105 | arch_spinlock_t *lock; \ | ||
106 | lock = &per_cpu(name##_lock, i); \ | ||
107 | *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ | ||
108 | } \ | ||
109 | register_hotcpu_notifier(&name##_lg_cpu_notifier); \ | ||
110 | get_online_cpus(); \ | ||
111 | for_each_online_cpu(i) \ | ||
112 | cpu_set(i, name##_cpus); \ | ||
113 | put_online_cpus(); \ | ||
114 | } \ | ||
115 | EXPORT_SYMBOL(name##_lock_init); \ | ||
116 | \ | ||
117 | void name##_local_lock(void) { \ | ||
118 | arch_spinlock_t *lock; \ | ||
119 | preempt_disable(); \ | ||
120 | rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ | ||
121 | lock = &__get_cpu_var(name##_lock); \ | ||
122 | arch_spin_lock(lock); \ | ||
123 | } \ | ||
124 | EXPORT_SYMBOL(name##_local_lock); \ | ||
125 | \ | ||
126 | void name##_local_unlock(void) { \ | ||
127 | arch_spinlock_t *lock; \ | ||
128 | rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ | ||
129 | lock = &__get_cpu_var(name##_lock); \ | ||
130 | arch_spin_unlock(lock); \ | ||
131 | preempt_enable(); \ | ||
132 | } \ | ||
133 | EXPORT_SYMBOL(name##_local_unlock); \ | ||
134 | \ | ||
135 | void name##_local_lock_cpu(int cpu) { \ | ||
136 | arch_spinlock_t *lock; \ | ||
137 | preempt_disable(); \ | ||
138 | rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ | ||
139 | lock = &per_cpu(name##_lock, cpu); \ | ||
140 | arch_spin_lock(lock); \ | ||
141 | } \ | ||
142 | EXPORT_SYMBOL(name##_local_lock_cpu); \ | ||
143 | \ | ||
144 | void name##_local_unlock_cpu(int cpu) { \ | ||
145 | arch_spinlock_t *lock; \ | ||
146 | rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ | ||
147 | lock = &per_cpu(name##_lock, cpu); \ | ||
148 | arch_spin_unlock(lock); \ | ||
149 | preempt_enable(); \ | ||
150 | } \ | ||
151 | EXPORT_SYMBOL(name##_local_unlock_cpu); \ | ||
152 | \ | ||
153 | void name##_global_lock_online(void) { \ | ||
154 | int i; \ | ||
155 | spin_lock(&name##_cpu_lock); \ | ||
156 | rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ | ||
157 | for_each_cpu(i, &name##_cpus) { \ | ||
158 | arch_spinlock_t *lock; \ | ||
159 | lock = &per_cpu(name##_lock, i); \ | ||
160 | arch_spin_lock(lock); \ | ||
161 | } \ | ||
162 | } \ | ||
163 | EXPORT_SYMBOL(name##_global_lock_online); \ | ||
164 | \ | ||
165 | void name##_global_unlock_online(void) { \ | ||
166 | int i; \ | ||
167 | rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ | ||
168 | for_each_cpu(i, &name##_cpus) { \ | ||
169 | arch_spinlock_t *lock; \ | ||
170 | lock = &per_cpu(name##_lock, i); \ | ||
171 | arch_spin_unlock(lock); \ | ||
172 | } \ | ||
173 | spin_unlock(&name##_cpu_lock); \ | ||
174 | } \ | ||
175 | EXPORT_SYMBOL(name##_global_unlock_online); \ | ||
176 | \ | ||
177 | void name##_global_lock(void) { \ | ||
178 | int i; \ | ||
179 | preempt_disable(); \ | ||
180 | rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ | ||
181 | for_each_possible_cpu(i) { \ | ||
182 | arch_spinlock_t *lock; \ | ||
183 | lock = &per_cpu(name##_lock, i); \ | ||
184 | arch_spin_lock(lock); \ | ||
185 | } \ | ||
186 | } \ | ||
187 | EXPORT_SYMBOL(name##_global_lock); \ | ||
188 | \ | ||
189 | void name##_global_unlock(void) { \ | ||
190 | int i; \ | ||
191 | rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ | ||
192 | for_each_possible_cpu(i) { \ | ||
193 | arch_spinlock_t *lock; \ | ||
194 | lock = &per_cpu(name##_lock, i); \ | ||
195 | arch_spin_unlock(lock); \ | ||
196 | } \ | ||
197 | preempt_enable(); \ | ||
198 | } \ | ||
199 | EXPORT_SYMBOL(name##_global_unlock); | ||
200 | #endif | 73 | #endif |
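
For reference, a minimal sketch of how a caller is expected to use the struct-based lglock API declared above (the lock name, init site and call sites are illustrative, not taken from this series):

#include <linux/lglock.h>

/* Illustrative only: a per-CPU "local/global" lock. */
DEFINE_LGLOCK(files_lglock);

static int __init example_init(void)
{
	/* Registers the lockdep class; the string is used for reporting only. */
	lg_lock_init(&files_lglock, "files_lglock");
	return 0;
}

static void touch_this_cpu_state(void)
{
	/* Fast path: take only this CPU's arch_spinlock_t. */
	lg_local_lock(&files_lglock);
	/* ... modify per-CPU data ... */
	lg_local_unlock(&files_lglock);
}

static void walk_all_cpus_state(void)
{
	/* Slow path: take every possible CPU's lock to get a global view. */
	lg_global_lock(&files_lglock);
	/* ... walk the data of all CPUs ... */
	lg_global_unlock(&files_lglock);
}

Note the global-online variants are gone; the global path now simply iterates all possible CPUs, which removes the hotplug notifier dance the old macros generated.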
diff --git a/include/linux/mm.h b/include/linux/mm.h index ce26716238c3..b36d08ce5c57 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1392,7 +1392,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo | |||
1392 | extern unsigned long mmap_region(struct file *file, unsigned long addr, | 1392 | extern unsigned long mmap_region(struct file *file, unsigned long addr, |
1393 | unsigned long len, unsigned long flags, | 1393 | unsigned long len, unsigned long flags, |
1394 | vm_flags_t vm_flags, unsigned long pgoff); | 1394 | vm_flags_t vm_flags, unsigned long pgoff); |
1395 | extern unsigned long do_mmap(struct file *, unsigned long, | 1395 | extern unsigned long do_mmap_pgoff(struct file *, unsigned long, |
1396 | unsigned long, unsigned long, | 1396 | unsigned long, unsigned long, |
1397 | unsigned long, unsigned long); | 1397 | unsigned long, unsigned long); |
1398 | extern int do_munmap(struct mm_struct *, unsigned long, size_t); | 1398 | extern int do_munmap(struct mm_struct *, unsigned long, size_t); |
diff --git a/include/linux/security.h b/include/linux/security.h index ab0e091ce5fa..4e5a73cdbbef 100644 --- a/include/linux/security.h +++ b/include/linux/security.h | |||
@@ -86,9 +86,9 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, | |||
86 | extern int cap_inode_removexattr(struct dentry *dentry, const char *name); | 86 | extern int cap_inode_removexattr(struct dentry *dentry, const char *name); |
87 | extern int cap_inode_need_killpriv(struct dentry *dentry); | 87 | extern int cap_inode_need_killpriv(struct dentry *dentry); |
88 | extern int cap_inode_killpriv(struct dentry *dentry); | 88 | extern int cap_inode_killpriv(struct dentry *dentry); |
89 | extern int cap_file_mmap(struct file *file, unsigned long reqprot, | 89 | extern int cap_mmap_addr(unsigned long addr); |
90 | unsigned long prot, unsigned long flags, | 90 | extern int cap_mmap_file(struct file *file, unsigned long reqprot, |
91 | unsigned long addr, unsigned long addr_only); | 91 | unsigned long prot, unsigned long flags); |
92 | extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); | 92 | extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); |
93 | extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, | 93 | extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, |
94 | unsigned long arg4, unsigned long arg5); | 94 | unsigned long arg4, unsigned long arg5); |
@@ -586,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) | |||
586 | * simple integer value. When @arg represents a user space pointer, it | 586 | * simple integer value. When @arg represents a user space pointer, it |
587 | * should never be used by the security module. | 587 | * should never be used by the security module. |
588 | * Return 0 if permission is granted. | 588 | * Return 0 if permission is granted. |
589 | * @file_mmap : | 589 | * @mmap_addr : |
590 | * Check permissions for a mmap operation at @addr. | ||
591 | * @addr contains virtual address that will be used for the operation. | ||
592 | * Return 0 if permission is granted. | ||
593 | * @mmap_file : | ||
590 | * Check permissions for a mmap operation. The @file may be NULL, e.g. | 594 | * Check permissions for a mmap operation. The @file may be NULL, e.g. |
591 | * if mapping anonymous memory. | 595 | * if mapping anonymous memory. |
592 | * @file contains the file structure for file to map (may be NULL). | 596 | * @file contains the file structure for file to map (may be NULL). |
593 | * @reqprot contains the protection requested by the application. | 597 | * @reqprot contains the protection requested by the application. |
594 | * @prot contains the protection that will be applied by the kernel. | 598 | * @prot contains the protection that will be applied by the kernel. |
595 | * @flags contains the operational flags. | 599 | * @flags contains the operational flags. |
596 | * @addr contains virtual address that will be used for the operation. | ||
597 | * @addr_only contains a boolean: 0 if file-backed VMA, otherwise 1. | ||
598 | * Return 0 if permission is granted. | 600 | * Return 0 if permission is granted. |
599 | * @file_mprotect: | 601 | * @file_mprotect: |
600 | * Check permissions before changing memory access permissions. | 602 | * Check permissions before changing memory access permissions. |
@@ -1481,10 +1483,10 @@ struct security_operations { | |||
1481 | void (*file_free_security) (struct file *file); | 1483 | void (*file_free_security) (struct file *file); |
1482 | int (*file_ioctl) (struct file *file, unsigned int cmd, | 1484 | int (*file_ioctl) (struct file *file, unsigned int cmd, |
1483 | unsigned long arg); | 1485 | unsigned long arg); |
1484 | int (*file_mmap) (struct file *file, | 1486 | int (*mmap_addr) (unsigned long addr); |
1487 | int (*mmap_file) (struct file *file, | ||
1485 | unsigned long reqprot, unsigned long prot, | 1488 | unsigned long reqprot, unsigned long prot, |
1486 | unsigned long flags, unsigned long addr, | 1489 | unsigned long flags); |
1487 | unsigned long addr_only); | ||
1488 | int (*file_mprotect) (struct vm_area_struct *vma, | 1490 | int (*file_mprotect) (struct vm_area_struct *vma, |
1489 | unsigned long reqprot, | 1491 | unsigned long reqprot, |
1490 | unsigned long prot); | 1492 | unsigned long prot); |
@@ -1743,9 +1745,9 @@ int security_file_permission(struct file *file, int mask); | |||
1743 | int security_file_alloc(struct file *file); | 1745 | int security_file_alloc(struct file *file); |
1744 | void security_file_free(struct file *file); | 1746 | void security_file_free(struct file *file); |
1745 | int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 1747 | int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
1746 | int security_file_mmap(struct file *file, unsigned long reqprot, | 1748 | int security_mmap_file(struct file *file, unsigned long prot, |
1747 | unsigned long prot, unsigned long flags, | 1749 | unsigned long flags); |
1748 | unsigned long addr, unsigned long addr_only); | 1750 | int security_mmap_addr(unsigned long addr); |
1749 | int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, | 1751 | int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, |
1750 | unsigned long prot); | 1752 | unsigned long prot); |
1751 | int security_file_lock(struct file *file, unsigned int cmd); | 1753 | int security_file_lock(struct file *file, unsigned int cmd); |
@@ -2181,13 +2183,15 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, | |||
2181 | return 0; | 2183 | return 0; |
2182 | } | 2184 | } |
2183 | 2185 | ||
2184 | static inline int security_file_mmap(struct file *file, unsigned long reqprot, | 2186 | static inline int security_mmap_file(struct file *file, unsigned long prot, |
2185 | unsigned long prot, | 2187 | unsigned long flags) |
2186 | unsigned long flags, | 2188 | { |
2187 | unsigned long addr, | 2189 | return 0; |
2188 | unsigned long addr_only) | 2190 | } |
2191 | |||
2192 | static inline int security_mmap_addr(unsigned long addr) | ||
2189 | { | 2193 | { |
2190 | return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); | 2194 | return cap_mmap_addr(addr); |
2191 | } | 2195 | } |
2192 | 2196 | ||
2193 | static inline int security_file_mprotect(struct vm_area_struct *vma, | 2197 | static inline int security_file_mprotect(struct vm_area_struct *vma, |
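
A hedged sketch of what an LSM's mmap hooks look like after this split; the module name and the policy helper are made up for illustration. Address checks move to mmap_addr (most modules can simply delegate to commoncap, as the AppArmor hunk further down does), while file/protection checks stay in mmap_file:

#include <linux/security.h>
#include <linux/mman.h>

/* Hypothetical policy helper for this sketch; always allows. */
static int example_exec_allowed(struct file *file)
{
	return 0;
}

static int example_mmap_file(struct file *file, unsigned long reqprot,
			     unsigned long prot, unsigned long flags)
{
	/* The address/DAC check no longer lives here; callers run
	 * security_mmap_addr() separately.  Only file/prot policy remains. */
	if (file && (prot & PROT_EXEC))
		return example_exec_allowed(file);
	return 0;
}

static struct security_operations example_ops = {
	.name      = "example",
	.mmap_file = example_mmap_file,
	.mmap_addr = cap_mmap_addr,	/* delegate the low-address check */
};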
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 51b29ac45a8e..40e0a273faea 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
@@ -232,7 +232,6 @@ struct svc_rqst { | |||
232 | struct svc_pool * rq_pool; /* thread pool */ | 232 | struct svc_pool * rq_pool; /* thread pool */ |
233 | struct svc_procedure * rq_procinfo; /* procedure info */ | 233 | struct svc_procedure * rq_procinfo; /* procedure info */ |
234 | struct auth_ops * rq_authop; /* authentication flavour */ | 234 | struct auth_ops * rq_authop; /* authentication flavour */ |
235 | u32 rq_flavor; /* pseudoflavor */ | ||
236 | struct svc_cred rq_cred; /* auth info */ | 235 | struct svc_cred rq_cred; /* auth info */ |
237 | void * rq_xprt_ctxt; /* transport specific context ptr */ | 236 | void * rq_xprt_ctxt; /* transport specific context ptr */ |
238 | struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ | 237 | struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ |
@@ -416,6 +415,7 @@ struct svc_procedure { | |||
416 | */ | 415 | */ |
417 | int svc_rpcb_setup(struct svc_serv *serv, struct net *net); | 416 | int svc_rpcb_setup(struct svc_serv *serv, struct net *net); |
418 | void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); | 417 | void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); |
418 | int svc_bind(struct svc_serv *serv, struct net *net); | ||
419 | struct svc_serv *svc_create(struct svc_program *, unsigned int, | 419 | struct svc_serv *svc_create(struct svc_program *, unsigned int, |
420 | void (*shutdown)(struct svc_serv *, struct net *net)); | 420 | void (*shutdown)(struct svc_serv *, struct net *net)); |
421 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, | 421 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, |
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 2c54683b91de..dd74084a9799 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h | |||
@@ -15,13 +15,23 @@ | |||
15 | #include <linux/sunrpc/msg_prot.h> | 15 | #include <linux/sunrpc/msg_prot.h> |
16 | #include <linux/sunrpc/cache.h> | 16 | #include <linux/sunrpc/cache.h> |
17 | #include <linux/hash.h> | 17 | #include <linux/hash.h> |
18 | #include <linux/cred.h> | ||
18 | 19 | ||
19 | struct svc_cred { | 20 | struct svc_cred { |
20 | uid_t cr_uid; | 21 | uid_t cr_uid; |
21 | gid_t cr_gid; | 22 | gid_t cr_gid; |
22 | struct group_info *cr_group_info; | 23 | struct group_info *cr_group_info; |
24 | u32 cr_flavor; /* pseudoflavor */ | ||
25 | char *cr_principal; /* for gss */ | ||
23 | }; | 26 | }; |
24 | 27 | ||
28 | static inline void free_svc_cred(struct svc_cred *cred) | ||
29 | { | ||
30 | if (cred->cr_group_info) | ||
31 | put_group_info(cred->cr_group_info); | ||
32 | kfree(cred->cr_principal); | ||
33 | } | ||
34 | |||
25 | struct svc_rqst; /* forward decl */ | 35 | struct svc_rqst; /* forward decl */ |
26 | struct in6_addr; | 36 | struct in6_addr; |
27 | 37 | ||
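
With cr_principal now part of struct svc_cred, code that copies a cred into a cache entry can release everything through the new free_svc_cred() helper. A small sketch, with the entry type and fill function invented for illustration:

#include <linux/sunrpc/svcauth.h>
#include <linux/slab.h>

struct example_entry {			/* illustrative cache entry */
	struct svc_cred	cred;
};

static int example_fill(struct example_entry *e, uid_t uid, gid_t gid,
			const char *principal)
{
	e->cred.cr_uid = uid;
	e->cred.cr_gid = gid;
	e->cred.cr_group_info = groups_alloc(0);
	if (!e->cred.cr_group_info)
		return -ENOMEM;
	/* cr_principal may legitimately stay NULL; kfree(NULL) is a no-op. */
	e->cred.cr_principal = kstrdup(principal, GFP_KERNEL);
	return 0;
}

static void example_release(struct example_entry *e)
{
	/* Drops the group_info reference and frees the principal string. */
	free_svc_cred(&e->cred);
}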
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index 7c32daa025eb..726aff1a5201 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h | |||
@@ -22,7 +22,6 @@ int gss_svc_init_net(struct net *net); | |||
22 | void gss_svc_shutdown_net(struct net *net); | 22 | void gss_svc_shutdown_net(struct net *net); |
23 | int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); | 23 | int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); |
24 | u32 svcauth_gss_flavor(struct auth_domain *dom); | 24 | u32 svcauth_gss_flavor(struct auth_domain *dom); |
25 | char *svc_gss_principal(struct svc_rqst *); | ||
26 | 25 | ||
27 | #endif /* __KERNEL__ */ | 26 | #endif /* __KERNEL__ */ |
28 | #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ | 27 | #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ |
diff --git a/include/linux/types.h b/include/linux/types.h index 7f480db60231..9c1bd539ea70 100644 --- a/include/linux/types.h +++ b/include/linux/types.h | |||
@@ -25,7 +25,7 @@ typedef __kernel_dev_t dev_t; | |||
25 | typedef __kernel_ino_t ino_t; | 25 | typedef __kernel_ino_t ino_t; |
26 | typedef __kernel_mode_t mode_t; | 26 | typedef __kernel_mode_t mode_t; |
27 | typedef unsigned short umode_t; | 27 | typedef unsigned short umode_t; |
28 | typedef __kernel_nlink_t nlink_t; | 28 | typedef __u32 nlink_t; |
29 | typedef __kernel_off_t off_t; | 29 | typedef __kernel_off_t off_t; |
30 | typedef __kernel_pid_t pid_t; | 30 | typedef __kernel_pid_t pid_t; |
31 | typedef __kernel_daddr_t daddr_t; | 31 | typedef __kernel_daddr_t daddr_t; |
diff --git a/ipc/shm.c b/ipc/shm.c --- a/ipc/shm.c +++ b/ipc/shm.c | |||
@@ -1036,6 +1036,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) | |||
1036 | sfd->file = shp->shm_file; | 1036 | sfd->file = shp->shm_file; |
1037 | sfd->vm_ops = NULL; | 1037 | sfd->vm_ops = NULL; |
1038 | 1038 | ||
1039 | err = security_mmap_file(file, prot, flags); | ||
1040 | if (err) | ||
1041 | goto out_fput; | ||
1042 | |||
1039 | down_write(¤t->mm->mmap_sem); | 1043 | down_write(¤t->mm->mmap_sem); |
1040 | if (addr && !(shmflg & SHM_REMAP)) { | 1044 | if (addr && !(shmflg & SHM_REMAP)) { |
1041 | err = -EINVAL; | 1045 | err = -EINVAL; |
@@ -1050,7 +1054,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) | |||
1050 | goto invalid; | 1054 | goto invalid; |
1051 | } | 1055 | } |
1052 | 1056 | ||
1053 | user_addr = do_mmap (file, addr, size, prot, flags, 0); | 1057 | user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); |
1054 | *raddr = user_addr; | 1058 | *raddr = user_addr; |
1055 | err = 0; | 1059 | err = 0; |
1056 | if (IS_ERR_VALUE(user_addr)) | 1060 | if (IS_ERR_VALUE(user_addr)) |
@@ -1058,6 +1062,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) | |||
1058 | invalid: | 1062 | invalid: |
1059 | up_write(¤t->mm->mmap_sem); | 1063 | up_write(¤t->mm->mmap_sem); |
1060 | 1064 | ||
1065 | out_fput: | ||
1061 | fput(file); | 1066 | fput(file); |
1062 | 1067 | ||
1063 | out_nattch: | 1068 | out_nattch: |
diff --git a/kernel/Makefile b/kernel/Makefile index 6f3d0ae044b2..c0cc67ad764c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ | |||
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o cred.o \ | 12 | notifier.o ksysfs.o cred.o \ |
13 | async.o range.o groups.o | 13 | async.o range.o groups.o lglock.o |
14 | 14 | ||
15 | ifdef CONFIG_FUNCTION_TRACER | 15 | ifdef CONFIG_FUNCTION_TRACER |
16 | # Do not trace debug files and internal ftrace files | 16 | # Do not trace debug files and internal ftrace files |
diff --git a/kernel/lglock.c b/kernel/lglock.c new file mode 100644 index 000000000000..6535a667a5a7 --- /dev/null +++ b/kernel/lglock.c | |||
@@ -0,0 +1,89 @@ | |||
1 | /* See include/linux/lglock.h for description */ | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/lglock.h> | ||
4 | #include <linux/cpu.h> | ||
5 | #include <linux/string.h> | ||
6 | |||
7 | /* | ||
8 | * Note there is no uninit, so lglocks cannot be defined in | ||
9 | * modules (but it's fine to use them from there) | ||
10 | * Could be added though, just undo lg_lock_init | ||
11 | */ | ||
12 | |||
13 | void lg_lock_init(struct lglock *lg, char *name) | ||
14 | { | ||
15 | LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); | ||
16 | } | ||
17 | EXPORT_SYMBOL(lg_lock_init); | ||
18 | |||
19 | void lg_local_lock(struct lglock *lg) | ||
20 | { | ||
21 | arch_spinlock_t *lock; | ||
22 | |||
23 | preempt_disable(); | ||
24 | rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); | ||
25 | lock = this_cpu_ptr(lg->lock); | ||
26 | arch_spin_lock(lock); | ||
27 | } | ||
28 | EXPORT_SYMBOL(lg_local_lock); | ||
29 | |||
30 | void lg_local_unlock(struct lglock *lg) | ||
31 | { | ||
32 | arch_spinlock_t *lock; | ||
33 | |||
34 | rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); | ||
35 | lock = this_cpu_ptr(lg->lock); | ||
36 | arch_spin_unlock(lock); | ||
37 | preempt_enable(); | ||
38 | } | ||
39 | EXPORT_SYMBOL(lg_local_unlock); | ||
40 | |||
41 | void lg_local_lock_cpu(struct lglock *lg, int cpu) | ||
42 | { | ||
43 | arch_spinlock_t *lock; | ||
44 | |||
45 | preempt_disable(); | ||
46 | rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); | ||
47 | lock = per_cpu_ptr(lg->lock, cpu); | ||
48 | arch_spin_lock(lock); | ||
49 | } | ||
50 | EXPORT_SYMBOL(lg_local_lock_cpu); | ||
51 | |||
52 | void lg_local_unlock_cpu(struct lglock *lg, int cpu) | ||
53 | { | ||
54 | arch_spinlock_t *lock; | ||
55 | |||
56 | rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); | ||
57 | lock = per_cpu_ptr(lg->lock, cpu); | ||
58 | arch_spin_unlock(lock); | ||
59 | preempt_enable(); | ||
60 | } | ||
61 | EXPORT_SYMBOL(lg_local_unlock_cpu); | ||
62 | |||
63 | void lg_global_lock(struct lglock *lg) | ||
64 | { | ||
65 | int i; | ||
66 | |||
67 | preempt_disable(); | ||
68 | rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); | ||
69 | for_each_possible_cpu(i) { | ||
70 | arch_spinlock_t *lock; | ||
71 | lock = per_cpu_ptr(lg->lock, i); | ||
72 | arch_spin_lock(lock); | ||
73 | } | ||
74 | } | ||
75 | EXPORT_SYMBOL(lg_global_lock); | ||
76 | |||
77 | void lg_global_unlock(struct lglock *lg) | ||
78 | { | ||
79 | int i; | ||
80 | |||
81 | rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); | ||
82 | for_each_possible_cpu(i) { | ||
83 | arch_spinlock_t *lock; | ||
84 | lock = per_cpu_ptr(lg->lock, i); | ||
85 | arch_spin_unlock(lock); | ||
86 | } | ||
87 | preempt_enable(); | ||
88 | } | ||
89 | EXPORT_SYMBOL(lg_global_unlock); | ||
diff --git a/mm/cleancache.c b/mm/cleancache.c index 5646c740f613..32e6f4136fa2 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c | |||
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(__cleancache_init_shared_fs); | |||
80 | static int cleancache_get_key(struct inode *inode, | 80 | static int cleancache_get_key(struct inode *inode, |
81 | struct cleancache_filekey *key) | 81 | struct cleancache_filekey *key) |
82 | { | 82 | { |
83 | int (*fhfn)(struct dentry *, __u32 *fh, int *, int); | 83 | int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *); |
84 | int len = 0, maxlen = CLEANCACHE_KEY_MAX; | 84 | int len = 0, maxlen = CLEANCACHE_KEY_MAX; |
85 | struct super_block *sb = inode->i_sb; | 85 | struct super_block *sb = inode->i_sb; |
86 | 86 | ||
@@ -88,9 +88,7 @@ static int cleancache_get_key(struct inode *inode, | |||
88 | if (sb->s_export_op != NULL) { | 88 | if (sb->s_export_op != NULL) { |
89 | fhfn = sb->s_export_op->encode_fh; | 89 | fhfn = sb->s_export_op->encode_fh; |
90 | if (fhfn) { | 90 | if (fhfn) { |
91 | struct dentry d; | 91 | len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); |
92 | d.d_inode = inode; | ||
93 | len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); | ||
94 | if (len <= 0 || len == 255) | 92 | if (len <= 0 || len == 255) |
95 | return -1; | 93 | return -1; |
96 | if (maxlen > CLEANCACHE_KEY_MAX) | 94 | if (maxlen > CLEANCACHE_KEY_MAX) |
diff --git a/mm/filemap.c b/mm/filemap.c index 64b48f934b89..a4a5260b0279 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1899,71 +1899,6 @@ struct page *read_cache_page(struct address_space *mapping, | |||
1899 | } | 1899 | } |
1900 | EXPORT_SYMBOL(read_cache_page); | 1900 | EXPORT_SYMBOL(read_cache_page); |
1901 | 1901 | ||
1902 | /* | ||
1903 | * The logic we want is | ||
1904 | * | ||
1905 | * if suid or (sgid and xgrp) | ||
1906 | * remove privs | ||
1907 | */ | ||
1908 | int should_remove_suid(struct dentry *dentry) | ||
1909 | { | ||
1910 | umode_t mode = dentry->d_inode->i_mode; | ||
1911 | int kill = 0; | ||
1912 | |||
1913 | /* suid always must be killed */ | ||
1914 | if (unlikely(mode & S_ISUID)) | ||
1915 | kill = ATTR_KILL_SUID; | ||
1916 | |||
1917 | /* | ||
1918 | * sgid without any exec bits is just a mandatory locking mark; leave | ||
1919 | * it alone. If some exec bits are set, it's a real sgid; kill it. | ||
1920 | */ | ||
1921 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) | ||
1922 | kill |= ATTR_KILL_SGID; | ||
1923 | |||
1924 | if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) | ||
1925 | return kill; | ||
1926 | |||
1927 | return 0; | ||
1928 | } | ||
1929 | EXPORT_SYMBOL(should_remove_suid); | ||
1930 | |||
1931 | static int __remove_suid(struct dentry *dentry, int kill) | ||
1932 | { | ||
1933 | struct iattr newattrs; | ||
1934 | |||
1935 | newattrs.ia_valid = ATTR_FORCE | kill; | ||
1936 | return notify_change(dentry, &newattrs); | ||
1937 | } | ||
1938 | |||
1939 | int file_remove_suid(struct file *file) | ||
1940 | { | ||
1941 | struct dentry *dentry = file->f_path.dentry; | ||
1942 | struct inode *inode = dentry->d_inode; | ||
1943 | int killsuid; | ||
1944 | int killpriv; | ||
1945 | int error = 0; | ||
1946 | |||
1947 | /* Fast path for nothing security related */ | ||
1948 | if (IS_NOSEC(inode)) | ||
1949 | return 0; | ||
1950 | |||
1951 | killsuid = should_remove_suid(dentry); | ||
1952 | killpriv = security_inode_need_killpriv(dentry); | ||
1953 | |||
1954 | if (killpriv < 0) | ||
1955 | return killpriv; | ||
1956 | if (killpriv) | ||
1957 | error = security_inode_killpriv(dentry); | ||
1958 | if (!error && killsuid) | ||
1959 | error = __remove_suid(dentry, killsuid); | ||
1960 | if (!error && (inode->i_sb->s_flags & MS_NOSEC)) | ||
1961 | inode->i_flags |= S_NOSEC; | ||
1962 | |||
1963 | return error; | ||
1964 | } | ||
1965 | EXPORT_SYMBOL(file_remove_suid); | ||
1966 | |||
1967 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, | 1902 | static size_t __iovec_copy_from_user_inatomic(char *vaddr, |
1968 | const struct iovec *iov, size_t base, size_t bytes) | 1903 | const struct iovec *iov, size_t base, size_t bytes) |
1969 | { | 1904 | { |
@@ -2489,7 +2424,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2489 | if (err) | 2424 | if (err) |
2490 | goto out; | 2425 | goto out; |
2491 | 2426 | ||
2492 | file_update_time(file); | 2427 | err = file_update_time(file); |
2428 | if (err) | ||
2429 | goto out; | ||
2493 | 2430 | ||
2494 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 2431 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
2495 | if (unlikely(file->f_flags & O_DIRECT)) { | 2432 | if (unlikely(file->f_flags & O_DIRECT)) { |
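
Since file_update_time() now returns an error that the generic write path propagates (as in the hunk above), any write path following the same pattern looks roughly like this sketch; the surrounding function and the low-level writer are hypothetical:

#include <linux/fs.h>

/* Hypothetical low-level writer, declared only to complete the sketch. */
static ssize_t example_do_write(struct file *file, const char __user *buf,
				size_t len, loff_t *ppos);

static ssize_t example_write(struct file *file, const char __user *buf,
			     size_t len, loff_t *ppos)
{
	int err;

	err = file_remove_suid(file);
	if (err)
		return err;

	/* file_update_time() can now fail (the timestamp update may need a
	 * journalled transaction); propagate the error instead of ignoring it. */
	err = file_update_time(file);
	if (err)
		return err;

	return example_do_write(file, buf, len, ppos);
}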
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index a4eb31132229..213ca1f53409 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c | |||
@@ -426,7 +426,9 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, | |||
426 | if (ret) | 426 | if (ret) |
427 | goto out_backing; | 427 | goto out_backing; |
428 | 428 | ||
429 | file_update_time(filp); | 429 | ret = file_update_time(filp); |
430 | if (ret) | ||
431 | goto out_backing; | ||
430 | 432 | ||
431 | ret = __xip_file_write (filp, buf, count, pos, ppos); | 433 | ret = __xip_file_write (filp, buf, count, pos, ppos); |
432 | 434 | ||
diff --git a/mm/internal.h b/mm/internal.h index 4194ab9dc19b..5cbb78190041 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -350,3 +350,7 @@ extern u64 hwpoison_filter_flags_mask; | |||
350 | extern u64 hwpoison_filter_flags_value; | 350 | extern u64 hwpoison_filter_flags_value; |
351 | extern u64 hwpoison_filter_memcg; | 351 | extern u64 hwpoison_filter_memcg; |
352 | extern u32 hwpoison_filter_enable; | 352 | extern u32 hwpoison_filter_enable; |
353 | |||
354 | extern unsigned long vm_mmap_pgoff(struct file *, unsigned long, | ||
355 | unsigned long, unsigned long, | ||
356 | unsigned long, unsigned long); | ||
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -971,15 +971,13 @@ static inline unsigned long round_hint_to_min(unsigned long hint) | |||
971 | * The caller must hold down_write(¤t->mm->mmap_sem). | 971 | * The caller must hold down_write(¤t->mm->mmap_sem). |
972 | */ | 972 | */ |
973 | 973 | ||
974 | static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | 974 | unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, |
975 | unsigned long len, unsigned long prot, | 975 | unsigned long len, unsigned long prot, |
976 | unsigned long flags, unsigned long pgoff) | 976 | unsigned long flags, unsigned long pgoff) |
977 | { | 977 | { |
978 | struct mm_struct * mm = current->mm; | 978 | struct mm_struct * mm = current->mm; |
979 | struct inode *inode; | 979 | struct inode *inode; |
980 | vm_flags_t vm_flags; | 980 | vm_flags_t vm_flags; |
981 | int error; | ||
982 | unsigned long reqprot = prot; | ||
983 | 981 | ||
984 | /* | 982 | /* |
985 | * Does the application expect PROT_READ to imply PROT_EXEC? | 983 | * Does the application expect PROT_READ to imply PROT_EXEC? |
@@ -1101,39 +1099,9 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1101 | } | 1099 | } |
1102 | } | 1100 | } |
1103 | 1101 | ||
1104 | error = security_file_mmap(file, reqprot, prot, flags, addr, 0); | ||
1105 | if (error) | ||
1106 | return error; | ||
1107 | |||
1108 | return mmap_region(file, addr, len, flags, vm_flags, pgoff); | 1102 | return mmap_region(file, addr, len, flags, vm_flags, pgoff); |
1109 | } | 1103 | } |
1110 | 1104 | ||
1111 | unsigned long do_mmap(struct file *file, unsigned long addr, | ||
1112 | unsigned long len, unsigned long prot, | ||
1113 | unsigned long flag, unsigned long offset) | ||
1114 | { | ||
1115 | if (unlikely(offset + PAGE_ALIGN(len) < offset)) | ||
1116 | return -EINVAL; | ||
1117 | if (unlikely(offset & ~PAGE_MASK)) | ||
1118 | return -EINVAL; | ||
1119 | return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); | ||
1120 | } | ||
1121 | EXPORT_SYMBOL(do_mmap); | ||
1122 | |||
1123 | unsigned long vm_mmap(struct file *file, unsigned long addr, | ||
1124 | unsigned long len, unsigned long prot, | ||
1125 | unsigned long flag, unsigned long offset) | ||
1126 | { | ||
1127 | unsigned long ret; | ||
1128 | struct mm_struct *mm = current->mm; | ||
1129 | |||
1130 | down_write(&mm->mmap_sem); | ||
1131 | ret = do_mmap(file, addr, len, prot, flag, offset); | ||
1132 | up_write(&mm->mmap_sem); | ||
1133 | return ret; | ||
1134 | } | ||
1135 | EXPORT_SYMBOL(vm_mmap); | ||
1136 | |||
1137 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | 1105 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, |
1138 | unsigned long, prot, unsigned long, flags, | 1106 | unsigned long, prot, unsigned long, flags, |
1139 | unsigned long, fd, unsigned long, pgoff) | 1107 | unsigned long, fd, unsigned long, pgoff) |
@@ -1165,10 +1133,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | |||
1165 | 1133 | ||
1166 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | 1134 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); |
1167 | 1135 | ||
1168 | down_write(¤t->mm->mmap_sem); | 1136 | retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); |
1169 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
1170 | up_write(¤t->mm->mmap_sem); | ||
1171 | |||
1172 | if (file) | 1137 | if (file) |
1173 | fput(file); | 1138 | fput(file); |
1174 | out: | 1139 | out: |
@@ -1629,7 +1594,9 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, | |||
1629 | if (addr & ~PAGE_MASK) | 1594 | if (addr & ~PAGE_MASK) |
1630 | return -EINVAL; | 1595 | return -EINVAL; |
1631 | 1596 | ||
1632 | return arch_rebalance_pgtables(addr, len); | 1597 | addr = arch_rebalance_pgtables(addr, len); |
1598 | error = security_mmap_addr(addr); | ||
1599 | return error ? error : addr; | ||
1633 | } | 1600 | } |
1634 | 1601 | ||
1635 | EXPORT_SYMBOL(get_unmapped_area); | 1602 | EXPORT_SYMBOL(get_unmapped_area); |
@@ -1819,7 +1786,7 @@ int expand_downwards(struct vm_area_struct *vma, | |||
1819 | return -ENOMEM; | 1786 | return -ENOMEM; |
1820 | 1787 | ||
1821 | address &= PAGE_MASK; | 1788 | address &= PAGE_MASK; |
1822 | error = security_file_mmap(NULL, 0, 0, 0, address, 1); | 1789 | error = security_mmap_addr(address); |
1823 | if (error) | 1790 | if (error) |
1824 | return error; | 1791 | return error; |
1825 | 1792 | ||
@@ -2159,7 +2126,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) | |||
2159 | 2126 | ||
2160 | return 0; | 2127 | return 0; |
2161 | } | 2128 | } |
2162 | EXPORT_SYMBOL(do_munmap); | ||
2163 | 2129 | ||
2164 | int vm_munmap(unsigned long start, size_t len) | 2130 | int vm_munmap(unsigned long start, size_t len) |
2165 | { | 2131 | { |
@@ -2207,10 +2173,6 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) | |||
2207 | if (!len) | 2173 | if (!len) |
2208 | return addr; | 2174 | return addr; |
2209 | 2175 | ||
2210 | error = security_file_mmap(NULL, 0, 0, 0, addr, 1); | ||
2211 | if (error) | ||
2212 | return error; | ||
2213 | |||
2214 | flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; | 2176 | flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; |
2215 | 2177 | ||
2216 | error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); | 2178 | error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); |
@@ -2563,10 +2525,6 @@ int install_special_mapping(struct mm_struct *mm, | |||
2563 | vma->vm_ops = &special_mapping_vmops; | 2525 | vma->vm_ops = &special_mapping_vmops; |
2564 | vma->vm_private_data = pages; | 2526 | vma->vm_private_data = pages; |
2565 | 2527 | ||
2566 | ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); | ||
2567 | if (ret) | ||
2568 | goto out; | ||
2569 | |||
2570 | ret = insert_vm_struct(mm, vma); | 2528 | ret = insert_vm_struct(mm, vma); |
2571 | if (ret) | 2529 | if (ret) |
2572 | goto out; | 2530 | goto out; |
diff --git a/mm/mremap.c b/mm/mremap.c index db8d983b5a7d..21fed202ddad 100644 --- a/mm/mremap.c +++ b/mm/mremap.c | |||
@@ -371,10 +371,6 @@ static unsigned long mremap_to(unsigned long addr, | |||
371 | if ((addr <= new_addr) && (addr+old_len) > new_addr) | 371 | if ((addr <= new_addr) && (addr+old_len) > new_addr) |
372 | goto out; | 372 | goto out; |
373 | 373 | ||
374 | ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); | ||
375 | if (ret) | ||
376 | goto out; | ||
377 | |||
378 | ret = do_munmap(mm, new_addr, new_len); | 374 | ret = do_munmap(mm, new_addr, new_len); |
379 | if (ret) | 375 | if (ret) |
380 | goto out; | 376 | goto out; |
@@ -432,15 +428,17 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) | |||
432 | * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise | 428 | * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise |
433 | * This option implies MREMAP_MAYMOVE. | 429 | * This option implies MREMAP_MAYMOVE. |
434 | */ | 430 | */ |
435 | unsigned long do_mremap(unsigned long addr, | 431 | SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, |
436 | unsigned long old_len, unsigned long new_len, | 432 | unsigned long, new_len, unsigned long, flags, |
437 | unsigned long flags, unsigned long new_addr) | 433 | unsigned long, new_addr) |
438 | { | 434 | { |
439 | struct mm_struct *mm = current->mm; | 435 | struct mm_struct *mm = current->mm; |
440 | struct vm_area_struct *vma; | 436 | struct vm_area_struct *vma; |
441 | unsigned long ret = -EINVAL; | 437 | unsigned long ret = -EINVAL; |
442 | unsigned long charged = 0; | 438 | unsigned long charged = 0; |
443 | 439 | ||
440 | down_write(¤t->mm->mmap_sem); | ||
441 | |||
444 | if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) | 442 | if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) |
445 | goto out; | 443 | goto out; |
446 | 444 | ||
@@ -530,25 +528,11 @@ unsigned long do_mremap(unsigned long addr, | |||
530 | goto out; | 528 | goto out; |
531 | } | 529 | } |
532 | 530 | ||
533 | ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); | ||
534 | if (ret) | ||
535 | goto out; | ||
536 | ret = move_vma(vma, addr, old_len, new_len, new_addr); | 531 | ret = move_vma(vma, addr, old_len, new_len, new_addr); |
537 | } | 532 | } |
538 | out: | 533 | out: |
539 | if (ret & ~PAGE_MASK) | 534 | if (ret & ~PAGE_MASK) |
540 | vm_unacct_memory(charged); | 535 | vm_unacct_memory(charged); |
541 | return ret; | ||
542 | } | ||
543 | |||
544 | SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, | ||
545 | unsigned long, new_len, unsigned long, flags, | ||
546 | unsigned long, new_addr) | ||
547 | { | ||
548 | unsigned long ret; | ||
549 | |||
550 | down_write(¤t->mm->mmap_sem); | ||
551 | ret = do_mremap(addr, old_len, new_len, flags, new_addr); | ||
552 | up_write(¤t->mm->mmap_sem); | 536 | up_write(¤t->mm->mmap_sem); |
553 | return ret; | 537 | return ret; |
554 | } | 538 | } |
diff --git a/mm/nommu.c b/mm/nommu.c index bb8f4f004a82..c4acfbc09972 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -889,7 +889,6 @@ static int validate_mmap_request(struct file *file, | |||
889 | unsigned long *_capabilities) | 889 | unsigned long *_capabilities) |
890 | { | 890 | { |
891 | unsigned long capabilities, rlen; | 891 | unsigned long capabilities, rlen; |
892 | unsigned long reqprot = prot; | ||
893 | int ret; | 892 | int ret; |
894 | 893 | ||
895 | /* do the simple checks first */ | 894 | /* do the simple checks first */ |
@@ -1047,7 +1046,7 @@ static int validate_mmap_request(struct file *file, | |||
1047 | } | 1046 | } |
1048 | 1047 | ||
1049 | /* allow the security API to have its say */ | 1048 | /* allow the security API to have its say */ |
1050 | ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); | 1049 | ret = security_mmap_addr(addr); |
1051 | if (ret < 0) | 1050 | if (ret < 0) |
1052 | return ret; | 1051 | return ret; |
1053 | 1052 | ||
@@ -1233,7 +1232,7 @@ enomem: | |||
1233 | /* | 1232 | /* |
1234 | * handle mapping creation for uClinux | 1233 | * handle mapping creation for uClinux |
1235 | */ | 1234 | */ |
1236 | static unsigned long do_mmap_pgoff(struct file *file, | 1235 | unsigned long do_mmap_pgoff(struct file *file, |
1237 | unsigned long addr, | 1236 | unsigned long addr, |
1238 | unsigned long len, | 1237 | unsigned long len, |
1239 | unsigned long prot, | 1238 | unsigned long prot, |
@@ -1471,32 +1470,6 @@ error_getting_region: | |||
1471 | return -ENOMEM; | 1470 | return -ENOMEM; |
1472 | } | 1471 | } |
1473 | 1472 | ||
1474 | unsigned long do_mmap(struct file *file, unsigned long addr, | ||
1475 | unsigned long len, unsigned long prot, | ||
1476 | unsigned long flag, unsigned long offset) | ||
1477 | { | ||
1478 | if (unlikely(offset + PAGE_ALIGN(len) < offset)) | ||
1479 | return -EINVAL; | ||
1480 | if (unlikely(offset & ~PAGE_MASK)) | ||
1481 | return -EINVAL; | ||
1482 | return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); | ||
1483 | } | ||
1484 | EXPORT_SYMBOL(do_mmap); | ||
1485 | |||
1486 | unsigned long vm_mmap(struct file *file, unsigned long addr, | ||
1487 | unsigned long len, unsigned long prot, | ||
1488 | unsigned long flag, unsigned long offset) | ||
1489 | { | ||
1490 | unsigned long ret; | ||
1491 | struct mm_struct *mm = current->mm; | ||
1492 | |||
1493 | down_write(&mm->mmap_sem); | ||
1494 | ret = do_mmap(file, addr, len, prot, flag, offset); | ||
1495 | up_write(&mm->mmap_sem); | ||
1496 | return ret; | ||
1497 | } | ||
1498 | EXPORT_SYMBOL(vm_mmap); | ||
1499 | |||
1500 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | 1473 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, |
1501 | unsigned long, prot, unsigned long, flags, | 1474 | unsigned long, prot, unsigned long, flags, |
1502 | unsigned long, fd, unsigned long, pgoff) | 1475 | unsigned long, fd, unsigned long, pgoff) |
@@ -1513,9 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | |||
1513 | 1486 | ||
1514 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | 1487 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); |
1515 | 1488 | ||
1516 | down_write(¤t->mm->mmap_sem); | 1489 | ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); |
1517 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
1518 | up_write(¤t->mm->mmap_sem); | ||
1519 | 1490 | ||
1520 | if (file) | 1491 | if (file) |
1521 | fput(file); | 1492 | fput(file); |
diff --git a/mm/shmem.c b/mm/shmem.c index d576b84d913c..585bd220a21e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2439,11 +2439,9 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, | |||
2439 | return dentry; | 2439 | return dentry; |
2440 | } | 2440 | } |
2441 | 2441 | ||
2442 | static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | 2442 | static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, |
2443 | int connectable) | 2443 | struct inode *parent) |
2444 | { | 2444 | { |
2445 | struct inode *inode = dentry->d_inode; | ||
2446 | |||
2447 | if (*len < 3) { | 2445 | if (*len < 3) { |
2448 | *len = 3; | 2446 | *len = 3; |
2449 | return 255; | 2447 | return 255; |
diff --git a/mm/util.c b/mm/util.c --- a/mm/util.c +++ b/mm/util.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/export.h> | 4 | #include <linux/export.h> |
5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/security.h> | ||
7 | #include <asm/uaccess.h> | 8 | #include <asm/uaccess.h> |
8 | 9 | ||
9 | #include "internal.h" | 10 | #include "internal.h" |
@@ -341,6 +342,35 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, | |||
341 | } | 342 | } |
342 | EXPORT_SYMBOL_GPL(get_user_pages_fast); | 343 | EXPORT_SYMBOL_GPL(get_user_pages_fast); |
343 | 344 | ||
345 | unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, | ||
346 | unsigned long len, unsigned long prot, | ||
347 | unsigned long flag, unsigned long pgoff) | ||
348 | { | ||
349 | unsigned long ret; | ||
350 | struct mm_struct *mm = current->mm; | ||
351 | |||
352 | ret = security_mmap_file(file, prot, flag); | ||
353 | if (!ret) { | ||
354 | down_write(&mm->mmap_sem); | ||
355 | ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); | ||
356 | up_write(&mm->mmap_sem); | ||
357 | } | ||
358 | return ret; | ||
359 | } | ||
360 | |||
361 | unsigned long vm_mmap(struct file *file, unsigned long addr, | ||
362 | unsigned long len, unsigned long prot, | ||
363 | unsigned long flag, unsigned long offset) | ||
364 | { | ||
365 | if (unlikely(offset + PAGE_ALIGN(len) < offset)) | ||
366 | return -EINVAL; | ||
367 | if (unlikely(offset & ~PAGE_MASK)) | ||
368 | return -EINVAL; | ||
369 | |||
370 | return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); | ||
371 | } | ||
372 | EXPORT_SYMBOL(vm_mmap); | ||
373 | |||
344 | /* Tracepoints definitions. */ | 374 | /* Tracepoints definitions. */ |
345 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); | 375 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); |
346 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); | 376 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); |
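
With do_mmap() removed and do_mmap_pgoff() now requiring the caller to hold mmap_sem and to have done the security check itself, in-kernel users are expected to go through vm_mmap()/vm_mmap_pgoff(), which take the semaphore and call security_mmap_file() first. A hedged sketch of a driver-style caller; the function name and length are placeholders:

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/err.h>

static unsigned long example_map_buffer(struct file *filp, size_t len)
{
	unsigned long addr;

	/* vm_mmap() takes mmap_sem itself and runs security_mmap_file()
	 * before mapping, so the caller must NOT hold mmap_sem here. */
	addr = vm_mmap(filp, 0, len, PROT_READ | PROT_WRITE, MAP_SHARED, 0);
	if (IS_ERR_VALUE(addr))
		return addr;	/* negative errno encoded in the value */

	return addr;
}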
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 8522a4793374..ca8e0a57d945 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c | |||
@@ -16,8 +16,6 @@ | |||
16 | #include <net/netlink.h> | 16 | #include <net/netlink.h> |
17 | #include <net/pkt_sched.h> | 17 | #include <net/pkt_sched.h> |
18 | 18 | ||
19 | extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ | ||
20 | |||
21 | /* | 19 | /* |
22 | * The ATM queuing discipline provides a framework for invoking classifiers | 20 | * The ATM queuing discipline provides a framework for invoking classifiers |
23 | * (aka "filters"), which in turn select classes of this queuing discipline. | 21 | * (aka "filters"), which in turn select classes of this queuing discipline. |
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 38f388c39dce..107c4528654f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c | |||
@@ -381,21 +381,53 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) | |||
381 | } | 381 | } |
382 | 382 | ||
383 | /* | 383 | /* |
384 | * We cannot currently handle tokens with rotated data. We need a | 384 | * We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need |
385 | * generalized routine to rotate the data in place. It is anticipated | 385 | * to do more than that, we shift repeatedly. Kevin Coffman reports |
386 | * that we won't encounter rotated data in the general case. | 386 | * seeing 28 bytes as the value used by Microsoft clients and servers |
387 | * with AES, so this constant is chosen to allow handling 28 in one pass | ||
388 | * without using too much stack space. | ||
389 | * | ||
390 | * If that proves to a problem perhaps we could use a more clever | ||
391 | * algorithm. | ||
387 | */ | 392 | */ |
388 | static u32 | 393 | #define LOCAL_BUF_LEN 32u |
389 | rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc) | 394 | |
395 | static void rotate_buf_a_little(struct xdr_buf *buf, unsigned int shift) | ||
390 | { | 396 | { |
391 | unsigned int realrrc = rrc % (buf->len - offset - GSS_KRB5_TOK_HDR_LEN); | 397 | char head[LOCAL_BUF_LEN]; |
398 | char tmp[LOCAL_BUF_LEN]; | ||
399 | unsigned int this_len, i; | ||
400 | |||
401 | BUG_ON(shift > LOCAL_BUF_LEN); | ||
392 | 402 | ||
393 | if (realrrc == 0) | 403 | read_bytes_from_xdr_buf(buf, 0, head, shift); |
394 | return 0; | 404 | for (i = 0; i + shift < buf->len; i += LOCAL_BUF_LEN) { |
405 | this_len = min(LOCAL_BUF_LEN, buf->len - (i + shift)); | ||
406 | read_bytes_from_xdr_buf(buf, i+shift, tmp, this_len); | ||
407 | write_bytes_to_xdr_buf(buf, i, tmp, this_len); | ||
408 | } | ||
409 | write_bytes_to_xdr_buf(buf, buf->len - shift, head, shift); | ||
410 | } | ||
395 | 411 | ||
396 | dprintk("%s: cannot process token with rotated data: " | 412 | static void _rotate_left(struct xdr_buf *buf, unsigned int shift) |
397 | "rrc %u, realrrc %u\n", __func__, rrc, realrrc); | 413 | { |
398 | return 1; | 414 | int shifted = 0; |
415 | int this_shift; | ||
416 | |||
417 | shift %= buf->len; | ||
418 | while (shifted < shift) { | ||
419 | this_shift = min(shift - shifted, LOCAL_BUF_LEN); | ||
420 | rotate_buf_a_little(buf, this_shift); | ||
421 | shifted += this_shift; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift) | ||
426 | { | ||
427 | struct xdr_buf subbuf; | ||
428 | |||
429 | xdr_buf_subsegment(buf, &subbuf, base, buf->len - base); | ||
430 | _rotate_left(&subbuf, shift); | ||
399 | } | 431 | } |
400 | 432 | ||
401 | static u32 | 433 | static u32 |
@@ -495,11 +527,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) | |||
495 | 527 | ||
496 | seqnum = be64_to_cpup((__be64 *)(ptr + 8)); | 528 | seqnum = be64_to_cpup((__be64 *)(ptr + 8)); |
497 | 529 | ||
498 | if (rrc != 0) { | 530 | if (rrc != 0) |
499 | err = rotate_left(kctx, offset, buf, rrc); | 531 | rotate_left(offset + 16, buf, rrc); |
500 | if (err) | ||
501 | return GSS_S_FAILURE; | ||
502 | } | ||
503 | 532 | ||
504 | err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf, | 533 | err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf, |
505 | &headskip, &tailskip); | 534 | &headskip, &tailskip); |
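
The kernel version above operates on an xdr_buf, whose data is scattered across head, pages and tail, so it copies through read_bytes_from_xdr_buf()/write_bytes_to_xdr_buf() in 32-byte steps. Purely to illustrate the algorithm, the same chunked left-rotation on a flat buffer would look like this (names are made up; a flat buffer could of course be rotated more directly):

#include <linux/string.h>

#define SCRATCH_LEN 32u

/* Rotate buf[0..len) left by shift bytes using at most SCRATCH_LEN bytes of
 * stack, mirroring rotate_buf_a_little()/_rotate_left() above. */
static void rotate_left_flat(unsigned char *buf, unsigned int len,
			     unsigned int shift)
{
	unsigned char head[SCRATCH_LEN];
	unsigned int step, done = 0;

	if (!len)
		return;
	shift %= len;
	while (done < shift) {
		step = shift - done;
		if (step > SCRATCH_LEN)
			step = SCRATCH_LEN;
		/* save the first 'step' bytes ... */
		memcpy(head, buf, step);
		/* ... slide the rest of the buffer down ... */
		memmove(buf, buf + step, len - step);
		/* ... and append the saved bytes at the tail. */
		memcpy(buf + len - step, head, step);
		done += step;
	}
}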
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 3089de37c433..73e957386600 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -336,7 +336,6 @@ struct rsc { | |||
336 | struct svc_cred cred; | 336 | struct svc_cred cred; |
337 | struct gss_svc_seq_data seqdata; | 337 | struct gss_svc_seq_data seqdata; |
338 | struct gss_ctx *mechctx; | 338 | struct gss_ctx *mechctx; |
339 | char *client_name; | ||
340 | }; | 339 | }; |
341 | 340 | ||
342 | static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); | 341 | static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); |
@@ -347,9 +346,7 @@ static void rsc_free(struct rsc *rsci) | |||
347 | kfree(rsci->handle.data); | 346 | kfree(rsci->handle.data); |
348 | if (rsci->mechctx) | 347 | if (rsci->mechctx) |
349 | gss_delete_sec_context(&rsci->mechctx); | 348 | gss_delete_sec_context(&rsci->mechctx); |
350 | if (rsci->cred.cr_group_info) | 349 | free_svc_cred(&rsci->cred); |
351 | put_group_info(rsci->cred.cr_group_info); | ||
352 | kfree(rsci->client_name); | ||
353 | } | 350 | } |
354 | 351 | ||
355 | static void rsc_put(struct kref *ref) | 352 | static void rsc_put(struct kref *ref) |
@@ -387,7 +384,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp) | |||
387 | tmp->handle.data = NULL; | 384 | tmp->handle.data = NULL; |
388 | new->mechctx = NULL; | 385 | new->mechctx = NULL; |
389 | new->cred.cr_group_info = NULL; | 386 | new->cred.cr_group_info = NULL; |
390 | new->client_name = NULL; | 387 | new->cred.cr_principal = NULL; |
391 | } | 388 | } |
392 | 389 | ||
393 | static void | 390 | static void |
@@ -402,8 +399,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) | |||
402 | spin_lock_init(&new->seqdata.sd_lock); | 399 | spin_lock_init(&new->seqdata.sd_lock); |
403 | new->cred = tmp->cred; | 400 | new->cred = tmp->cred; |
404 | tmp->cred.cr_group_info = NULL; | 401 | tmp->cred.cr_group_info = NULL; |
405 | new->client_name = tmp->client_name; | 402 | new->cred.cr_principal = tmp->cred.cr_principal; |
406 | tmp->client_name = NULL; | 403 | tmp->cred.cr_principal = NULL; |
407 | } | 404 | } |
408 | 405 | ||
409 | static struct cache_head * | 406 | static struct cache_head * |
@@ -501,8 +498,8 @@ static int rsc_parse(struct cache_detail *cd, | |||
501 | /* get client name */ | 498 | /* get client name */ |
502 | len = qword_get(&mesg, buf, mlen); | 499 | len = qword_get(&mesg, buf, mlen); |
503 | if (len > 0) { | 500 | if (len > 0) { |
504 | rsci.client_name = kstrdup(buf, GFP_KERNEL); | 501 | rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL); |
505 | if (!rsci.client_name) | 502 | if (!rsci.cred.cr_principal) |
506 | goto out; | 503 | goto out; |
507 | } | 504 | } |
508 | 505 | ||
@@ -932,16 +929,6 @@ struct gss_svc_data { | |||
932 | struct rsc *rsci; | 929 | struct rsc *rsci; |
933 | }; | 930 | }; |
934 | 931 | ||
935 | char *svc_gss_principal(struct svc_rqst *rqstp) | ||
936 | { | ||
937 | struct gss_svc_data *gd = (struct gss_svc_data *)rqstp->rq_auth_data; | ||
938 | |||
939 | if (gd && gd->rsci) | ||
940 | return gd->rsci->client_name; | ||
941 | return NULL; | ||
942 | } | ||
943 | EXPORT_SYMBOL_GPL(svc_gss_principal); | ||
944 | |||
945 | static int | 932 | static int |
946 | svcauth_gss_set_client(struct svc_rqst *rqstp) | 933 | svcauth_gss_set_client(struct svc_rqst *rqstp) |
947 | { | 934 | { |
@@ -1220,7 +1207,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
1220 | } | 1207 | } |
1221 | svcdata->rsci = rsci; | 1208 | svcdata->rsci = rsci; |
1222 | cache_get(&rsci->h); | 1209 | cache_get(&rsci->h); |
1223 | rqstp->rq_flavor = gss_svc_to_pseudoflavor( | 1210 | rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor( |
1224 | rsci->mechctx->mech_type, gc->gc_svc); | 1211 | rsci->mechctx->mech_type, gc->gc_svc); |
1225 | ret = SVC_OK; | 1212 | ret = SVC_OK; |
1226 | goto out; | 1213 | goto out; |
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3c0653439f3d..92509ffe15fc 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
@@ -180,14 +180,16 @@ void rpcb_put_local(struct net *net) | |||
180 | struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); | 180 | struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); |
181 | struct rpc_clnt *clnt = sn->rpcb_local_clnt; | 181 | struct rpc_clnt *clnt = sn->rpcb_local_clnt; |
182 | struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4; | 182 | struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4; |
183 | int shutdown; | 183 | int shutdown = 0; |
184 | 184 | ||
185 | spin_lock(&sn->rpcb_clnt_lock); | 185 | spin_lock(&sn->rpcb_clnt_lock); |
186 | if (--sn->rpcb_users == 0) { | 186 | if (sn->rpcb_users) { |
187 | sn->rpcb_local_clnt = NULL; | 187 | if (--sn->rpcb_users == 0) { |
188 | sn->rpcb_local_clnt4 = NULL; | 188 | sn->rpcb_local_clnt = NULL; |
189 | sn->rpcb_local_clnt4 = NULL; | ||
190 | } | ||
191 | shutdown = !sn->rpcb_users; | ||
189 | } | 192 | } |
190 | shutdown = !sn->rpcb_users; | ||
191 | spin_unlock(&sn->rpcb_clnt_lock); | 193 | spin_unlock(&sn->rpcb_clnt_lock); |
192 | 194 | ||
193 | if (shutdown) { | 195 | if (shutdown) { |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 017c0117d154..7e9baaa1e543 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -407,6 +407,14 @@ static int svc_uses_rpcbind(struct svc_serv *serv) | |||
407 | return 0; | 407 | return 0; |
408 | } | 408 | } |
409 | 409 | ||
410 | int svc_bind(struct svc_serv *serv, struct net *net) | ||
411 | { | ||
412 | if (!svc_uses_rpcbind(serv)) | ||
413 | return 0; | ||
414 | return svc_rpcb_setup(serv, net); | ||
415 | } | ||
416 | EXPORT_SYMBOL_GPL(svc_bind); | ||
417 | |||
410 | /* | 418 | /* |
411 | * Create an RPC service | 419 | * Create an RPC service |
412 | */ | 420 | */ |
@@ -471,15 +479,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
471 | spin_lock_init(&pool->sp_lock); | 479 | spin_lock_init(&pool->sp_lock); |
472 | } | 480 | } |
473 | 481 | ||
474 | if (svc_uses_rpcbind(serv)) { | 482 | if (svc_uses_rpcbind(serv) && (!serv->sv_shutdown)) |
475 | if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) { | 483 | serv->sv_shutdown = svc_rpcb_cleanup; |
476 | kfree(serv->sv_pools); | ||
477 | kfree(serv); | ||
478 | return NULL; | ||
479 | } | ||
480 | if (!serv->sv_shutdown) | ||
481 | serv->sv_shutdown = svc_rpcb_cleanup; | ||
482 | } | ||
483 | 484 | ||
484 | return serv; | 485 | return serv; |
485 | } | 486 | } |
@@ -536,8 +537,6 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net); | |||
536 | void | 537 | void |
537 | svc_destroy(struct svc_serv *serv) | 538 | svc_destroy(struct svc_serv *serv) |
538 | { | 539 | { |
539 | struct net *net = current->nsproxy->net_ns; | ||
540 | |||
541 | dprintk("svc: svc_destroy(%s, %d)\n", | 540 | dprintk("svc: svc_destroy(%s, %d)\n", |
542 | serv->sv_program->pg_name, | 541 | serv->sv_program->pg_name, |
543 | serv->sv_nrthreads); | 542 | serv->sv_nrthreads); |
@@ -552,8 +551,6 @@ svc_destroy(struct svc_serv *serv) | |||
552 | 551 | ||
553 | del_timer_sync(&serv->sv_temptimer); | 552 | del_timer_sync(&serv->sv_temptimer); |
554 | 553 | ||
555 | svc_shutdown_net(serv, net); | ||
556 | |||
557 | /* | 554 | /* |
558 | * The last user is gone and thus all sockets have to be destroyed to | 555 | * The last user is gone and thus all sockets have to be destroyed to |
559 | * the point. Check this. | 556 | * the point. Check this. |
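
With rpcbind registration pulled out of __svc_create(), the owner of an RPC service now creates it and then binds it to a network namespace explicitly via the new svc_bind(). A rough sketch of the startup order; the program, buffer size and error handling are placeholders:

#include <linux/sunrpc/svc.h>

static int example_create_service(struct svc_program *prog, struct net *net)
{
	struct svc_serv *serv;
	int error;

	/* svc_create() no longer talks to rpcbind itself ... */
	serv = svc_create(prog, 64 * 1024, NULL);
	if (!serv)
		return -ENOMEM;

	/* ... the owner registers with the portmapper per namespace. */
	error = svc_bind(serv, net);
	if (error < 0) {
		svc_destroy(serv);
		return error;
	}
	return 0;
}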
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b98ee3514912..88f2bf671960 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c | |||
@@ -598,6 +598,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) | |||
598 | 598 | ||
599 | /* now allocate needed pages. If we get a failure, sleep briefly */ | 599 | /* now allocate needed pages. If we get a failure, sleep briefly */ |
600 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | 600 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; |
601 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
601 | for (i = 0; i < pages ; i++) | 602 | for (i = 0; i < pages ; i++) |
602 | while (rqstp->rq_pages[i] == NULL) { | 603 | while (rqstp->rq_pages[i] == NULL) { |
603 | struct page *p = alloc_page(GFP_KERNEL); | 604 | struct page *p = alloc_page(GFP_KERNEL); |
@@ -612,7 +613,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) | |||
612 | rqstp->rq_pages[i] = p; | 613 | rqstp->rq_pages[i] = p; |
613 | } | 614 | } |
614 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | 615 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ |
615 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
616 | 616 | ||
617 | /* Make arg->head point to first page and arg->pages point to rest */ | 617 | /* Make arg->head point to first page and arg->pages point to rest */ |
618 | arg = &rqstp->rq_arg; | 618 | arg = &rqstp->rq_arg; |
@@ -973,7 +973,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net) | |||
973 | svc_clear_pools(serv, net); | 973 | svc_clear_pools(serv, net); |
974 | /* | 974 | /* |
975 | * At this point the sp_sockets lists will stay empty, since | 975 | * At this point the sp_sockets lists will stay empty, since |
976 | * svc_enqueue will not add new entries without taking the | 976 | * svc_xprt_enqueue will not add new entries without taking the |
977 | * sp_lock and checking XPT_BUSY. | 977 | * sp_lock and checking XPT_BUSY. |
978 | */ | 978 | */ |
979 | svc_clear_list(&serv->sv_tempsocks, net); | 979 | svc_clear_list(&serv->sv_tempsocks, net); |
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 6138c925923d..2777fa896645 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c | |||
@@ -746,6 +746,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
746 | struct svc_cred *cred = &rqstp->rq_cred; | 746 | struct svc_cred *cred = &rqstp->rq_cred; |
747 | 747 | ||
748 | cred->cr_group_info = NULL; | 748 | cred->cr_group_info = NULL; |
749 | cred->cr_principal = NULL; | ||
749 | rqstp->rq_client = NULL; | 750 | rqstp->rq_client = NULL; |
750 | 751 | ||
751 | if (argv->iov_len < 3*4) | 752 | if (argv->iov_len < 3*4) |
@@ -773,7 +774,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
773 | svc_putnl(resv, RPC_AUTH_NULL); | 774 | svc_putnl(resv, RPC_AUTH_NULL); |
774 | svc_putnl(resv, 0); | 775 | svc_putnl(resv, 0); |
775 | 776 | ||
776 | rqstp->rq_flavor = RPC_AUTH_NULL; | 777 | rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL; |
777 | return SVC_OK; | 778 | return SVC_OK; |
778 | } | 779 | } |
779 | 780 | ||
@@ -811,6 +812,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
811 | int len = argv->iov_len; | 812 | int len = argv->iov_len; |
812 | 813 | ||
813 | cred->cr_group_info = NULL; | 814 | cred->cr_group_info = NULL; |
815 | cred->cr_principal = NULL; | ||
814 | rqstp->rq_client = NULL; | 816 | rqstp->rq_client = NULL; |
815 | 817 | ||
816 | if ((len -= 3*4) < 0) | 818 | if ((len -= 3*4) < 0) |
@@ -847,7 +849,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
847 | svc_putnl(resv, RPC_AUTH_NULL); | 849 | svc_putnl(resv, RPC_AUTH_NULL); |
848 | svc_putnl(resv, 0); | 850 | svc_putnl(resv, 0); |
849 | 851 | ||
850 | rqstp->rq_flavor = RPC_AUTH_UNIX; | 852 | rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX; |
851 | return SVC_OK; | 853 | return SVC_OK; |
852 | 854 | ||
853 | badcred: | 855 | badcred: |
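Both ->accept() routines above now clear cred->cr_principal alongside cr_group_info and record the authentication flavour in rqstp->rq_cred.cr_flavor instead of the old rqstp->rq_flavor field, so the flavour travels with the credential itself. The layout below is illustrative only; the real struct svc_cred lives in include/linux/sunrpc/svcauth.h and this diff does not show its remaining members or their order:

    /* Illustrative credential shape assumed by the hunks above. */
    struct example_svc_cred {
            unsigned int             cr_uid;
            unsigned int             cr_gid;
            struct group_info       *cr_group_info;
            char                    *cr_principal;  /* NULL unless an authenticator sets it */
            unsigned int             cr_flavor;     /* RPC_AUTH_NULL, RPC_AUTH_UNIX, ... */
    };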
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 032daab449b0..8ea39aabe948 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c | |||
@@ -490,17 +490,9 @@ static int common_mmap(int op, struct file *file, unsigned long prot, | |||
490 | return common_file_perm(op, file, mask); | 490 | return common_file_perm(op, file, mask); |
491 | } | 491 | } |
492 | 492 | ||
493 | static int apparmor_file_mmap(struct file *file, unsigned long reqprot, | 493 | static int apparmor_mmap_file(struct file *file, unsigned long reqprot, |
494 | unsigned long prot, unsigned long flags, | 494 | unsigned long prot, unsigned long flags) |
495 | unsigned long addr, unsigned long addr_only) | ||
496 | { | 495 | { |
497 | int rc = 0; | ||
498 | |||
499 | /* do DAC check */ | ||
500 | rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); | ||
501 | if (rc || addr_only) | ||
502 | return rc; | ||
503 | |||
504 | return common_mmap(OP_FMMAP, file, prot, flags); | 496 | return common_mmap(OP_FMMAP, file, prot, flags); |
505 | } | 497 | } |
506 | 498 | ||
@@ -646,7 +638,8 @@ static struct security_operations apparmor_ops = { | |||
646 | .file_permission = apparmor_file_permission, | 638 | .file_permission = apparmor_file_permission, |
647 | .file_alloc_security = apparmor_file_alloc_security, | 639 | .file_alloc_security = apparmor_file_alloc_security, |
648 | .file_free_security = apparmor_file_free_security, | 640 | .file_free_security = apparmor_file_free_security, |
649 | .file_mmap = apparmor_file_mmap, | 641 | .mmap_file = apparmor_mmap_file, |
642 | .mmap_addr = cap_mmap_addr, | ||
650 | .file_mprotect = apparmor_file_mprotect, | 643 | .file_mprotect = apparmor_file_mprotect, |
651 | .file_lock = apparmor_file_lock, | 644 | .file_lock = apparmor_file_lock, |
652 | 645 | ||
diff --git a/security/capability.c b/security/capability.c index fca889676c5e..61095df8b89a 100644 --- a/security/capability.c +++ b/security/capability.c | |||
@@ -949,7 +949,8 @@ void __init security_fixup_ops(struct security_operations *ops) | |||
949 | set_to_cap_if_null(ops, file_alloc_security); | 949 | set_to_cap_if_null(ops, file_alloc_security); |
950 | set_to_cap_if_null(ops, file_free_security); | 950 | set_to_cap_if_null(ops, file_free_security); |
951 | set_to_cap_if_null(ops, file_ioctl); | 951 | set_to_cap_if_null(ops, file_ioctl); |
952 | set_to_cap_if_null(ops, file_mmap); | 952 | set_to_cap_if_null(ops, mmap_addr); |
953 | set_to_cap_if_null(ops, mmap_file); | ||
953 | set_to_cap_if_null(ops, file_mprotect); | 954 | set_to_cap_if_null(ops, file_mprotect); |
954 | set_to_cap_if_null(ops, file_lock); | 955 | set_to_cap_if_null(ops, file_lock); |
955 | set_to_cap_if_null(ops, file_fcntl); | 956 | set_to_cap_if_null(ops, file_fcntl); |
diff --git a/security/commoncap.c b/security/commoncap.c index e771cb1b2d79..6dbae4650abe 100644 --- a/security/commoncap.c +++ b/security/commoncap.c | |||
@@ -958,22 +958,15 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) | |||
958 | } | 958 | } |
959 | 959 | ||
960 | /* | 960 | /* |
961 | * cap_file_mmap - check if able to map given addr | 961 | * cap_mmap_addr - check if able to map given addr |
962 | * @file: unused | ||
963 | * @reqprot: unused | ||
964 | * @prot: unused | ||
965 | * @flags: unused | ||
966 | * @addr: address attempting to be mapped | 962 | * @addr: address attempting to be mapped |
967 | * @addr_only: unused | ||
968 | * | 963 | * |
969 | * If the process is attempting to map memory below dac_mmap_min_addr they need | 964 | * If the process is attempting to map memory below dac_mmap_min_addr they need |
970 | * CAP_SYS_RAWIO. The other parameters to this function are unused by the | 965 | * CAP_SYS_RAWIO. The other parameters to this function are unused by the |
971 | * capability security module. Returns 0 if this mapping should be allowed | 966 | * capability security module. Returns 0 if this mapping should be allowed |
972 | * -EPERM if not. | 967 | * -EPERM if not. |
973 | */ | 968 | */ |
974 | int cap_file_mmap(struct file *file, unsigned long reqprot, | 969 | int cap_mmap_addr(unsigned long addr) |
975 | unsigned long prot, unsigned long flags, | ||
976 | unsigned long addr, unsigned long addr_only) | ||
977 | { | 970 | { |
978 | int ret = 0; | 971 | int ret = 0; |
979 | 972 | ||
@@ -986,3 +979,9 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, | |||
986 | } | 979 | } |
987 | return ret; | 980 | return ret; |
988 | } | 981 | } |
982 | |||
983 | int cap_mmap_file(struct file *file, unsigned long reqprot, | ||
984 | unsigned long prot, unsigned long flags) | ||
985 | { | ||
986 | return 0; | ||
987 | } | ||
diff --git a/security/security.c b/security/security.c index 5497a57fba01..3efc9b12aef4 100644 --- a/security/security.c +++ b/security/security.c | |||
@@ -20,6 +20,9 @@ | |||
20 | #include <linux/ima.h> | 20 | #include <linux/ima.h> |
21 | #include <linux/evm.h> | 21 | #include <linux/evm.h> |
22 | #include <linux/fsnotify.h> | 22 | #include <linux/fsnotify.h> |
23 | #include <linux/mman.h> | ||
24 | #include <linux/mount.h> | ||
25 | #include <linux/personality.h> | ||
23 | #include <net/flow.h> | 26 | #include <net/flow.h> |
24 | 27 | ||
25 | #define MAX_LSM_EVM_XATTR 2 | 28 | #define MAX_LSM_EVM_XATTR 2 |
@@ -657,18 +660,56 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
657 | return security_ops->file_ioctl(file, cmd, arg); | 660 | return security_ops->file_ioctl(file, cmd, arg); |
658 | } | 661 | } |
659 | 662 | ||
660 | int security_file_mmap(struct file *file, unsigned long reqprot, | 663 | static inline unsigned long mmap_prot(struct file *file, unsigned long prot) |
661 | unsigned long prot, unsigned long flags, | ||
662 | unsigned long addr, unsigned long addr_only) | ||
663 | { | 664 | { |
664 | int ret; | 665 | /* |
666 | * Do we have PROT_READ and does the application expect | ||
667 | * it to imply PROT_EXEC? If not, nothing to talk about... | ||
668 | */ | ||
669 | if ((prot & (PROT_READ | PROT_EXEC)) != PROT_READ) | ||
670 | return prot; | ||
671 | if (!(current->personality & READ_IMPLIES_EXEC)) | ||
672 | return prot; | ||
673 | /* | ||
674 | * if that's an anonymous mapping, let it. | ||
675 | */ | ||
676 | if (!file) | ||
677 | return prot | PROT_EXEC; | ||
678 | /* | ||
679 | * ditto if it's not on noexec mount, except that on !MMU we need | ||
680 | * BDI_CAP_EXEC_MAP (== VM_MAYEXEC) in this case | ||
681 | */ | ||
682 | if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { | ||
683 | #ifndef CONFIG_MMU | ||
684 | unsigned long caps = 0; | ||
685 | struct address_space *mapping = file->f_mapping; | ||
686 | if (mapping && mapping->backing_dev_info) | ||
687 | caps = mapping->backing_dev_info->capabilities; | ||
688 | if (!(caps & BDI_CAP_EXEC_MAP)) | ||
689 | return prot; | ||
690 | #endif | ||
691 | return prot | PROT_EXEC; | ||
692 | } | ||
693 | /* anything on noexec mount won't get PROT_EXEC */ | ||
694 | return prot; | ||
695 | } | ||
665 | 696 | ||
666 | ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); | 697 | int security_mmap_file(struct file *file, unsigned long prot, |
698 | unsigned long flags) | ||
699 | { | ||
700 | int ret; | ||
701 | ret = security_ops->mmap_file(file, prot, | ||
702 | mmap_prot(file, prot), flags); | ||
667 | if (ret) | 703 | if (ret) |
668 | return ret; | 704 | return ret; |
669 | return ima_file_mmap(file, prot); | 705 | return ima_file_mmap(file, prot); |
670 | } | 706 | } |
671 | 707 | ||
708 | int security_mmap_addr(unsigned long addr) | ||
709 | { | ||
710 | return security_ops->mmap_addr(addr); | ||
711 | } | ||
712 | |||
672 | int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, | 713 | int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, |
673 | unsigned long prot) | 714 | unsigned long prot) |
674 | { | 715 | { |
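With the security.c change above, the old six-argument security_file_mmap() is gone: the address check is exposed as security_mmap_addr(addr), and security_mmap_file() takes only the file, the caller's requested protection and the flags, deriving the effective protection internally via mmap_prot() (which applies the READ_IMPLIES_EXEC personality promotion). A hypothetical caller, sketching how the two entry points would be used; everything except the two security_* calls is invented for illustration:

    /* Hypothetical mmap path, illustration only. */
    static long example_do_mmap(struct file *file, unsigned long addr,
                                unsigned long len, unsigned long prot,
                                unsigned long flags)
    {
            int err;

            err = security_mmap_addr(addr);               /* low-address / DAC check */
            if (err)
                    return err;

            err = security_mmap_file(file, prot, flags);  /* per-file LSM + IMA check */
            if (err)
                    return err;

            /* ... set up the mapping ... */
            return 0;
    }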
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fa2341b68331..372ec6502aa8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -3083,9 +3083,7 @@ error: | |||
3083 | return rc; | 3083 | return rc; |
3084 | } | 3084 | } |
3085 | 3085 | ||
3086 | static int selinux_file_mmap(struct file *file, unsigned long reqprot, | 3086 | static int selinux_mmap_addr(unsigned long addr) |
3087 | unsigned long prot, unsigned long flags, | ||
3088 | unsigned long addr, unsigned long addr_only) | ||
3089 | { | 3087 | { |
3090 | int rc = 0; | 3088 | int rc = 0; |
3091 | u32 sid = current_sid(); | 3089 | u32 sid = current_sid(); |
@@ -3104,10 +3102,12 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, | |||
3104 | } | 3102 | } |
3105 | 3103 | ||
3106 | /* do DAC check on address space usage */ | 3104 | /* do DAC check on address space usage */ |
3107 | rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); | 3105 | return cap_mmap_addr(addr); |
3108 | if (rc || addr_only) | 3106 | } |
3109 | return rc; | ||
3110 | 3107 | ||
3108 | static int selinux_mmap_file(struct file *file, unsigned long reqprot, | ||
3109 | unsigned long prot, unsigned long flags) | ||
3110 | { | ||
3111 | if (selinux_checkreqprot) | 3111 | if (selinux_checkreqprot) |
3112 | prot = reqprot; | 3112 | prot = reqprot; |
3113 | 3113 | ||
@@ -5570,7 +5570,8 @@ static struct security_operations selinux_ops = { | |||
5570 | .file_alloc_security = selinux_file_alloc_security, | 5570 | .file_alloc_security = selinux_file_alloc_security, |
5571 | .file_free_security = selinux_file_free_security, | 5571 | .file_free_security = selinux_file_free_security, |
5572 | .file_ioctl = selinux_file_ioctl, | 5572 | .file_ioctl = selinux_file_ioctl, |
5573 | .file_mmap = selinux_file_mmap, | 5573 | .mmap_file = selinux_mmap_file, |
5574 | .mmap_addr = selinux_mmap_addr, | ||
5574 | .file_mprotect = selinux_file_mprotect, | 5575 | .file_mprotect = selinux_file_mprotect, |
5575 | .file_lock = selinux_file_lock, | 5576 | .file_lock = selinux_file_lock, |
5576 | .file_fcntl = selinux_file_fcntl, | 5577 | .file_fcntl = selinux_file_fcntl, |
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 4e93f9ef970b..3ad290251288 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c | |||
@@ -1259,12 +1259,8 @@ static int sel_make_bools(void) | |||
1259 | if (!inode) | 1259 | if (!inode) |
1260 | goto out; | 1260 | goto out; |
1261 | 1261 | ||
1262 | ret = -EINVAL; | ||
1263 | len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]); | ||
1264 | if (len < 0) | ||
1265 | goto out; | ||
1266 | |||
1267 | ret = -ENAMETOOLONG; | 1262 | ret = -ENAMETOOLONG; |
1263 | len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]); | ||
1268 | if (len >= PAGE_SIZE) | 1264 | if (len >= PAGE_SIZE) |
1269 | goto out; | 1265 | goto out; |
1270 | 1266 | ||
@@ -1557,19 +1553,10 @@ static inline u32 sel_ino_to_perm(unsigned long ino) | |||
1557 | static ssize_t sel_read_class(struct file *file, char __user *buf, | 1553 | static ssize_t sel_read_class(struct file *file, char __user *buf, |
1558 | size_t count, loff_t *ppos) | 1554 | size_t count, loff_t *ppos) |
1559 | { | 1555 | { |
1560 | ssize_t rc, len; | ||
1561 | char *page; | ||
1562 | unsigned long ino = file->f_path.dentry->d_inode->i_ino; | 1556 | unsigned long ino = file->f_path.dentry->d_inode->i_ino; |
1563 | 1557 | char res[TMPBUFLEN]; | |
1564 | page = (char *)__get_free_page(GFP_KERNEL); | 1558 | ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_class(ino)); |
1565 | if (!page) | 1559 | return simple_read_from_buffer(buf, count, ppos, res, len); |
1566 | return -ENOMEM; | ||
1567 | |||
1568 | len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino)); | ||
1569 | rc = simple_read_from_buffer(buf, count, ppos, page, len); | ||
1570 | free_page((unsigned long)page); | ||
1571 | |||
1572 | return rc; | ||
1573 | } | 1560 | } |
1574 | 1561 | ||
1575 | static const struct file_operations sel_class_ops = { | 1562 | static const struct file_operations sel_class_ops = { |
@@ -1580,19 +1567,10 @@ static const struct file_operations sel_class_ops = { | |||
1580 | static ssize_t sel_read_perm(struct file *file, char __user *buf, | 1567 | static ssize_t sel_read_perm(struct file *file, char __user *buf, |
1581 | size_t count, loff_t *ppos) | 1568 | size_t count, loff_t *ppos) |
1582 | { | 1569 | { |
1583 | ssize_t rc, len; | ||
1584 | char *page; | ||
1585 | unsigned long ino = file->f_path.dentry->d_inode->i_ino; | 1570 | unsigned long ino = file->f_path.dentry->d_inode->i_ino; |
1586 | 1571 | char res[TMPBUFLEN]; | |
1587 | page = (char *)__get_free_page(GFP_KERNEL); | 1572 | ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_perm(ino)); |
1588 | if (!page) | 1573 | return simple_read_from_buffer(buf, count, ppos, res, len); |
1589 | return -ENOMEM; | ||
1590 | |||
1591 | len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_perm(ino)); | ||
1592 | rc = simple_read_from_buffer(buf, count, ppos, page, len); | ||
1593 | free_page((unsigned long)page); | ||
1594 | |||
1595 | return rc; | ||
1596 | } | 1574 | } |
1597 | 1575 | ||
1598 | static const struct file_operations sel_perm_ops = { | 1576 | static const struct file_operations sel_perm_ops = { |
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d583c0545808..ee0bb5735f35 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c | |||
@@ -1171,7 +1171,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, | |||
1171 | } | 1171 | } |
1172 | 1172 | ||
1173 | /** | 1173 | /** |
1174 | * smack_file_mmap : | 1174 | * smack_mmap_file : |
1175 | * Check permissions for a mmap operation. The @file may be NULL, e.g. | 1175 | * Check permissions for a mmap operation. The @file may be NULL, e.g. |
1176 | * if mapping anonymous memory. | 1176 | * if mapping anonymous memory. |
1177 | * @file contains the file structure for file to map (may be NULL). | 1177 | * @file contains the file structure for file to map (may be NULL). |
@@ -1180,10 +1180,9 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, | |||
1180 | * @flags contains the operational flags. | 1180 | * @flags contains the operational flags. |
1181 | * Return 0 if permission is granted. | 1181 | * Return 0 if permission is granted. |
1182 | */ | 1182 | */ |
1183 | static int smack_file_mmap(struct file *file, | 1183 | static int smack_mmap_file(struct file *file, |
1184 | unsigned long reqprot, unsigned long prot, | 1184 | unsigned long reqprot, unsigned long prot, |
1185 | unsigned long flags, unsigned long addr, | 1185 | unsigned long flags) |
1186 | unsigned long addr_only) | ||
1187 | { | 1186 | { |
1188 | struct smack_known *skp; | 1187 | struct smack_known *skp; |
1189 | struct smack_rule *srp; | 1188 | struct smack_rule *srp; |
@@ -1198,11 +1197,6 @@ static int smack_file_mmap(struct file *file, | |||
1198 | int tmay; | 1197 | int tmay; |
1199 | int rc; | 1198 | int rc; |
1200 | 1199 | ||
1201 | /* do DAC check on address space usage */ | ||
1202 | rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); | ||
1203 | if (rc || addr_only) | ||
1204 | return rc; | ||
1205 | |||
1206 | if (file == NULL || file->f_dentry == NULL) | 1200 | if (file == NULL || file->f_dentry == NULL) |
1207 | return 0; | 1201 | return 0; |
1208 | 1202 | ||
@@ -3482,7 +3476,8 @@ struct security_operations smack_ops = { | |||
3482 | .file_ioctl = smack_file_ioctl, | 3476 | .file_ioctl = smack_file_ioctl, |
3483 | .file_lock = smack_file_lock, | 3477 | .file_lock = smack_file_lock, |
3484 | .file_fcntl = smack_file_fcntl, | 3478 | .file_fcntl = smack_file_fcntl, |
3485 | .file_mmap = smack_file_mmap, | 3479 | .mmap_file = smack_mmap_file, |
3480 | .mmap_addr = cap_mmap_addr, | ||
3486 | .file_set_fowner = smack_file_set_fowner, | 3481 | .file_set_fowner = smack_file_set_fowner, |
3487 | .file_send_sigiotask = smack_file_send_sigiotask, | 3482 | .file_send_sigiotask = smack_file_send_sigiotask, |
3488 | .file_receive = smack_file_receive, | 3483 | .file_receive = smack_file_receive, |
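On the module side the pattern is the same across the three LSMs touched here: the per-file logic moves into a ->mmap_file() hook with the shortened signature, and the address check either becomes the module's own ->mmap_addr() (SELinux, which still calls cap_mmap_addr() for the DAC part) or points straight at cap_mmap_addr (AppArmor, Smack). A minimal sketch of how a module now wires the split hooks, with placeholder names:

    /* Sketch only: a module registering the split mmap hooks. */
    static int example_mmap_addr(unsigned long addr)
    {
            return cap_mmap_addr(addr);      /* DAC check below dac_mmap_min_addr */
    }

    static int example_mmap_file(struct file *file, unsigned long reqprot,
                                 unsigned long prot, unsigned long flags)
    {
            return 0;                        /* module-specific file checks go here */
    }

    static struct security_operations example_ops = {
            .mmap_addr = example_mmap_addr,
            .mmap_file = example_mmap_file,
    };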