diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/Kconfig.binfmt | 3 | ||||
-rw-r--r-- | fs/binfmt_elf.c | 2 | ||||
-rw-r--r-- | fs/btrfs/file.c | 2 | ||||
-rw-r--r-- | fs/exec.c | 4 | ||||
-rw-r--r-- | fs/inode.c | 2 | ||||
-rw-r--r-- | fs/proc/base.c | 447 | ||||
-rw-r--r-- | fs/proc/inode.c | 18 | ||||
-rw-r--r-- | fs/proc/internal.h | 1 | ||||
-rw-r--r-- | fs/proc/root.c | 70 | ||||
-rw-r--r-- | fs/reiserfs/bitmap.c | 3 | ||||
-rw-r--r-- | fs/reiserfs/journal.c | 64 | ||||
-rw-r--r-- | fs/reiserfs/super.c | 54 |
12 files changed, 587 insertions, 83 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 79e2ca7973b7..e95d1b64082c 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -27,6 +27,9 @@ config COMPAT_BINFMT_ELF | |||
27 | bool | 27 | bool |
28 | depends on COMPAT && BINFMT_ELF | 28 | depends on COMPAT && BINFMT_ELF |
29 | 29 | ||
30 | config ARCH_BINFMT_ELF_RANDOMIZE_PIE | ||
31 | bool | ||
32 | |||
30 | config BINFMT_ELF_FDPIC | 33 | config BINFMT_ELF_FDPIC |
31 | bool "Kernel support for FDPIC ELF binaries" | 34 | bool "Kernel support for FDPIC ELF binaries" |
32 | default y | 35 | default y |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 21ac5ee4b43f..bcb884e2d613 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -794,7 +794,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
794 | * default mmap base, as well as whatever program they | 794 | * default mmap base, as well as whatever program they |
795 | * might try to exec. This is because the brk will | 795 | * might try to exec. This is because the brk will |
796 | * follow the loader, and is not movable. */ | 796 | * follow the loader, and is not movable. */ |
797 | #if defined(CONFIG_X86) || defined(CONFIG_ARM) | 797 | #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE |
798 | /* Memory randomization might have been switched off | 798 | /* Memory randomization might have been switched off |
799 | * in runtime via sysctl. | 799 | * in runtime via sysctl. |
800 | * If that is the case, retain the original non-zero | 800 | * If that is the case, retain the original non-zero |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 97fbe939c050..20375e6691c3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1081,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1081 | again: | 1081 | again: |
1082 | for (i = 0; i < num_pages; i++) { | 1082 | for (i = 0; i < num_pages; i++) { |
1083 | pages[i] = find_or_create_page(inode->i_mapping, index + i, | 1083 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1084 | mask); | 1084 | mask | __GFP_WRITE); |
1085 | if (!pages[i]) { | 1085 | if (!pages[i]) { |
1086 | faili = i - 1; | 1086 | faili = i - 1; |
1087 | err = -ENOMEM; | 1087 | err = -ENOMEM; |
@@ -59,6 +59,8 @@ | |||
59 | #include <asm/uaccess.h> | 59 | #include <asm/uaccess.h> |
60 | #include <asm/mmu_context.h> | 60 | #include <asm/mmu_context.h> |
61 | #include <asm/tlb.h> | 61 | #include <asm/tlb.h> |
62 | |||
63 | #include <trace/events/task.h> | ||
62 | #include "internal.h" | 64 | #include "internal.h" |
63 | 65 | ||
64 | int core_uses_pid; | 66 | int core_uses_pid; |
@@ -1054,6 +1056,8 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
1054 | { | 1056 | { |
1055 | task_lock(tsk); | 1057 | task_lock(tsk); |
1056 | 1058 | ||
1059 | trace_task_rename(tsk, buf); | ||
1060 | |||
1057 | /* | 1061 | /* |
1058 | * Threads may access current->comm without holding | 1062 | * Threads may access current->comm without holding |
1059 | * the task lock, so write the string carefully. | 1063 | * the task lock, so write the string carefully. |
diff --git a/fs/inode.c b/fs/inode.c index 87535753ab04..4fa4f0916af9 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -776,6 +776,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) | |||
776 | else | 776 | else |
777 | __count_vm_events(PGINODESTEAL, reap); | 777 | __count_vm_events(PGINODESTEAL, reap); |
778 | spin_unlock(&sb->s_inode_lru_lock); | 778 | spin_unlock(&sb->s_inode_lru_lock); |
779 | if (current->reclaim_state) | ||
780 | current->reclaim_state->reclaimed_slab += reap; | ||
779 | 781 | ||
780 | dispose_list(&freeable); | 782 | dispose_list(&freeable); |
781 | } | 783 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index a1dddda999f2..8173dfd89cb2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -83,9 +83,11 @@ | |||
83 | #include <linux/pid_namespace.h> | 83 | #include <linux/pid_namespace.h> |
84 | #include <linux/fs_struct.h> | 84 | #include <linux/fs_struct.h> |
85 | #include <linux/slab.h> | 85 | #include <linux/slab.h> |
86 | #include <linux/flex_array.h> | ||
86 | #ifdef CONFIG_HARDWALL | 87 | #ifdef CONFIG_HARDWALL |
87 | #include <asm/hardwall.h> | 88 | #include <asm/hardwall.h> |
88 | #endif | 89 | #endif |
90 | #include <trace/events/oom.h> | ||
89 | #include "internal.h" | 91 | #include "internal.h" |
90 | 92 | ||
91 | /* NOTE: | 93 | /* NOTE: |
@@ -133,6 +135,8 @@ struct pid_entry { | |||
133 | NULL, &proc_single_file_operations, \ | 135 | NULL, &proc_single_file_operations, \ |
134 | { .proc_show = show } ) | 136 | { .proc_show = show } ) |
135 | 137 | ||
138 | static int proc_fd_permission(struct inode *inode, int mask); | ||
139 | |||
136 | /* | 140 | /* |
137 | * Count the number of hardlinks for the pid_entry table, excluding the . | 141 | * Count the number of hardlinks for the pid_entry table, excluding the . |
138 | * and .. links. | 142 | * and .. links. |
@@ -165,9 +169,9 @@ static int get_task_root(struct task_struct *task, struct path *root) | |||
165 | return result; | 169 | return result; |
166 | } | 170 | } |
167 | 171 | ||
168 | static int proc_cwd_link(struct inode *inode, struct path *path) | 172 | static int proc_cwd_link(struct dentry *dentry, struct path *path) |
169 | { | 173 | { |
170 | struct task_struct *task = get_proc_task(inode); | 174 | struct task_struct *task = get_proc_task(dentry->d_inode); |
171 | int result = -ENOENT; | 175 | int result = -ENOENT; |
172 | 176 | ||
173 | if (task) { | 177 | if (task) { |
@@ -182,9 +186,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path) | |||
182 | return result; | 186 | return result; |
183 | } | 187 | } |
184 | 188 | ||
185 | static int proc_root_link(struct inode *inode, struct path *path) | 189 | static int proc_root_link(struct dentry *dentry, struct path *path) |
186 | { | 190 | { |
187 | struct task_struct *task = get_proc_task(inode); | 191 | struct task_struct *task = get_proc_task(dentry->d_inode); |
188 | int result = -ENOENT; | 192 | int result = -ENOENT; |
189 | 193 | ||
190 | if (task) { | 194 | if (task) { |
@@ -627,6 +631,50 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) | |||
627 | return 0; | 631 | return 0; |
628 | } | 632 | } |
629 | 633 | ||
634 | /* | ||
635 | * May current process learn task's sched/cmdline info (for hide_pid_min=1) | ||
636 | * or euid/egid (for hide_pid_min=2)? | ||
637 | */ | ||
638 | static bool has_pid_permissions(struct pid_namespace *pid, | ||
639 | struct task_struct *task, | ||
640 | int hide_pid_min) | ||
641 | { | ||
642 | if (pid->hide_pid < hide_pid_min) | ||
643 | return true; | ||
644 | if (in_group_p(pid->pid_gid)) | ||
645 | return true; | ||
646 | return ptrace_may_access(task, PTRACE_MODE_READ); | ||
647 | } | ||
648 | |||
649 | |||
650 | static int proc_pid_permission(struct inode *inode, int mask) | ||
651 | { | ||
652 | struct pid_namespace *pid = inode->i_sb->s_fs_info; | ||
653 | struct task_struct *task; | ||
654 | bool has_perms; | ||
655 | |||
656 | task = get_proc_task(inode); | ||
657 | has_perms = has_pid_permissions(pid, task, 1); | ||
658 | put_task_struct(task); | ||
659 | |||
660 | if (!has_perms) { | ||
661 | if (pid->hide_pid == 2) { | ||
662 | /* | ||
663 | * Let's make getdents(), stat(), and open() | ||
664 | * consistent with each other. If a process | ||
665 | * may not stat() a file, it shouldn't be seen | ||
666 | * in procfs at all. | ||
667 | */ | ||
668 | return -ENOENT; | ||
669 | } | ||
670 | |||
671 | return -EPERM; | ||
672 | } | ||
673 | return generic_permission(inode, mask); | ||
674 | } | ||
675 | |||
676 | |||
677 | |||
630 | static const struct inode_operations proc_def_inode_operations = { | 678 | static const struct inode_operations proc_def_inode_operations = { |
631 | .setattr = proc_setattr, | 679 | .setattr = proc_setattr, |
632 | }; | 680 | }; |
@@ -1010,6 +1058,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1010 | else | 1058 | else |
1011 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / | 1059 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / |
1012 | -OOM_DISABLE; | 1060 | -OOM_DISABLE; |
1061 | trace_oom_score_adj_update(task); | ||
1013 | err_sighand: | 1062 | err_sighand: |
1014 | unlock_task_sighand(task, &flags); | 1063 | unlock_task_sighand(task, &flags); |
1015 | err_task_lock: | 1064 | err_task_lock: |
@@ -1097,6 +1146,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1097 | task->signal->oom_score_adj = oom_score_adj; | 1146 | task->signal->oom_score_adj = oom_score_adj; |
1098 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | 1147 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) |
1099 | task->signal->oom_score_adj_min = oom_score_adj; | 1148 | task->signal->oom_score_adj_min = oom_score_adj; |
1149 | trace_oom_score_adj_update(task); | ||
1100 | /* | 1150 | /* |
1101 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | 1151 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is |
1102 | * always attainable. | 1152 | * always attainable. |
@@ -1453,13 +1503,13 @@ static const struct file_operations proc_pid_set_comm_operations = { | |||
1453 | .release = single_release, | 1503 | .release = single_release, |
1454 | }; | 1504 | }; |
1455 | 1505 | ||
1456 | static int proc_exe_link(struct inode *inode, struct path *exe_path) | 1506 | static int proc_exe_link(struct dentry *dentry, struct path *exe_path) |
1457 | { | 1507 | { |
1458 | struct task_struct *task; | 1508 | struct task_struct *task; |
1459 | struct mm_struct *mm; | 1509 | struct mm_struct *mm; |
1460 | struct file *exe_file; | 1510 | struct file *exe_file; |
1461 | 1511 | ||
1462 | task = get_proc_task(inode); | 1512 | task = get_proc_task(dentry->d_inode); |
1463 | if (!task) | 1513 | if (!task) |
1464 | return -ENOENT; | 1514 | return -ENOENT; |
1465 | mm = get_task_mm(task); | 1515 | mm = get_task_mm(task); |
@@ -1489,7 +1539,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1489 | if (!proc_fd_access_allowed(inode)) | 1539 | if (!proc_fd_access_allowed(inode)) |
1490 | goto out; | 1540 | goto out; |
1491 | 1541 | ||
1492 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); | 1542 | error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); |
1493 | out: | 1543 | out: |
1494 | return ERR_PTR(error); | 1544 | return ERR_PTR(error); |
1495 | } | 1545 | } |
@@ -1528,7 +1578,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
1528 | if (!proc_fd_access_allowed(inode)) | 1578 | if (!proc_fd_access_allowed(inode)) |
1529 | goto out; | 1579 | goto out; |
1530 | 1580 | ||
1531 | error = PROC_I(inode)->op.proc_get_link(inode, &path); | 1581 | error = PROC_I(inode)->op.proc_get_link(dentry, &path); |
1532 | if (error) | 1582 | if (error) |
1533 | goto out; | 1583 | goto out; |
1534 | 1584 | ||
@@ -1609,6 +1659,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
1609 | struct inode *inode = dentry->d_inode; | 1659 | struct inode *inode = dentry->d_inode; |
1610 | struct task_struct *task; | 1660 | struct task_struct *task; |
1611 | const struct cred *cred; | 1661 | const struct cred *cred; |
1662 | struct pid_namespace *pid = dentry->d_sb->s_fs_info; | ||
1612 | 1663 | ||
1613 | generic_fillattr(inode, stat); | 1664 | generic_fillattr(inode, stat); |
1614 | 1665 | ||
@@ -1617,6 +1668,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
1617 | stat->gid = 0; | 1668 | stat->gid = 0; |
1618 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 1669 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
1619 | if (task) { | 1670 | if (task) { |
1671 | if (!has_pid_permissions(pid, task, 2)) { | ||
1672 | rcu_read_unlock(); | ||
1673 | /* | ||
1674 | * This doesn't prevent learning whether PID exists, | ||
1675 | * it only makes getattr() consistent with readdir(). | ||
1676 | */ | ||
1677 | return -ENOENT; | ||
1678 | } | ||
1620 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | 1679 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1621 | task_dumpable(task)) { | 1680 | task_dumpable(task)) { |
1622 | cred = __task_cred(task); | 1681 | cred = __task_cred(task); |
@@ -1820,9 +1879,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) | |||
1820 | return -ENOENT; | 1879 | return -ENOENT; |
1821 | } | 1880 | } |
1822 | 1881 | ||
1823 | static int proc_fd_link(struct inode *inode, struct path *path) | 1882 | static int proc_fd_link(struct dentry *dentry, struct path *path) |
1824 | { | 1883 | { |
1825 | return proc_fd_info(inode, path, NULL); | 1884 | return proc_fd_info(dentry->d_inode, path, NULL); |
1826 | } | 1885 | } |
1827 | 1886 | ||
1828 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | 1887 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) |
@@ -2043,6 +2102,355 @@ static const struct file_operations proc_fd_operations = { | |||
2043 | .llseek = default_llseek, | 2102 | .llseek = default_llseek, |
2044 | }; | 2103 | }; |
2045 | 2104 | ||
2105 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
2106 | |||
2107 | /* | ||
2108 | * dname_to_vma_addr - maps a dentry name into two unsigned longs | ||
2109 | * which represent vma start and end addresses. | ||
2110 | */ | ||
2111 | static int dname_to_vma_addr(struct dentry *dentry, | ||
2112 | unsigned long *start, unsigned long *end) | ||
2113 | { | ||
2114 | if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) | ||
2115 | return -EINVAL; | ||
2116 | |||
2117 | return 0; | ||
2118 | } | ||
2119 | |||
2120 | static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
2121 | { | ||
2122 | unsigned long vm_start, vm_end; | ||
2123 | bool exact_vma_exists = false; | ||
2124 | struct mm_struct *mm = NULL; | ||
2125 | struct task_struct *task; | ||
2126 | const struct cred *cred; | ||
2127 | struct inode *inode; | ||
2128 | int status = 0; | ||
2129 | |||
2130 | if (nd && nd->flags & LOOKUP_RCU) | ||
2131 | return -ECHILD; | ||
2132 | |||
2133 | if (!capable(CAP_SYS_ADMIN)) { | ||
2134 | status = -EACCES; | ||
2135 | goto out_notask; | ||
2136 | } | ||
2137 | |||
2138 | inode = dentry->d_inode; | ||
2139 | task = get_proc_task(inode); | ||
2140 | if (!task) | ||
2141 | goto out_notask; | ||
2142 | |||
2143 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
2144 | goto out; | ||
2145 | |||
2146 | mm = get_task_mm(task); | ||
2147 | if (!mm) | ||
2148 | goto out; | ||
2149 | |||
2150 | if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { | ||
2151 | down_read(&mm->mmap_sem); | ||
2152 | exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); | ||
2153 | up_read(&mm->mmap_sem); | ||
2154 | } | ||
2155 | |||
2156 | mmput(mm); | ||
2157 | |||
2158 | if (exact_vma_exists) { | ||
2159 | if (task_dumpable(task)) { | ||
2160 | rcu_read_lock(); | ||
2161 | cred = __task_cred(task); | ||
2162 | inode->i_uid = cred->euid; | ||
2163 | inode->i_gid = cred->egid; | ||
2164 | rcu_read_unlock(); | ||
2165 | } else { | ||
2166 | inode->i_uid = 0; | ||
2167 | inode->i_gid = 0; | ||
2168 | } | ||
2169 | security_task_to_inode(task, inode); | ||
2170 | status = 1; | ||
2171 | } | ||
2172 | |||
2173 | out: | ||
2174 | put_task_struct(task); | ||
2175 | |||
2176 | out_notask: | ||
2177 | if (status <= 0) | ||
2178 | d_drop(dentry); | ||
2179 | |||
2180 | return status; | ||
2181 | } | ||
2182 | |||
2183 | static const struct dentry_operations tid_map_files_dentry_operations = { | ||
2184 | .d_revalidate = map_files_d_revalidate, | ||
2185 | .d_delete = pid_delete_dentry, | ||
2186 | }; | ||
2187 | |||
2188 | static int proc_map_files_get_link(struct dentry *dentry, struct path *path) | ||
2189 | { | ||
2190 | unsigned long vm_start, vm_end; | ||
2191 | struct vm_area_struct *vma; | ||
2192 | struct task_struct *task; | ||
2193 | struct mm_struct *mm; | ||
2194 | int rc; | ||
2195 | |||
2196 | rc = -ENOENT; | ||
2197 | task = get_proc_task(dentry->d_inode); | ||
2198 | if (!task) | ||
2199 | goto out; | ||
2200 | |||
2201 | mm = get_task_mm(task); | ||
2202 | put_task_struct(task); | ||
2203 | if (!mm) | ||
2204 | goto out; | ||
2205 | |||
2206 | rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); | ||
2207 | if (rc) | ||
2208 | goto out_mmput; | ||
2209 | |||
2210 | down_read(&mm->mmap_sem); | ||
2211 | vma = find_exact_vma(mm, vm_start, vm_end); | ||
2212 | if (vma && vma->vm_file) { | ||
2213 | *path = vma->vm_file->f_path; | ||
2214 | path_get(path); | ||
2215 | rc = 0; | ||
2216 | } | ||
2217 | up_read(&mm->mmap_sem); | ||
2218 | |||
2219 | out_mmput: | ||
2220 | mmput(mm); | ||
2221 | out: | ||
2222 | return rc; | ||
2223 | } | ||
2224 | |||
/*
 * One collected map_files directory entry, as filled in by
 * proc_map_files_readdir() for each file-backed vma.
 */
struct map_files_info {
	/* vma->vm_file, stored after get_file() and released with fput() */
	struct file *file;
	/* return value of the snprintf() that formatted @name */
	unsigned long len;
	/*
	 * Entry name "<vm_start>-<vm_end>" in hex: two longs at up to
	 * 2*sizeof(long) digits each, plus '-' and the terminating NUL.
	 */
	unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
2230 | |||
2231 | static struct dentry * | ||
2232 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | ||
2233 | struct task_struct *task, const void *ptr) | ||
2234 | { | ||
2235 | const struct file *file = ptr; | ||
2236 | struct proc_inode *ei; | ||
2237 | struct inode *inode; | ||
2238 | |||
2239 | if (!file) | ||
2240 | return ERR_PTR(-ENOENT); | ||
2241 | |||
2242 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
2243 | if (!inode) | ||
2244 | return ERR_PTR(-ENOENT); | ||
2245 | |||
2246 | ei = PROC_I(inode); | ||
2247 | ei->op.proc_get_link = proc_map_files_get_link; | ||
2248 | |||
2249 | inode->i_op = &proc_pid_link_inode_operations; | ||
2250 | inode->i_size = 64; | ||
2251 | inode->i_mode = S_IFLNK; | ||
2252 | |||
2253 | if (file->f_mode & FMODE_READ) | ||
2254 | inode->i_mode |= S_IRUSR; | ||
2255 | if (file->f_mode & FMODE_WRITE) | ||
2256 | inode->i_mode |= S_IWUSR; | ||
2257 | |||
2258 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | ||
2259 | d_add(dentry, inode); | ||
2260 | |||
2261 | return NULL; | ||
2262 | } | ||
2263 | |||
2264 | static struct dentry *proc_map_files_lookup(struct inode *dir, | ||
2265 | struct dentry *dentry, struct nameidata *nd) | ||
2266 | { | ||
2267 | unsigned long vm_start, vm_end; | ||
2268 | struct vm_area_struct *vma; | ||
2269 | struct task_struct *task; | ||
2270 | struct dentry *result; | ||
2271 | struct mm_struct *mm; | ||
2272 | |||
2273 | result = ERR_PTR(-EACCES); | ||
2274 | if (!capable(CAP_SYS_ADMIN)) | ||
2275 | goto out; | ||
2276 | |||
2277 | result = ERR_PTR(-ENOENT); | ||
2278 | task = get_proc_task(dir); | ||
2279 | if (!task) | ||
2280 | goto out; | ||
2281 | |||
2282 | result = ERR_PTR(-EACCES); | ||
2283 | if (lock_trace(task)) | ||
2284 | goto out_put_task; | ||
2285 | |||
2286 | result = ERR_PTR(-ENOENT); | ||
2287 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) | ||
2288 | goto out_unlock; | ||
2289 | |||
2290 | mm = get_task_mm(task); | ||
2291 | if (!mm) | ||
2292 | goto out_unlock; | ||
2293 | |||
2294 | down_read(&mm->mmap_sem); | ||
2295 | vma = find_exact_vma(mm, vm_start, vm_end); | ||
2296 | if (!vma) | ||
2297 | goto out_no_vma; | ||
2298 | |||
2299 | result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); | ||
2300 | |||
2301 | out_no_vma: | ||
2302 | up_read(&mm->mmap_sem); | ||
2303 | mmput(mm); | ||
2304 | out_unlock: | ||
2305 | unlock_trace(task); | ||
2306 | out_put_task: | ||
2307 | put_task_struct(task); | ||
2308 | out: | ||
2309 | return result; | ||
2310 | } | ||
2311 | |||
2312 | static const struct inode_operations proc_map_files_inode_operations = { | ||
2313 | .lookup = proc_map_files_lookup, | ||
2314 | .permission = proc_fd_permission, | ||
2315 | .setattr = proc_setattr, | ||
2316 | }; | ||
2317 | |||
/*
 * ->readdir for the map_files directory.
 *
 * Emits "." and "..", then one "<vm_start>-<vm_end>" entry for every
 * file-backed vma of the task, resuming from filp->f_pos.
 *
 * Returns -EACCES without CAP_SYS_ADMIN or when lock_trace() fails,
 * -ENOENT when the task is gone, -ENOMEM when the temporary array cannot
 * be allocated, and otherwise 0 or the non-zero value that stopped
 * proc_fill_cache().
 */
static int
proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	struct vm_area_struct *vma;
	struct task_struct *task;
	struct mm_struct *mm;
	ino_t ino;
	int ret;

	ret = -EACCES;
	if (!capable(CAP_SYS_ADMIN))
		goto out;

	ret = -ENOENT;
	task = get_proc_task(inode);
	if (!task)
		goto out;

	ret = -EACCES;
	if (lock_trace(task))
		goto out_put_task;

	ret = 0;
	switch (filp->f_pos) {
	case 0:
		ino = inode->i_ino;
		if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
			goto out_unlock;
		filp->f_pos++;
		/* fall through: continue with ".." */
	case 1:
		ino = parent_ino(dentry);
		if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
			goto out_unlock;
		filp->f_pos++;
		/* fall through: continue with the vma entries */
	default:
	{
		unsigned long nr_files, pos, i;
		struct flex_array *fa = NULL;
		struct map_files_info info;
		struct map_files_info *p;

		mm = get_task_mm(task);
		if (!mm)
			goto out_unlock;
		down_read(&mm->mmap_sem);

		nr_files = 0;

		/*
		 * We need two passes here:
		 *
		 *  1) Collect vmas of mapped files with mmap_sem taken
		 *  2) Release mmap_sem and instantiate entries
		 *
		 * otherwise lockdep complains, since the filldir()
		 * routine might need to take mmap_sem in might_fault().
		 */

		/*
		 * Count the file-backed vmas that lie at or beyond f_pos.
		 * pos starts at 2 because positions 0 and 1 are "." and "..".
		 */
		for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
			if (vma->vm_file && ++pos > filp->f_pos)
				nr_files++;
		}

		if (nr_files) {
			fa = flex_array_alloc(sizeof(info), nr_files,
						GFP_KERNEL);
			if (!fa || flex_array_prealloc(fa, 0, nr_files,
							GFP_KERNEL)) {
				ret = -ENOMEM;
				if (fa)
					flex_array_free(fa);
				up_read(&mm->mmap_sem);
				mmput(mm);
				goto out_unlock;
			}
			/*
			 * Same walk as the counting pass (mmap_sem is still
			 * held), this time recording file and name of each
			 * selected vma.
			 */
			for (i = 0, vma = mm->mmap, pos = 2; vma;
					vma = vma->vm_next) {
				if (!vma->vm_file)
					continue;
				if (++pos <= filp->f_pos)
					continue;

				/* Released by fput() in the loops below. */
				get_file(vma->vm_file);
				info.file = vma->vm_file;
				info.len = snprintf(info.name,
						sizeof(info.name), "%lx-%lx",
						vma->vm_start, vma->vm_end);
				/*
				 * NOTE(review): presumably cannot fail after
				 * the prealloc above - confirm.
				 */
				if (flex_array_put(fa, i++, &info, GFP_KERNEL))
					BUG();
			}
		}
		up_read(&mm->mmap_sem);

		/* Second pass: emit the collected entries without mmap_sem. */
		for (i = 0; i < nr_files; i++) {
			p = flex_array_get(fa, i);
			ret = proc_fill_cache(filp, dirent, filldir,
					      p->name, p->len,
					      proc_map_files_instantiate,
					      task, p->file);
			if (ret)
				break;
			filp->f_pos++;
			fput(p->file);
		}
		for (; i < nr_files; i++) {
			/*
			 * In case of error don't forget
			 * to put rest of file refs.
			 */
			p = flex_array_get(fa, i);
			fput(p->file);
		}
		if (fa)
			flex_array_free(fa);
		mmput(mm);
	}
	}

out_unlock:
	unlock_trace(task);
out_put_task:
	put_task_struct(task);
out:
	return ret;
}
2445 | |||
2446 | static const struct file_operations proc_map_files_operations = { | ||
2447 | .read = generic_read_dir, | ||
2448 | .readdir = proc_map_files_readdir, | ||
2449 | .llseek = default_llseek, | ||
2450 | }; | ||
2451 | |||
2452 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
2453 | |||
2046 | /* | 2454 | /* |
2047 | * /proc/pid/fd needs a special permission handler so that a process can still | 2455 | * /proc/pid/fd needs a special permission handler so that a process can still |
2048 | * access /proc/self/fd after it has executed a setuid(). | 2456 | * access /proc/self/fd after it has executed a setuid(). |
@@ -2658,6 +3066,9 @@ static const struct inode_operations proc_task_inode_operations; | |||
2658 | static const struct pid_entry tgid_base_stuff[] = { | 3066 | static const struct pid_entry tgid_base_stuff[] = { |
2659 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), | 3067 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), |
2660 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 3068 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
3069 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
3070 | DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), | ||
3071 | #endif | ||
2661 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 3072 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2662 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | 3073 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), |
2663 | #ifdef CONFIG_NET | 3074 | #ifdef CONFIG_NET |
@@ -2761,6 +3172,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { | |||
2761 | .lookup = proc_tgid_base_lookup, | 3172 | .lookup = proc_tgid_base_lookup, |
2762 | .getattr = pid_getattr, | 3173 | .getattr = pid_getattr, |
2763 | .setattr = proc_setattr, | 3174 | .setattr = proc_setattr, |
3175 | .permission = proc_pid_permission, | ||
2764 | }; | 3176 | }; |
2765 | 3177 | ||
2766 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) | 3178 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) |
@@ -2964,6 +3376,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi | |||
2964 | proc_pid_instantiate, iter.task, NULL); | 3376 | proc_pid_instantiate, iter.task, NULL); |
2965 | } | 3377 | } |
2966 | 3378 | ||
/*
 * filldir_t callback that ignores every argument and reports success.
 * proc_pid_readdir() substitutes it for the real filldir when
 * has_pid_permissions() denies the caller, so the entry is not passed to
 * the real callback.
 */
static int fake_filldir(void *buf, const char *name, int namelen,
			loff_t offset, u64 ino, unsigned d_type)
{
	return 0;
}
3384 | |||
2967 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 3385 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2968 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3386 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2969 | { | 3387 | { |
@@ -2971,6 +3389,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2971 | struct task_struct *reaper; | 3389 | struct task_struct *reaper; |
2972 | struct tgid_iter iter; | 3390 | struct tgid_iter iter; |
2973 | struct pid_namespace *ns; | 3391 | struct pid_namespace *ns; |
3392 | filldir_t __filldir; | ||
2974 | 3393 | ||
2975 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) | 3394 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2976 | goto out_no_task; | 3395 | goto out_no_task; |
@@ -2992,8 +3411,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2992 | for (iter = next_tgid(ns, iter); | 3411 | for (iter = next_tgid(ns, iter); |
2993 | iter.task; | 3412 | iter.task; |
2994 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 3413 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
3414 | if (has_pid_permissions(ns, iter.task, 2)) | ||
3415 | __filldir = filldir; | ||
3416 | else | ||
3417 | __filldir = fake_filldir; | ||
3418 | |||
2995 | filp->f_pos = iter.tgid + TGID_OFFSET; | 3419 | filp->f_pos = iter.tgid + TGID_OFFSET; |
2996 | if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { | 3420 | if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { |
2997 | put_task_struct(iter.task); | 3421 | put_task_struct(iter.task); |
2998 | goto out; | 3422 | goto out; |
2999 | } | 3423 | } |
@@ -3328,6 +3752,7 @@ static const struct inode_operations proc_task_inode_operations = { | |||
3328 | .lookup = proc_task_lookup, | 3752 | .lookup = proc_task_lookup, |
3329 | .getattr = proc_task_getattr, | 3753 | .getattr = proc_task_getattr, |
3330 | .setattr = proc_setattr, | 3754 | .setattr = proc_setattr, |
3755 | .permission = proc_pid_permission, | ||
3331 | }; | 3756 | }; |
3332 | 3757 | ||
3333 | static const struct file_operations proc_task_operations = { | 3758 | static const struct file_operations proc_task_operations = { |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 51a176622b8f..84fd3235a590 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/time.h> | 7 | #include <linux/time.h> |
8 | #include <linux/proc_fs.h> | 8 | #include <linux/proc_fs.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/pid_namespace.h> | ||
10 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
11 | #include <linux/string.h> | 12 | #include <linux/string.h> |
12 | #include <linux/stat.h> | 13 | #include <linux/stat.h> |
@@ -17,7 +18,9 @@ | |||
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
19 | #include <linux/sysctl.h> | 20 | #include <linux/sysctl.h> |
21 | #include <linux/seq_file.h> | ||
20 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/mount.h> | ||
21 | 24 | ||
22 | #include <asm/system.h> | 25 | #include <asm/system.h> |
23 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
@@ -101,12 +104,27 @@ void __init proc_init_inodecache(void) | |||
101 | init_once); | 104 | init_once); |
102 | } | 105 | } |
103 | 106 | ||
107 | static int proc_show_options(struct seq_file *seq, struct dentry *root) | ||
108 | { | ||
109 | struct super_block *sb = root->d_sb; | ||
110 | struct pid_namespace *pid = sb->s_fs_info; | ||
111 | |||
112 | if (pid->pid_gid) | ||
113 | seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); | ||
114 | if (pid->hide_pid != 0) | ||
115 | seq_printf(seq, ",hidepid=%u", pid->hide_pid); | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
104 | static const struct super_operations proc_sops = { | 120 | static const struct super_operations proc_sops = { |
105 | .alloc_inode = proc_alloc_inode, | 121 | .alloc_inode = proc_alloc_inode, |
106 | .destroy_inode = proc_destroy_inode, | 122 | .destroy_inode = proc_destroy_inode, |
107 | .drop_inode = generic_delete_inode, | 123 | .drop_inode = generic_delete_inode, |
108 | .evict_inode = proc_evict_inode, | 124 | .evict_inode = proc_evict_inode, |
109 | .statfs = simple_statfs, | 125 | .statfs = simple_statfs, |
126 | .remount_fs = proc_remount, | ||
127 | .show_options = proc_show_options, | ||
110 | }; | 128 | }; |
111 | 129 | ||
112 | static void __pde_users_dec(struct proc_dir_entry *pde) | 130 | static void __pde_users_dec(struct proc_dir_entry *pde) |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 7838e5cfec14..292577531ad1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde); | |||
117 | 117 | ||
118 | int proc_fill_super(struct super_block *); | 118 | int proc_fill_super(struct super_block *); |
119 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 119 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
120 | int proc_remount(struct super_block *sb, int *flags, char *data); | ||
120 | 121 | ||
121 | /* | 122 | /* |
122 | * These are generic /proc routines that use the internal | 123 | * These are generic /proc routines that use the internal |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 03102d978180..46a15d8a29ca 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/pid_namespace.h> | 20 | #include <linux/pid_namespace.h> |
21 | #include <linux/parser.h> | ||
21 | 22 | ||
22 | #include "internal.h" | 23 | #include "internal.h" |
23 | 24 | ||
@@ -36,6 +37,63 @@ static int proc_set_super(struct super_block *sb, void *data) | |||
36 | return err; | 37 | return err; |
37 | } | 38 | } |
38 | 39 | ||
40 | enum { | ||
41 | Opt_gid, Opt_hidepid, Opt_err, | ||
42 | }; | ||
43 | |||
44 | static const match_table_t tokens = { | ||
45 | {Opt_hidepid, "hidepid=%u"}, | ||
46 | {Opt_gid, "gid=%u"}, | ||
47 | {Opt_err, NULL}, | ||
48 | }; | ||
49 | |||
50 | static int proc_parse_options(char *options, struct pid_namespace *pid) | ||
51 | { | ||
52 | char *p; | ||
53 | substring_t args[MAX_OPT_ARGS]; | ||
54 | int option; | ||
55 | |||
56 | if (!options) | ||
57 | return 1; | ||
58 | |||
59 | while ((p = strsep(&options, ",")) != NULL) { | ||
60 | int token; | ||
61 | if (!*p) | ||
62 | continue; | ||
63 | |||
64 | args[0].to = args[0].from = 0; | ||
65 | token = match_token(p, tokens, args); | ||
66 | switch (token) { | ||
67 | case Opt_gid: | ||
68 | if (match_int(&args[0], &option)) | ||
69 | return 0; | ||
70 | pid->pid_gid = option; | ||
71 | break; | ||
72 | case Opt_hidepid: | ||
73 | if (match_int(&args[0], &option)) | ||
74 | return 0; | ||
75 | if (option < 0 || option > 2) { | ||
76 | pr_err("proc: hidepid value must be between 0 and 2.\n"); | ||
77 | return 0; | ||
78 | } | ||
79 | pid->hide_pid = option; | ||
80 | break; | ||
81 | default: | ||
82 | pr_err("proc: unrecognized mount option \"%s\" " | ||
83 | "or missing value\n", p); | ||
84 | return 0; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | return 1; | ||
89 | } | ||
90 | |||
91 | int proc_remount(struct super_block *sb, int *flags, char *data) | ||
92 | { | ||
93 | struct pid_namespace *pid = sb->s_fs_info; | ||
94 | return !proc_parse_options(data, pid); | ||
95 | } | ||
96 | |||
39 | static struct dentry *proc_mount(struct file_system_type *fs_type, | 97 | static struct dentry *proc_mount(struct file_system_type *fs_type, |
40 | int flags, const char *dev_name, void *data) | 98 | int flags, const char *dev_name, void *data) |
41 | { | 99 | { |
@@ -43,11 +101,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
43 | struct super_block *sb; | 101 | struct super_block *sb; |
44 | struct pid_namespace *ns; | 102 | struct pid_namespace *ns; |
45 | struct proc_inode *ei; | 103 | struct proc_inode *ei; |
104 | char *options; | ||
46 | 105 | ||
47 | if (flags & MS_KERNMOUNT) | 106 | if (flags & MS_KERNMOUNT) { |
48 | ns = (struct pid_namespace *)data; | 107 | ns = (struct pid_namespace *)data; |
49 | else | 108 | options = NULL; |
109 | } else { | ||
50 | ns = current->nsproxy->pid_ns; | 110 | ns = current->nsproxy->pid_ns; |
111 | options = data; | ||
112 | } | ||
51 | 113 | ||
52 | sb = sget(fs_type, proc_test_super, proc_set_super, ns); | 114 | sb = sget(fs_type, proc_test_super, proc_set_super, ns); |
53 | if (IS_ERR(sb)) | 115 | if (IS_ERR(sb)) |
@@ -55,6 +117,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
55 | 117 | ||
56 | if (!sb->s_root) { | 118 | if (!sb->s_root) { |
57 | sb->s_flags = flags; | 119 | sb->s_flags = flags; |
120 | if (!proc_parse_options(options, ns)) { | ||
121 | deactivate_locked_super(sb); | ||
122 | return ERR_PTR(-EINVAL); | ||
123 | } | ||
58 | err = proc_fill_super(sb); | 124 | err = proc_fill_super(sb); |
59 | if (err) { | 125 | if (err) { |
60 | deactivate_locked_super(sb); | 126 | deactivate_locked_super(sb); |
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index a945cd265228..70de42f09f1d 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c | |||
@@ -1364,10 +1364,7 @@ int reiserfs_init_bitmap_cache(struct super_block *sb) | |||
1364 | struct reiserfs_bitmap_info *bitmap; | 1364 | struct reiserfs_bitmap_info *bitmap; |
1365 | unsigned int bmap_nr = reiserfs_bmap_count(sb); | 1365 | unsigned int bmap_nr = reiserfs_bmap_count(sb); |
1366 | 1366 | ||
1367 | /* Avoid lock recursion in fault case */ | ||
1368 | reiserfs_write_unlock(sb); | ||
1369 | bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); | 1367 | bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); |
1370 | reiserfs_write_lock(sb); | ||
1371 | if (bitmap == NULL) | 1368 | if (bitmap == NULL) |
1372 | return -ENOMEM; | 1369 | return -ENOMEM; |
1373 | 1370 | ||
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index eb711060a6f2..c3cf54fd4de3 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -2678,16 +2678,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2678 | char b[BDEVNAME_SIZE]; | 2678 | char b[BDEVNAME_SIZE]; |
2679 | int ret; | 2679 | int ret; |
2680 | 2680 | ||
2681 | /* | ||
2682 | * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS | ||
2683 | * dependency inversion warnings. | ||
2684 | */ | ||
2685 | reiserfs_write_unlock(sb); | ||
2686 | journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); | 2681 | journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); |
2687 | if (!journal) { | 2682 | if (!journal) { |
2688 | reiserfs_warning(sb, "journal-1256", | 2683 | reiserfs_warning(sb, "journal-1256", |
2689 | "unable to get memory for journal structure"); | 2684 | "unable to get memory for journal structure"); |
2690 | reiserfs_write_lock(sb); | ||
2691 | return 1; | 2685 | return 1; |
2692 | } | 2686 | } |
2693 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); | 2687 | INIT_LIST_HEAD(&journal->j_bitmap_nodes); |
@@ -2695,10 +2689,8 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2695 | INIT_LIST_HEAD(&journal->j_working_list); | 2689 | INIT_LIST_HEAD(&journal->j_working_list); |
2696 | INIT_LIST_HEAD(&journal->j_journal_list); | 2690 | INIT_LIST_HEAD(&journal->j_journal_list); |
2697 | journal->j_persistent_trans = 0; | 2691 | journal->j_persistent_trans = 0; |
2698 | ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, | 2692 | if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, |
2699 | reiserfs_bmap_count(sb)); | 2693 | reiserfs_bmap_count(sb))) |
2700 | reiserfs_write_lock(sb); | ||
2701 | if (ret) | ||
2702 | goto free_and_return; | 2694 | goto free_and_return; |
2703 | 2695 | ||
2704 | allocate_bitmap_nodes(sb); | 2696 | allocate_bitmap_nodes(sb); |
@@ -2727,27 +2719,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2727 | goto free_and_return; | 2719 | goto free_and_return; |
2728 | } | 2720 | } |
2729 | 2721 | ||
2730 | /* | ||
2731 | * We need to unlock here to avoid creating the following | ||
2732 | * dependency: | ||
2733 | * reiserfs_lock -> sysfs_mutex | ||
2734 | * Because the reiserfs mmap path creates the following dependency: | ||
2735 | * mm->mmap -> reiserfs_lock, hence we have | ||
2736 | * mm->mmap -> reiserfs_lock ->sysfs_mutex | ||
2737 | * This would ends up in a circular dependency with sysfs readdir path | ||
2738 | * which does sysfs_mutex -> mm->mmap_sem | ||
2739 | * This is fine because the reiserfs lock is useless in mount path, | ||
2740 | * at least until we call journal_begin. We keep it for paranoid | ||
2741 | * reasons. | ||
2742 | */ | ||
2743 | reiserfs_write_unlock(sb); | ||
2744 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { | 2722 | if (journal_init_dev(sb, journal, j_dev_name) != 0) { |
2745 | reiserfs_write_lock(sb); | ||
2746 | reiserfs_warning(sb, "sh-462", | 2723 | reiserfs_warning(sb, "sh-462", |
2747 | "unable to initialize jornal device"); | 2724 | "unable to initialize jornal device"); |
2748 | goto free_and_return; | 2725 | goto free_and_return; |
2749 | } | 2726 | } |
2750 | reiserfs_write_lock(sb); | ||
2751 | 2727 | ||
2752 | rs = SB_DISK_SUPER_BLOCK(sb); | 2728 | rs = SB_DISK_SUPER_BLOCK(sb); |
2753 | 2729 | ||
@@ -2829,9 +2805,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2829 | journal->j_mount_id = 10; | 2805 | journal->j_mount_id = 10; |
2830 | journal->j_state = 0; | 2806 | journal->j_state = 0; |
2831 | atomic_set(&(journal->j_jlock), 0); | 2807 | atomic_set(&(journal->j_jlock), 0); |
2832 | reiserfs_write_unlock(sb); | ||
2833 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); | 2808 | journal->j_cnode_free_list = allocate_cnodes(num_cnodes); |
2834 | reiserfs_write_lock(sb); | ||
2835 | journal->j_cnode_free_orig = journal->j_cnode_free_list; | 2809 | journal->j_cnode_free_orig = journal->j_cnode_free_list; |
2836 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; | 2810 | journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; |
2837 | journal->j_cnode_used = 0; | 2811 | journal->j_cnode_used = 0; |
@@ -2848,24 +2822,37 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2848 | 2822 | ||
2849 | init_journal_hash(sb); | 2823 | init_journal_hash(sb); |
2850 | jl = journal->j_current_jl; | 2824 | jl = journal->j_current_jl; |
2825 | |||
2826 | /* | ||
2827 | * get_list_bitmap() may call flush_commit_list() which | ||
2828 | * requires the lock. Calling flush_commit_list() shouldn't happen | ||
2829 | * this early but I like to be paranoid. | ||
2830 | */ | ||
2831 | reiserfs_write_lock(sb); | ||
2851 | jl->j_list_bitmap = get_list_bitmap(sb, jl); | 2832 | jl->j_list_bitmap = get_list_bitmap(sb, jl); |
2833 | reiserfs_write_unlock(sb); | ||
2852 | if (!jl->j_list_bitmap) { | 2834 | if (!jl->j_list_bitmap) { |
2853 | reiserfs_warning(sb, "journal-2005", | 2835 | reiserfs_warning(sb, "journal-2005", |
2854 | "get_list_bitmap failed for journal list 0"); | 2836 | "get_list_bitmap failed for journal list 0"); |
2855 | goto free_and_return; | 2837 | goto free_and_return; |
2856 | } | 2838 | } |
2857 | if (journal_read(sb) < 0) { | 2839 | |
2840 | /* | ||
2841 | * Journal_read needs to be inspected in order to push down | ||
2842 | * the lock further inside (or even remove it). | ||
2843 | */ | ||
2844 | reiserfs_write_lock(sb); | ||
2845 | ret = journal_read(sb); | ||
2846 | reiserfs_write_unlock(sb); | ||
2847 | if (ret < 0) { | ||
2858 | reiserfs_warning(sb, "reiserfs-2006", | 2848 | reiserfs_warning(sb, "reiserfs-2006", |
2859 | "Replay Failure, unable to mount"); | 2849 | "Replay Failure, unable to mount"); |
2860 | goto free_and_return; | 2850 | goto free_and_return; |
2861 | } | 2851 | } |
2862 | 2852 | ||
2863 | reiserfs_mounted_fs_count++; | 2853 | reiserfs_mounted_fs_count++; |
2864 | if (reiserfs_mounted_fs_count <= 1) { | 2854 | if (reiserfs_mounted_fs_count <= 1) |
2865 | reiserfs_write_unlock(sb); | ||
2866 | commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); | 2855 | commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); |
2867 | reiserfs_write_lock(sb); | ||
2868 | } | ||
2869 | 2856 | ||
2870 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); | 2857 | INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); |
2871 | journal->j_work_sb = sb; | 2858 | journal->j_work_sb = sb; |
@@ -2896,14 +2883,13 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | |||
2896 | journal->j_cnode_free < (journal->j_trans_max * 3)) { | 2883 | journal->j_cnode_free < (journal->j_trans_max * 3)) { |
2897 | return 1; | 2884 | return 1; |
2898 | } | 2885 | } |
2899 | /* protected by the BKL here */ | 2886 | |
2900 | journal->j_len_alloc += new_alloc; | 2887 | journal->j_len_alloc += new_alloc; |
2901 | th->t_blocks_allocated += new_alloc ; | 2888 | th->t_blocks_allocated += new_alloc ; |
2902 | return 0; | 2889 | return 0; |
2903 | } | 2890 | } |
2904 | 2891 | ||
2905 | /* this must be called inside a transaction, and requires the | 2892 | /* this must be called inside a transaction |
2906 | ** kernel_lock to be held | ||
2907 | */ | 2893 | */ |
2908 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | 2894 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) |
2909 | { | 2895 | { |
@@ -2914,8 +2900,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | |||
2914 | return; | 2900 | return; |
2915 | } | 2901 | } |
2916 | 2902 | ||
2917 | /* this must be called without a transaction started, and does not | 2903 | /* this must be called without a transaction started |
2918 | ** require BKL | ||
2919 | */ | 2904 | */ |
2920 | void reiserfs_allow_writes(struct super_block *s) | 2905 | void reiserfs_allow_writes(struct super_block *s) |
2921 | { | 2906 | { |
@@ -2924,8 +2909,7 @@ void reiserfs_allow_writes(struct super_block *s) | |||
2924 | wake_up(&journal->j_join_wait); | 2909 | wake_up(&journal->j_join_wait); |
2925 | } | 2910 | } |
2926 | 2911 | ||
2927 | /* this must be called without a transaction started, and does not | 2912 | /* this must be called without a transaction started |
2928 | ** require BKL | ||
2929 | */ | 2913 | */ |
2930 | void reiserfs_wait_on_write_block(struct super_block *s) | 2914 | void reiserfs_wait_on_write_block(struct super_block *s) |
2931 | { | 2915 | { |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 1d42e707d5fa..e12d8b97cd4d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -1519,9 +1519,7 @@ static int read_super_block(struct super_block *s, int offset) | |||
1519 | static int reread_meta_blocks(struct super_block *s) | 1519 | static int reread_meta_blocks(struct super_block *s) |
1520 | { | 1520 | { |
1521 | ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); | 1521 | ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); |
1522 | reiserfs_write_unlock(s); | ||
1523 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); | 1522 | wait_on_buffer(SB_BUFFER_WITH_SB(s)); |
1524 | reiserfs_write_lock(s); | ||
1525 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { | 1523 | if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { |
1526 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); | 1524 | reiserfs_warning(s, "reiserfs-2504", "error reading the super"); |
1527 | return 1; | 1525 | return 1; |
@@ -1746,22 +1744,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1746 | mutex_init(&REISERFS_SB(s)->lock); | 1744 | mutex_init(&REISERFS_SB(s)->lock); |
1747 | REISERFS_SB(s)->lock_depth = -1; | 1745 | REISERFS_SB(s)->lock_depth = -1; |
1748 | 1746 | ||
1749 | /* | ||
1750 | * This function is called with the bkl, which also was the old | ||
1751 | * locking used here. | ||
1752 | * do_journal_begin() will soon check if we hold the lock (ie: was the | ||
1753 | * bkl). This is likely because do_journal_begin() has several another | ||
1754 | * callers because at this time, it doesn't seem to be necessary to | ||
1755 | * protect against anything. | ||
1756 | * Anyway, let's be conservative and lock for now. | ||
1757 | */ | ||
1758 | reiserfs_write_lock(s); | ||
1759 | |||
1760 | jdev_name = NULL; | 1747 | jdev_name = NULL; |
1761 | if (reiserfs_parse_options | 1748 | if (reiserfs_parse_options |
1762 | (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, | 1749 | (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, |
1763 | &commit_max_age, qf_names, &qfmt) == 0) { | 1750 | &commit_max_age, qf_names, &qfmt) == 0) { |
1764 | goto error; | 1751 | goto error_unlocked; |
1765 | } | 1752 | } |
1766 | if (jdev_name && jdev_name[0]) { | 1753 | if (jdev_name && jdev_name[0]) { |
1767 | REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); | 1754 | REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); |
@@ -1777,7 +1764,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1777 | 1764 | ||
1778 | if (blocks) { | 1765 | if (blocks) { |
1779 | SWARN(silent, s, "jmacd-7", "resize option for remount only"); | 1766 | SWARN(silent, s, "jmacd-7", "resize option for remount only"); |
1780 | goto error; | 1767 | goto error_unlocked; |
1781 | } | 1768 | } |
1782 | 1769 | ||
1783 | /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ | 1770 | /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ |
@@ -1787,7 +1774,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1787 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { | 1774 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { |
1788 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", | 1775 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", |
1789 | reiserfs_bdevname(s)); | 1776 | reiserfs_bdevname(s)); |
1790 | goto error; | 1777 | goto error_unlocked; |
1791 | } | 1778 | } |
1792 | 1779 | ||
1793 | rs = SB_DISK_SUPER_BLOCK(s); | 1780 | rs = SB_DISK_SUPER_BLOCK(s); |
@@ -1803,7 +1790,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1803 | "or increase size of your LVM partition"); | 1790 | "or increase size of your LVM partition"); |
1804 | SWARN(silent, s, "", "Or may be you forgot to " | 1791 | SWARN(silent, s, "", "Or may be you forgot to " |
1805 | "reboot after fdisk when it told you to"); | 1792 | "reboot after fdisk when it told you to"); |
1806 | goto error; | 1793 | goto error_unlocked; |
1807 | } | 1794 | } |
1808 | 1795 | ||
1809 | sbi->s_mount_state = SB_REISERFS_STATE(s); | 1796 | sbi->s_mount_state = SB_REISERFS_STATE(s); |
@@ -1811,8 +1798,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1811 | 1798 | ||
1812 | if ((errval = reiserfs_init_bitmap_cache(s))) { | 1799 | if ((errval = reiserfs_init_bitmap_cache(s))) { |
1813 | SWARN(silent, s, "jmacd-8", "unable to read bitmap"); | 1800 | SWARN(silent, s, "jmacd-8", "unable to read bitmap"); |
1814 | goto error; | 1801 | goto error_unlocked; |
1815 | } | 1802 | } |
1803 | |||
1816 | errval = -EINVAL; | 1804 | errval = -EINVAL; |
1817 | #ifdef CONFIG_REISERFS_CHECK | 1805 | #ifdef CONFIG_REISERFS_CHECK |
1818 | SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); | 1806 | SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); |
@@ -1835,24 +1823,26 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1835 | if (reiserfs_barrier_flush(s)) { | 1823 | if (reiserfs_barrier_flush(s)) { |
1836 | printk("reiserfs: using flush barriers\n"); | 1824 | printk("reiserfs: using flush barriers\n"); |
1837 | } | 1825 | } |
1826 | |||
1838 | // set_device_ro(s->s_dev, 1) ; | 1827 | // set_device_ro(s->s_dev, 1) ; |
1839 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { | 1828 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { |
1840 | SWARN(silent, s, "sh-2022", | 1829 | SWARN(silent, s, "sh-2022", |
1841 | "unable to initialize journal space"); | 1830 | "unable to initialize journal space"); |
1842 | goto error; | 1831 | goto error_unlocked; |
1843 | } else { | 1832 | } else { |
1844 | jinit_done = 1; /* once this is set, journal_release must be called | 1833 | jinit_done = 1; /* once this is set, journal_release must be called |
1845 | ** if we error out of the mount | 1834 | ** if we error out of the mount |
1846 | */ | 1835 | */ |
1847 | } | 1836 | } |
1837 | |||
1848 | if (reread_meta_blocks(s)) { | 1838 | if (reread_meta_blocks(s)) { |
1849 | SWARN(silent, s, "jmacd-9", | 1839 | SWARN(silent, s, "jmacd-9", |
1850 | "unable to reread meta blocks after journal init"); | 1840 | "unable to reread meta blocks after journal init"); |
1851 | goto error; | 1841 | goto error_unlocked; |
1852 | } | 1842 | } |
1853 | 1843 | ||
1854 | if (replay_only(s)) | 1844 | if (replay_only(s)) |
1855 | goto error; | 1845 | goto error_unlocked; |
1856 | 1846 | ||
1857 | if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { | 1847 | if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { |
1858 | SWARN(silent, s, "clm-7000", | 1848 | SWARN(silent, s, "clm-7000", |
@@ -1866,9 +1856,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1866 | reiserfs_init_locked_inode, (void *)(&args)); | 1856 | reiserfs_init_locked_inode, (void *)(&args)); |
1867 | if (!root_inode) { | 1857 | if (!root_inode) { |
1868 | SWARN(silent, s, "jmacd-10", "get root inode failed"); | 1858 | SWARN(silent, s, "jmacd-10", "get root inode failed"); |
1869 | goto error; | 1859 | goto error_unlocked; |
1870 | } | 1860 | } |
1871 | 1861 | ||
1862 | /* | ||
1863 | * This path assumed to be called with the BKL in the old times. | ||
1864 | * Now we have inherited the big reiserfs lock from it and many | ||
1865 | * reiserfs helpers called in the mount path and elsewhere require | ||
1866 | * this lock to be held even if it's not always necessary. Let's be | ||
1867 | * conservative and hold it early. The window can be reduced after | ||
1868 | * careful review of the code. | ||
1869 | */ | ||
1870 | reiserfs_write_lock(s); | ||
1871 | |||
1872 | if (root_inode->i_state & I_NEW) { | 1872 | if (root_inode->i_state & I_NEW) { |
1873 | reiserfs_read_locked_inode(root_inode, &args); | 1873 | reiserfs_read_locked_inode(root_inode, &args); |
1874 | unlock_new_inode(root_inode); | 1874 | unlock_new_inode(root_inode); |
@@ -1995,12 +1995,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1995 | return (0); | 1995 | return (0); |
1996 | 1996 | ||
1997 | error: | 1997 | error: |
1998 | if (jinit_done) { /* kill the commit thread, free journal ram */ | 1998 | reiserfs_write_unlock(s); |
1999 | |||
2000 | error_unlocked: | ||
2001 | /* kill the commit thread, free journal ram */ | ||
2002 | if (jinit_done) { | ||
2003 | reiserfs_write_lock(s); | ||
1999 | journal_release_error(NULL, s); | 2004 | journal_release_error(NULL, s); |
2005 | reiserfs_write_unlock(s); | ||
2000 | } | 2006 | } |
2001 | 2007 | ||
2002 | reiserfs_write_unlock(s); | ||
2003 | |||
2004 | reiserfs_free_bitmap_cache(s); | 2008 | reiserfs_free_bitmap_cache(s); |
2005 | if (SB_BUFFER_WITH_SB(s)) | 2009 | if (SB_BUFFER_WITH_SB(s)) |
2006 | brelse(SB_BUFFER_WITH_SB(s)); | 2010 | brelse(SB_BUFFER_WITH_SB(s)); |