about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig.binfmt | 3
-rw-r--r-- fs/binfmt_elf.c | 2
-rw-r--r-- fs/btrfs/file.c | 2
-rw-r--r-- fs/exec.c | 4
-rw-r--r-- fs/inode.c | 2
-rw-r--r-- fs/proc/base.c | 447
-rw-r--r-- fs/proc/inode.c | 18
-rw-r--r-- fs/proc/internal.h | 1
-rw-r--r-- fs/proc/root.c | 70
-rw-r--r-- fs/reiserfs/bitmap.c | 3
-rw-r--r-- fs/reiserfs/journal.c | 64
-rw-r--r-- fs/reiserfs/super.c | 54
12 files changed, 587 insertions, 83 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 79e2ca7973b7..e95d1b64082c 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,6 +27,9 @@ config COMPAT_BINFMT_ELF
27 bool 27 bool
28 depends on COMPAT && BINFMT_ELF 28 depends on COMPAT && BINFMT_ELF
29 29
30config ARCH_BINFMT_ELF_RANDOMIZE_PIE
31 bool
32
30config BINFMT_ELF_FDPIC 33config BINFMT_ELF_FDPIC
31 bool "Kernel support for FDPIC ELF binaries" 34 bool "Kernel support for FDPIC ELF binaries"
32 default y 35 default y
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 21ac5ee4b43f..bcb884e2d613 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -794,7 +794,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
794 * default mmap base, as well as whatever program they 794 * default mmap base, as well as whatever program they
795 * might try to exec. This is because the brk will 795 * might try to exec. This is because the brk will
796 * follow the loader, and is not movable. */ 796 * follow the loader, and is not movable. */
797#if defined(CONFIG_X86) || defined(CONFIG_ARM) 797#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
798 /* Memory randomization might have been switched off 798 /* Memory randomization might have been switched off
799 * in runtime via sysctl. 799 * in runtime via sysctl.
800 * If that is the case, retain the original non-zero 800 * If that is the case, retain the original non-zero
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 97fbe939c050..20375e6691c3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1081,7 +1081,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1081again: 1081again:
1082 for (i = 0; i < num_pages; i++) { 1082 for (i = 0; i < num_pages; i++) {
1083 pages[i] = find_or_create_page(inode->i_mapping, index + i, 1083 pages[i] = find_or_create_page(inode->i_mapping, index + i,
1084 mask); 1084 mask | __GFP_WRITE);
1085 if (!pages[i]) { 1085 if (!pages[i]) {
1086 faili = i - 1; 1086 faili = i - 1;
1087 err = -ENOMEM; 1087 err = -ENOMEM;
diff --git a/fs/exec.c b/fs/exec.c
index 3f64b9f26e7d..aeb135c7ff5c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -59,6 +59,8 @@
59#include <asm/uaccess.h> 59#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 60#include <asm/mmu_context.h>
61#include <asm/tlb.h> 61#include <asm/tlb.h>
62
63#include <trace/events/task.h>
62#include "internal.h" 64#include "internal.h"
63 65
64int core_uses_pid; 66int core_uses_pid;
@@ -1054,6 +1056,8 @@ void set_task_comm(struct task_struct *tsk, char *buf)
1054{ 1056{
1055 task_lock(tsk); 1057 task_lock(tsk);
1056 1058
1059 trace_task_rename(tsk, buf);
1060
1057 /* 1061 /*
1058 * Threads may access current->comm without holding 1062 * Threads may access current->comm without holding
1059 * the task lock, so write the string carefully. 1063 * the task lock, so write the string carefully.
diff --git a/fs/inode.c b/fs/inode.c
index 87535753ab04..4fa4f0916af9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -776,6 +776,8 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan)
776 else 776 else
777 __count_vm_events(PGINODESTEAL, reap); 777 __count_vm_events(PGINODESTEAL, reap);
778 spin_unlock(&sb->s_inode_lru_lock); 778 spin_unlock(&sb->s_inode_lru_lock);
779 if (current->reclaim_state)
780 current->reclaim_state->reclaimed_slab += reap;
779 781
780 dispose_list(&freeable); 782 dispose_list(&freeable);
781} 783}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a1dddda999f2..8173dfd89cb2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,9 +83,11 @@
83#include <linux/pid_namespace.h> 83#include <linux/pid_namespace.h>
84#include <linux/fs_struct.h> 84#include <linux/fs_struct.h>
85#include <linux/slab.h> 85#include <linux/slab.h>
86#include <linux/flex_array.h>
86#ifdef CONFIG_HARDWALL 87#ifdef CONFIG_HARDWALL
87#include <asm/hardwall.h> 88#include <asm/hardwall.h>
88#endif 89#endif
90#include <trace/events/oom.h>
89#include "internal.h" 91#include "internal.h"
90 92
91/* NOTE: 93/* NOTE:
@@ -133,6 +135,8 @@ struct pid_entry {
133 NULL, &proc_single_file_operations, \ 135 NULL, &proc_single_file_operations, \
134 { .proc_show = show } ) 136 { .proc_show = show } )
135 137
138static int proc_fd_permission(struct inode *inode, int mask);
139
136/* 140/*
137 * Count the number of hardlinks for the pid_entry table, excluding the . 141 * Count the number of hardlinks for the pid_entry table, excluding the .
138 * and .. links. 142 * and .. links.
@@ -165,9 +169,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
165 return result; 169 return result;
166} 170}
167 171
168static int proc_cwd_link(struct inode *inode, struct path *path) 172static int proc_cwd_link(struct dentry *dentry, struct path *path)
169{ 173{
170 struct task_struct *task = get_proc_task(inode); 174 struct task_struct *task = get_proc_task(dentry->d_inode);
171 int result = -ENOENT; 175 int result = -ENOENT;
172 176
173 if (task) { 177 if (task) {
@@ -182,9 +186,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
182 return result; 186 return result;
183} 187}
184 188
185static int proc_root_link(struct inode *inode, struct path *path) 189static int proc_root_link(struct dentry *dentry, struct path *path)
186{ 190{
187 struct task_struct *task = get_proc_task(inode); 191 struct task_struct *task = get_proc_task(dentry->d_inode);
188 int result = -ENOENT; 192 int result = -ENOENT;
189 193
190 if (task) { 194 if (task) {
@@ -627,6 +631,50 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
627 return 0; 631 return 0;
628} 632}
629 633
634/*
635 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
636 * or euid/egid (for hide_pid_min=2)?
637 */
638static bool has_pid_permissions(struct pid_namespace *pid,
639 struct task_struct *task,
640 int hide_pid_min)
641{
642 if (pid->hide_pid < hide_pid_min)
643 return true;
644 if (in_group_p(pid->pid_gid))
645 return true;
646 return ptrace_may_access(task, PTRACE_MODE_READ);
647}
648
649
650static int proc_pid_permission(struct inode *inode, int mask)
651{
652 struct pid_namespace *pid = inode->i_sb->s_fs_info;
653 struct task_struct *task;
654 bool has_perms;
655
656 task = get_proc_task(inode);
657 has_perms = has_pid_permissions(pid, task, 1);
658 put_task_struct(task);
659
660 if (!has_perms) {
661 if (pid->hide_pid == 2) {
662 /*
663 * Let's make getdents(), stat(), and open()
664 * consistent with each other. If a process
665 * may not stat() a file, it shouldn't be seen
666 * in procfs at all.
667 */
668 return -ENOENT;
669 }
670
671 return -EPERM;
672 }
673 return generic_permission(inode, mask);
674}
675
676
677
630static const struct inode_operations proc_def_inode_operations = { 678static const struct inode_operations proc_def_inode_operations = {
631 .setattr = proc_setattr, 679 .setattr = proc_setattr,
632}; 680};
@@ -1010,6 +1058,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1010 else 1058 else
1011 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1059 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1012 -OOM_DISABLE; 1060 -OOM_DISABLE;
1061 trace_oom_score_adj_update(task);
1013err_sighand: 1062err_sighand:
1014 unlock_task_sighand(task, &flags); 1063 unlock_task_sighand(task, &flags);
1015err_task_lock: 1064err_task_lock:
@@ -1097,6 +1146,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1097 task->signal->oom_score_adj = oom_score_adj; 1146 task->signal->oom_score_adj = oom_score_adj;
1098 if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1147 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1099 task->signal->oom_score_adj_min = oom_score_adj; 1148 task->signal->oom_score_adj_min = oom_score_adj;
1149 trace_oom_score_adj_update(task);
1100 /* 1150 /*
1101 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1151 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1102 * always attainable. 1152 * always attainable.
@@ -1453,13 +1503,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
1453 .release = single_release, 1503 .release = single_release,
1454}; 1504};
1455 1505
1456static int proc_exe_link(struct inode *inode, struct path *exe_path) 1506static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1457{ 1507{
1458 struct task_struct *task; 1508 struct task_struct *task;
1459 struct mm_struct *mm; 1509 struct mm_struct *mm;
1460 struct file *exe_file; 1510 struct file *exe_file;
1461 1511
1462 task = get_proc_task(inode); 1512 task = get_proc_task(dentry->d_inode);
1463 if (!task) 1513 if (!task)
1464 return -ENOENT; 1514 return -ENOENT;
1465 mm = get_task_mm(task); 1515 mm = get_task_mm(task);
@@ -1489,7 +1539,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1489 if (!proc_fd_access_allowed(inode)) 1539 if (!proc_fd_access_allowed(inode))
1490 goto out; 1540 goto out;
1491 1541
1492 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1542 error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
1493out: 1543out:
1494 return ERR_PTR(error); 1544 return ERR_PTR(error);
1495} 1545}
@@ -1528,7 +1578,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1528 if (!proc_fd_access_allowed(inode)) 1578 if (!proc_fd_access_allowed(inode))
1529 goto out; 1579 goto out;
1530 1580
1531 error = PROC_I(inode)->op.proc_get_link(inode, &path); 1581 error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1532 if (error) 1582 if (error)
1533 goto out; 1583 goto out;
1534 1584
@@ -1609,6 +1659,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1609 struct inode *inode = dentry->d_inode; 1659 struct inode *inode = dentry->d_inode;
1610 struct task_struct *task; 1660 struct task_struct *task;
1611 const struct cred *cred; 1661 const struct cred *cred;
1662 struct pid_namespace *pid = dentry->d_sb->s_fs_info;
1612 1663
1613 generic_fillattr(inode, stat); 1664 generic_fillattr(inode, stat);
1614 1665
@@ -1617,6 +1668,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1617 stat->gid = 0; 1668 stat->gid = 0;
1618 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1669 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1619 if (task) { 1670 if (task) {
1671 if (!has_pid_permissions(pid, task, 2)) {
1672 rcu_read_unlock();
1673 /*
1674 * This doesn't prevent learning whether PID exists,
1675 * it only makes getattr() consistent with readdir().
1676 */
1677 return -ENOENT;
1678 }
1620 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1679 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1621 task_dumpable(task)) { 1680 task_dumpable(task)) {
1622 cred = __task_cred(task); 1681 cred = __task_cred(task);
@@ -1820,9 +1879,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1820 return -ENOENT; 1879 return -ENOENT;
1821} 1880}
1822 1881
1823static int proc_fd_link(struct inode *inode, struct path *path) 1882static int proc_fd_link(struct dentry *dentry, struct path *path)
1824{ 1883{
1825 return proc_fd_info(inode, path, NULL); 1884 return proc_fd_info(dentry->d_inode, path, NULL);
1826} 1885}
1827 1886
1828static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1887static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -2043,6 +2102,355 @@ static const struct file_operations proc_fd_operations = {
2043 .llseek = default_llseek, 2102 .llseek = default_llseek,
2044}; 2103};
2045 2104
2105#ifdef CONFIG_CHECKPOINT_RESTORE
2106
2107/*
2108 * dname_to_vma_addr - maps a dentry name into two unsigned longs
2109 * which represent vma start and end addresses.
2110 */
2111static int dname_to_vma_addr(struct dentry *dentry,
2112 unsigned long *start, unsigned long *end)
2113{
2114 if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
2115 return -EINVAL;
2116
2117 return 0;
2118}
2119
2120static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
2121{
2122 unsigned long vm_start, vm_end;
2123 bool exact_vma_exists = false;
2124 struct mm_struct *mm = NULL;
2125 struct task_struct *task;
2126 const struct cred *cred;
2127 struct inode *inode;
2128 int status = 0;
2129
2130 if (nd && nd->flags & LOOKUP_RCU)
2131 return -ECHILD;
2132
2133 if (!capable(CAP_SYS_ADMIN)) {
2134 status = -EACCES;
2135 goto out_notask;
2136 }
2137
2138 inode = dentry->d_inode;
2139 task = get_proc_task(inode);
2140 if (!task)
2141 goto out_notask;
2142
2143 if (!ptrace_may_access(task, PTRACE_MODE_READ))
2144 goto out;
2145
2146 mm = get_task_mm(task);
2147 if (!mm)
2148 goto out;
2149
2150 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
2151 down_read(&mm->mmap_sem);
2152 exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
2153 up_read(&mm->mmap_sem);
2154 }
2155
2156 mmput(mm);
2157
2158 if (exact_vma_exists) {
2159 if (task_dumpable(task)) {
2160 rcu_read_lock();
2161 cred = __task_cred(task);
2162 inode->i_uid = cred->euid;
2163 inode->i_gid = cred->egid;
2164 rcu_read_unlock();
2165 } else {
2166 inode->i_uid = 0;
2167 inode->i_gid = 0;
2168 }
2169 security_task_to_inode(task, inode);
2170 status = 1;
2171 }
2172
2173out:
2174 put_task_struct(task);
2175
2176out_notask:
2177 if (status <= 0)
2178 d_drop(dentry);
2179
2180 return status;
2181}
2182
2183static const struct dentry_operations tid_map_files_dentry_operations = {
2184 .d_revalidate = map_files_d_revalidate,
2185 .d_delete = pid_delete_dentry,
2186};
2187
2188static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
2189{
2190 unsigned long vm_start, vm_end;
2191 struct vm_area_struct *vma;
2192 struct task_struct *task;
2193 struct mm_struct *mm;
2194 int rc;
2195
2196 rc = -ENOENT;
2197 task = get_proc_task(dentry->d_inode);
2198 if (!task)
2199 goto out;
2200
2201 mm = get_task_mm(task);
2202 put_task_struct(task);
2203 if (!mm)
2204 goto out;
2205
2206 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
2207 if (rc)
2208 goto out_mmput;
2209
2210 down_read(&mm->mmap_sem);
2211 vma = find_exact_vma(mm, vm_start, vm_end);
2212 if (vma && vma->vm_file) {
2213 *path = vma->vm_file->f_path;
2214 path_get(path);
2215 rc = 0;
2216 }
2217 up_read(&mm->mmap_sem);
2218
2219out_mmput:
2220 mmput(mm);
2221out:
2222 return rc;
2223}
2224
2225struct map_files_info {
2226 struct file *file;
2227 unsigned long len;
2228 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
2229};
2230
2231static struct dentry *
2232proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2233 struct task_struct *task, const void *ptr)
2234{
2235 const struct file *file = ptr;
2236 struct proc_inode *ei;
2237 struct inode *inode;
2238
2239 if (!file)
2240 return ERR_PTR(-ENOENT);
2241
2242 inode = proc_pid_make_inode(dir->i_sb, task);
2243 if (!inode)
2244 return ERR_PTR(-ENOENT);
2245
2246 ei = PROC_I(inode);
2247 ei->op.proc_get_link = proc_map_files_get_link;
2248
2249 inode->i_op = &proc_pid_link_inode_operations;
2250 inode->i_size = 64;
2251 inode->i_mode = S_IFLNK;
2252
2253 if (file->f_mode & FMODE_READ)
2254 inode->i_mode |= S_IRUSR;
2255 if (file->f_mode & FMODE_WRITE)
2256 inode->i_mode |= S_IWUSR;
2257
2258 d_set_d_op(dentry, &tid_map_files_dentry_operations);
2259 d_add(dentry, inode);
2260
2261 return NULL;
2262}
2263
2264static struct dentry *proc_map_files_lookup(struct inode *dir,
2265 struct dentry *dentry, struct nameidata *nd)
2266{
2267 unsigned long vm_start, vm_end;
2268 struct vm_area_struct *vma;
2269 struct task_struct *task;
2270 struct dentry *result;
2271 struct mm_struct *mm;
2272
2273 result = ERR_PTR(-EACCES);
2274 if (!capable(CAP_SYS_ADMIN))
2275 goto out;
2276
2277 result = ERR_PTR(-ENOENT);
2278 task = get_proc_task(dir);
2279 if (!task)
2280 goto out;
2281
2282 result = ERR_PTR(-EACCES);
2283 if (lock_trace(task))
2284 goto out_put_task;
2285
2286 result = ERR_PTR(-ENOENT);
2287 if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
2288 goto out_unlock;
2289
2290 mm = get_task_mm(task);
2291 if (!mm)
2292 goto out_unlock;
2293
2294 down_read(&mm->mmap_sem);
2295 vma = find_exact_vma(mm, vm_start, vm_end);
2296 if (!vma)
2297 goto out_no_vma;
2298
2299 result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
2300
2301out_no_vma:
2302 up_read(&mm->mmap_sem);
2303 mmput(mm);
2304out_unlock:
2305 unlock_trace(task);
2306out_put_task:
2307 put_task_struct(task);
2308out:
2309 return result;
2310}
2311
2312static const struct inode_operations proc_map_files_inode_operations = {
2313 .lookup = proc_map_files_lookup,
2314 .permission = proc_fd_permission,
2315 .setattr = proc_setattr,
2316};
2317
2318static int
2319proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2320{
2321 struct dentry *dentry = filp->f_path.dentry;
2322 struct inode *inode = dentry->d_inode;
2323 struct vm_area_struct *vma;
2324 struct task_struct *task;
2325 struct mm_struct *mm;
2326 ino_t ino;
2327 int ret;
2328
2329 ret = -EACCES;
2330 if (!capable(CAP_SYS_ADMIN))
2331 goto out;
2332
2333 ret = -ENOENT;
2334 task = get_proc_task(inode);
2335 if (!task)
2336 goto out;
2337
2338 ret = -EACCES;
2339 if (lock_trace(task))
2340 goto out_put_task;
2341
2342 ret = 0;
2343 switch (filp->f_pos) {
2344 case 0:
2345 ino = inode->i_ino;
2346 if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
2347 goto out_unlock;
2348 filp->f_pos++;
2349 case 1:
2350 ino = parent_ino(dentry);
2351 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2352 goto out_unlock;
2353 filp->f_pos++;
2354 default:
2355 {
2356 unsigned long nr_files, pos, i;
2357 struct flex_array *fa = NULL;
2358 struct map_files_info info;
2359 struct map_files_info *p;
2360
2361 mm = get_task_mm(task);
2362 if (!mm)
2363 goto out_unlock;
2364 down_read(&mm->mmap_sem);
2365
2366 nr_files = 0;
2367
2368 /*
2369 * We need two passes here:
2370 *
2371 * 1) Collect vmas of mapped files with mmap_sem taken
2372 * 2) Release mmap_sem and instantiate entries
2373 *
2374 * otherwise we get lockdep complained, since filldir()
2375 * routine might require mmap_sem taken in might_fault().
2376 */
2377
2378 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
2379 if (vma->vm_file && ++pos > filp->f_pos)
2380 nr_files++;
2381 }
2382
2383 if (nr_files) {
2384 fa = flex_array_alloc(sizeof(info), nr_files,
2385 GFP_KERNEL);
2386 if (!fa || flex_array_prealloc(fa, 0, nr_files,
2387 GFP_KERNEL)) {
2388 ret = -ENOMEM;
2389 if (fa)
2390 flex_array_free(fa);
2391 up_read(&mm->mmap_sem);
2392 mmput(mm);
2393 goto out_unlock;
2394 }
2395 for (i = 0, vma = mm->mmap, pos = 2; vma;
2396 vma = vma->vm_next) {
2397 if (!vma->vm_file)
2398 continue;
2399 if (++pos <= filp->f_pos)
2400 continue;
2401
2402 get_file(vma->vm_file);
2403 info.file = vma->vm_file;
2404 info.len = snprintf(info.name,
2405 sizeof(info.name), "%lx-%lx",
2406 vma->vm_start, vma->vm_end);
2407 if (flex_array_put(fa, i++, &info, GFP_KERNEL))
2408 BUG();
2409 }
2410 }
2411 up_read(&mm->mmap_sem);
2412
2413 for (i = 0; i < nr_files; i++) {
2414 p = flex_array_get(fa, i);
2415 ret = proc_fill_cache(filp, dirent, filldir,
2416 p->name, p->len,
2417 proc_map_files_instantiate,
2418 task, p->file);
2419 if (ret)
2420 break;
2421 filp->f_pos++;
2422 fput(p->file);
2423 }
2424 for (; i < nr_files; i++) {
2425 /*
2426 * In case of error don't forget
2427 * to put rest of file refs.
2428 */
2429 p = flex_array_get(fa, i);
2430 fput(p->file);
2431 }
2432 if (fa)
2433 flex_array_free(fa);
2434 mmput(mm);
2435 }
2436 }
2437
2438out_unlock:
2439 unlock_trace(task);
2440out_put_task:
2441 put_task_struct(task);
2442out:
2443 return ret;
2444}
2445
2446static const struct file_operations proc_map_files_operations = {
2447 .read = generic_read_dir,
2448 .readdir = proc_map_files_readdir,
2449 .llseek = default_llseek,
2450};
2451
2452#endif /* CONFIG_CHECKPOINT_RESTORE */
2453
2046/* 2454/*
2047 * /proc/pid/fd needs a special permission handler so that a process can still 2455 * /proc/pid/fd needs a special permission handler so that a process can still
2048 * access /proc/self/fd after it has executed a setuid(). 2456 * access /proc/self/fd after it has executed a setuid().
@@ -2658,6 +3066,9 @@ static const struct inode_operations proc_task_inode_operations;
2658static const struct pid_entry tgid_base_stuff[] = { 3066static const struct pid_entry tgid_base_stuff[] = {
2659 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 3067 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2660 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3068 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3069#ifdef CONFIG_CHECKPOINT_RESTORE
3070 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
3071#endif
2661 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3072 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2662 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3073 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2663#ifdef CONFIG_NET 3074#ifdef CONFIG_NET
@@ -2761,6 +3172,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
2761 .lookup = proc_tgid_base_lookup, 3172 .lookup = proc_tgid_base_lookup,
2762 .getattr = pid_getattr, 3173 .getattr = pid_getattr,
2763 .setattr = proc_setattr, 3174 .setattr = proc_setattr,
3175 .permission = proc_pid_permission,
2764}; 3176};
2765 3177
2766static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 3178static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
@@ -2964,6 +3376,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
2964 proc_pid_instantiate, iter.task, NULL); 3376 proc_pid_instantiate, iter.task, NULL);
2965} 3377}
2966 3378
3379static int fake_filldir(void *buf, const char *name, int namelen,
3380 loff_t offset, u64 ino, unsigned d_type)
3381{
3382 return 0;
3383}
3384
2967/* for the /proc/ directory itself, after non-process stuff has been done */ 3385/* for the /proc/ directory itself, after non-process stuff has been done */
2968int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3386int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2969{ 3387{
@@ -2971,6 +3389,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2971 struct task_struct *reaper; 3389 struct task_struct *reaper;
2972 struct tgid_iter iter; 3390 struct tgid_iter iter;
2973 struct pid_namespace *ns; 3391 struct pid_namespace *ns;
3392 filldir_t __filldir;
2974 3393
2975 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 3394 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
2976 goto out_no_task; 3395 goto out_no_task;
@@ -2992,8 +3411,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2992 for (iter = next_tgid(ns, iter); 3411 for (iter = next_tgid(ns, iter);
2993 iter.task; 3412 iter.task;
2994 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3413 iter.tgid += 1, iter = next_tgid(ns, iter)) {
3414 if (has_pid_permissions(ns, iter.task, 2))
3415 __filldir = filldir;
3416 else
3417 __filldir = fake_filldir;
3418
2995 filp->f_pos = iter.tgid + TGID_OFFSET; 3419 filp->f_pos = iter.tgid + TGID_OFFSET;
2996 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { 3420 if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
2997 put_task_struct(iter.task); 3421 put_task_struct(iter.task);
2998 goto out; 3422 goto out;
2999 } 3423 }
@@ -3328,6 +3752,7 @@ static const struct inode_operations proc_task_inode_operations = {
3328 .lookup = proc_task_lookup, 3752 .lookup = proc_task_lookup,
3329 .getattr = proc_task_getattr, 3753 .getattr = proc_task_getattr,
3330 .setattr = proc_setattr, 3754 .setattr = proc_setattr,
3755 .permission = proc_pid_permission,
3331}; 3756};
3332 3757
3333static const struct file_operations proc_task_operations = { 3758static const struct file_operations proc_task_operations = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 51a176622b8f..84fd3235a590 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -7,6 +7,7 @@
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/pid_namespace.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/string.h> 12#include <linux/string.h>
12#include <linux/stat.h> 13#include <linux/stat.h>
@@ -17,7 +18,9 @@
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/sysctl.h> 20#include <linux/sysctl.h>
21#include <linux/seq_file.h>
20#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/mount.h>
21 24
22#include <asm/system.h> 25#include <asm/system.h>
23#include <asm/uaccess.h> 26#include <asm/uaccess.h>
@@ -101,12 +104,27 @@ void __init proc_init_inodecache(void)
101 init_once); 104 init_once);
102} 105}
103 106
107static int proc_show_options(struct seq_file *seq, struct dentry *root)
108{
109 struct super_block *sb = root->d_sb;
110 struct pid_namespace *pid = sb->s_fs_info;
111
112 if (pid->pid_gid)
113 seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid);
114 if (pid->hide_pid != 0)
115 seq_printf(seq, ",hidepid=%u", pid->hide_pid);
116
117 return 0;
118}
119
104static const struct super_operations proc_sops = { 120static const struct super_operations proc_sops = {
105 .alloc_inode = proc_alloc_inode, 121 .alloc_inode = proc_alloc_inode,
106 .destroy_inode = proc_destroy_inode, 122 .destroy_inode = proc_destroy_inode,
107 .drop_inode = generic_delete_inode, 123 .drop_inode = generic_delete_inode,
108 .evict_inode = proc_evict_inode, 124 .evict_inode = proc_evict_inode,
109 .statfs = simple_statfs, 125 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127 .show_options = proc_show_options,
110}; 128};
111 129
112static void __pde_users_dec(struct proc_dir_entry *pde) 130static void __pde_users_dec(struct proc_dir_entry *pde)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 7838e5cfec14..292577531ad1 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde);
117 117
118int proc_fill_super(struct super_block *); 118int proc_fill_super(struct super_block *);
119struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 119struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
120int proc_remount(struct super_block *sb, int *flags, char *data);
120 121
121/* 122/*
122 * These are generic /proc routines that use the internal 123 * These are generic /proc routines that use the internal
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 03102d978180..46a15d8a29ca 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/pid_namespace.h> 20#include <linux/pid_namespace.h>
21#include <linux/parser.h>
21 22
22#include "internal.h" 23#include "internal.h"
23 24
@@ -36,6 +37,63 @@ static int proc_set_super(struct super_block *sb, void *data)
36 return err; 37 return err;
37} 38}
38 39
40enum {
41 Opt_gid, Opt_hidepid, Opt_err,
42};
43
44static const match_table_t tokens = {
45 {Opt_hidepid, "hidepid=%u"},
46 {Opt_gid, "gid=%u"},
47 {Opt_err, NULL},
48};
49
50static int proc_parse_options(char *options, struct pid_namespace *pid)
51{
52 char *p;
53 substring_t args[MAX_OPT_ARGS];
54 int option;
55
56 if (!options)
57 return 1;
58
59 while ((p = strsep(&options, ",")) != NULL) {
60 int token;
61 if (!*p)
62 continue;
63
64 args[0].to = args[0].from = 0;
65 token = match_token(p, tokens, args);
66 switch (token) {
67 case Opt_gid:
68 if (match_int(&args[0], &option))
69 return 0;
70 pid->pid_gid = option;
71 break;
72 case Opt_hidepid:
73 if (match_int(&args[0], &option))
74 return 0;
75 if (option < 0 || option > 2) {
76 pr_err("proc: hidepid value must be between 0 and 2.\n");
77 return 0;
78 }
79 pid->hide_pid = option;
80 break;
81 default:
82 pr_err("proc: unrecognized mount option \"%s\" "
83 "or missing value\n", p);
84 return 0;
85 }
86 }
87
88 return 1;
89}
90
91int proc_remount(struct super_block *sb, int *flags, char *data)
92{
93 struct pid_namespace *pid = sb->s_fs_info;
94 return !proc_parse_options(data, pid);
95}
96
39static struct dentry *proc_mount(struct file_system_type *fs_type, 97static struct dentry *proc_mount(struct file_system_type *fs_type,
40 int flags, const char *dev_name, void *data) 98 int flags, const char *dev_name, void *data)
41{ 99{
@@ -43,11 +101,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
43 struct super_block *sb; 101 struct super_block *sb;
44 struct pid_namespace *ns; 102 struct pid_namespace *ns;
45 struct proc_inode *ei; 103 struct proc_inode *ei;
104 char *options;
46 105
47 if (flags & MS_KERNMOUNT) 106 if (flags & MS_KERNMOUNT) {
48 ns = (struct pid_namespace *)data; 107 ns = (struct pid_namespace *)data;
49 else 108 options = NULL;
109 } else {
50 ns = current->nsproxy->pid_ns; 110 ns = current->nsproxy->pid_ns;
111 options = data;
112 }
51 113
52 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 114 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
53 if (IS_ERR(sb)) 115 if (IS_ERR(sb))
@@ -55,6 +117,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
55 117
56 if (!sb->s_root) { 118 if (!sb->s_root) {
57 sb->s_flags = flags; 119 sb->s_flags = flags;
120 if (!proc_parse_options(options, ns)) {
121 deactivate_locked_super(sb);
122 return ERR_PTR(-EINVAL);
123 }
58 err = proc_fill_super(sb); 124 err = proc_fill_super(sb);
59 if (err) { 125 if (err) {
60 deactivate_locked_super(sb); 126 deactivate_locked_super(sb);
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index a945cd265228..70de42f09f1d 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1364,10 +1364,7 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
1364 struct reiserfs_bitmap_info *bitmap; 1364 struct reiserfs_bitmap_info *bitmap;
1365 unsigned int bmap_nr = reiserfs_bmap_count(sb); 1365 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1366 1366
1367 /* Avoid lock recursion in fault case */
1368 reiserfs_write_unlock(sb);
1369 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); 1367 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1370 reiserfs_write_lock(sb);
1371 if (bitmap == NULL) 1368 if (bitmap == NULL)
1372 return -ENOMEM; 1369 return -ENOMEM;
1373 1370
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index eb711060a6f2..c3cf54fd4de3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2678,16 +2678,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2678 char b[BDEVNAME_SIZE]; 2678 char b[BDEVNAME_SIZE];
2679 int ret; 2679 int ret;
2680 2680
2681 /*
2682 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
2683 * dependency inversion warnings.
2684 */
2685 reiserfs_write_unlock(sb);
2686 journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal)); 2681 journal = SB_JOURNAL(sb) = vzalloc(sizeof(struct reiserfs_journal));
2687 if (!journal) { 2682 if (!journal) {
2688 reiserfs_warning(sb, "journal-1256", 2683 reiserfs_warning(sb, "journal-1256",
2689 "unable to get memory for journal structure"); 2684 "unable to get memory for journal structure");
2690 reiserfs_write_lock(sb);
2691 return 1; 2685 return 1;
2692 } 2686 }
2693 INIT_LIST_HEAD(&journal->j_bitmap_nodes); 2687 INIT_LIST_HEAD(&journal->j_bitmap_nodes);
@@ -2695,10 +2689,8 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2695 INIT_LIST_HEAD(&journal->j_working_list); 2689 INIT_LIST_HEAD(&journal->j_working_list);
2696 INIT_LIST_HEAD(&journal->j_journal_list); 2690 INIT_LIST_HEAD(&journal->j_journal_list);
2697 journal->j_persistent_trans = 0; 2691 journal->j_persistent_trans = 0;
2698 ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap, 2692 if (reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
2699 reiserfs_bmap_count(sb)); 2693 reiserfs_bmap_count(sb)))
2700 reiserfs_write_lock(sb);
2701 if (ret)
2702 goto free_and_return; 2694 goto free_and_return;
2703 2695
2704 allocate_bitmap_nodes(sb); 2696 allocate_bitmap_nodes(sb);
@@ -2727,27 +2719,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2727 goto free_and_return; 2719 goto free_and_return;
2728 } 2720 }
2729 2721
2730 /*
2731 * We need to unlock here to avoid creating the following
2732 * dependency:
2733 * reiserfs_lock -> sysfs_mutex
2734 * Because the reiserfs mmap path creates the following dependency:
2735 * mm->mmap -> reiserfs_lock, hence we have
2736 * mm->mmap -> reiserfs_lock ->sysfs_mutex
2737 * This would end up in a circular dependency with sysfs readdir path
2738 * which does sysfs_mutex -> mm->mmap_sem
2739 * This is fine because the reiserfs lock is useless in mount path,
2740 * at least until we call journal_begin. We keep it for paranoid
2741 * reasons.
2742 */
2743 reiserfs_write_unlock(sb);
2744 if (journal_init_dev(sb, journal, j_dev_name) != 0) { 2722 if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2745 reiserfs_write_lock(sb);
2746 reiserfs_warning(sb, "sh-462", 2723 reiserfs_warning(sb, "sh-462",
2747 "unable to initialize jornal device"); 2724 "unable to initialize jornal device");
2748 goto free_and_return; 2725 goto free_and_return;
2749 } 2726 }
2750 reiserfs_write_lock(sb);
2751 2727
2752 rs = SB_DISK_SUPER_BLOCK(sb); 2728 rs = SB_DISK_SUPER_BLOCK(sb);
2753 2729
@@ -2829,9 +2805,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2829 journal->j_mount_id = 10; 2805 journal->j_mount_id = 10;
2830 journal->j_state = 0; 2806 journal->j_state = 0;
2831 atomic_set(&(journal->j_jlock), 0); 2807 atomic_set(&(journal->j_jlock), 0);
2832 reiserfs_write_unlock(sb);
2833 journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 2808 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2834 reiserfs_write_lock(sb);
2835 journal->j_cnode_free_orig = journal->j_cnode_free_list; 2809 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2836 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 2810 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
2837 journal->j_cnode_used = 0; 2811 journal->j_cnode_used = 0;
@@ -2848,24 +2822,37 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2848 2822
2849 init_journal_hash(sb); 2823 init_journal_hash(sb);
2850 jl = journal->j_current_jl; 2824 jl = journal->j_current_jl;
2825
2826 /*
2827 * get_list_bitmap() may call flush_commit_list() which
2828 * requires the lock. Calling flush_commit_list() shouldn't happen
2829 * this early but I like to be paranoid.
2830 */
2831 reiserfs_write_lock(sb);
2851 jl->j_list_bitmap = get_list_bitmap(sb, jl); 2832 jl->j_list_bitmap = get_list_bitmap(sb, jl);
2833 reiserfs_write_unlock(sb);
2852 if (!jl->j_list_bitmap) { 2834 if (!jl->j_list_bitmap) {
2853 reiserfs_warning(sb, "journal-2005", 2835 reiserfs_warning(sb, "journal-2005",
2854 "get_list_bitmap failed for journal list 0"); 2836 "get_list_bitmap failed for journal list 0");
2855 goto free_and_return; 2837 goto free_and_return;
2856 } 2838 }
2857 if (journal_read(sb) < 0) { 2839
2840 /*
2841 * Journal_read needs to be inspected in order to push down
2842 * the lock further inside (or even remove it).
2843 */
2844 reiserfs_write_lock(sb);
2845 ret = journal_read(sb);
2846 reiserfs_write_unlock(sb);
2847 if (ret < 0) {
2858 reiserfs_warning(sb, "reiserfs-2006", 2848 reiserfs_warning(sb, "reiserfs-2006",
2859 "Replay Failure, unable to mount"); 2849 "Replay Failure, unable to mount");
2860 goto free_and_return; 2850 goto free_and_return;
2861 } 2851 }
2862 2852
2863 reiserfs_mounted_fs_count++; 2853 reiserfs_mounted_fs_count++;
2864 if (reiserfs_mounted_fs_count <= 1) { 2854 if (reiserfs_mounted_fs_count <= 1)
2865 reiserfs_write_unlock(sb);
2866 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); 2855 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
2867 reiserfs_write_lock(sb);
2868 }
2869 2856
2870 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2857 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2871 journal->j_work_sb = sb; 2858 journal->j_work_sb = sb;
@@ -2896,14 +2883,13 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2896 journal->j_cnode_free < (journal->j_trans_max * 3)) { 2883 journal->j_cnode_free < (journal->j_trans_max * 3)) {
2897 return 1; 2884 return 1;
2898 } 2885 }
2899 /* protected by the BKL here */ 2886
2900 journal->j_len_alloc += new_alloc; 2887 journal->j_len_alloc += new_alloc;
2901 th->t_blocks_allocated += new_alloc ; 2888 th->t_blocks_allocated += new_alloc ;
2902 return 0; 2889 return 0;
2903} 2890}
2904 2891
2905/* this must be called inside a transaction, and requires the 2892/* this must be called inside a transaction
2906** kernel_lock to be held
2907*/ 2893*/
2908void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2894void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2909{ 2895{
@@ -2914,8 +2900,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2914 return; 2900 return;
2915} 2901}
2916 2902
2917/* this must be called without a transaction started, and does not 2903/* this must be called without a transaction started
2918** require BKL
2919*/ 2904*/
2920void reiserfs_allow_writes(struct super_block *s) 2905void reiserfs_allow_writes(struct super_block *s)
2921{ 2906{
@@ -2924,8 +2909,7 @@ void reiserfs_allow_writes(struct super_block *s)
2924 wake_up(&journal->j_join_wait); 2909 wake_up(&journal->j_join_wait);
2925} 2910}
2926 2911
2927/* this must be called without a transaction started, and does not 2912/* this must be called without a transaction started
2928** require BKL
2929*/ 2913*/
2930void reiserfs_wait_on_write_block(struct super_block *s) 2914void reiserfs_wait_on_write_block(struct super_block *s)
2931{ 2915{
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d42e707d5fa..e12d8b97cd4d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1519,9 +1519,7 @@ static int read_super_block(struct super_block *s, int offset)
1519static int reread_meta_blocks(struct super_block *s) 1519static int reread_meta_blocks(struct super_block *s)
1520{ 1520{
1521 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1521 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1522 reiserfs_write_unlock(s);
1523 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1522 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1524 reiserfs_write_lock(s);
1525 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1523 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1526 reiserfs_warning(s, "reiserfs-2504", "error reading the super"); 1524 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1527 return 1; 1525 return 1;
@@ -1746,22 +1744,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1746 mutex_init(&REISERFS_SB(s)->lock); 1744 mutex_init(&REISERFS_SB(s)->lock);
1747 REISERFS_SB(s)->lock_depth = -1; 1745 REISERFS_SB(s)->lock_depth = -1;
1748 1746
1749 /*
1750 * This function is called with the bkl, which also was the old
1751 * locking used here.
1752 * do_journal_begin() will soon check if we hold the lock (ie: was the
1753 * bkl). This is likely because do_journal_begin() has several another
1754 * callers because at this time, it doesn't seem to be necessary to
1755 * protect against anything.
1756 * Anyway, let's be conservative and lock for now.
1757 */
1758 reiserfs_write_lock(s);
1759
1760 jdev_name = NULL; 1747 jdev_name = NULL;
1761 if (reiserfs_parse_options 1748 if (reiserfs_parse_options
1762 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1749 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1763 &commit_max_age, qf_names, &qfmt) == 0) { 1750 &commit_max_age, qf_names, &qfmt) == 0) {
1764 goto error; 1751 goto error_unlocked;
1765 } 1752 }
1766 if (jdev_name && jdev_name[0]) { 1753 if (jdev_name && jdev_name[0]) {
1767 REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); 1754 REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL);
@@ -1777,7 +1764,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1777 1764
1778 if (blocks) { 1765 if (blocks) {
1779 SWARN(silent, s, "jmacd-7", "resize option for remount only"); 1766 SWARN(silent, s, "jmacd-7", "resize option for remount only");
1780 goto error; 1767 goto error_unlocked;
1781 } 1768 }
1782 1769
1783 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ 1770 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */
@@ -1787,7 +1774,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1787 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1774 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1788 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", 1775 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
1789 reiserfs_bdevname(s)); 1776 reiserfs_bdevname(s));
1790 goto error; 1777 goto error_unlocked;
1791 } 1778 }
1792 1779
1793 rs = SB_DISK_SUPER_BLOCK(s); 1780 rs = SB_DISK_SUPER_BLOCK(s);
@@ -1803,7 +1790,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1803 "or increase size of your LVM partition"); 1790 "or increase size of your LVM partition");
1804 SWARN(silent, s, "", "Or may be you forgot to " 1791 SWARN(silent, s, "", "Or may be you forgot to "
1805 "reboot after fdisk when it told you to"); 1792 "reboot after fdisk when it told you to");
1806 goto error; 1793 goto error_unlocked;
1807 } 1794 }
1808 1795
1809 sbi->s_mount_state = SB_REISERFS_STATE(s); 1796 sbi->s_mount_state = SB_REISERFS_STATE(s);
@@ -1811,8 +1798,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1811 1798
1812 if ((errval = reiserfs_init_bitmap_cache(s))) { 1799 if ((errval = reiserfs_init_bitmap_cache(s))) {
1813 SWARN(silent, s, "jmacd-8", "unable to read bitmap"); 1800 SWARN(silent, s, "jmacd-8", "unable to read bitmap");
1814 goto error; 1801 goto error_unlocked;
1815 } 1802 }
1803
1816 errval = -EINVAL; 1804 errval = -EINVAL;
1817#ifdef CONFIG_REISERFS_CHECK 1805#ifdef CONFIG_REISERFS_CHECK
1818 SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON"); 1806 SWARN(silent, s, "", "CONFIG_REISERFS_CHECK is set ON");
@@ -1835,24 +1823,26 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1835 if (reiserfs_barrier_flush(s)) { 1823 if (reiserfs_barrier_flush(s)) {
1836 printk("reiserfs: using flush barriers\n"); 1824 printk("reiserfs: using flush barriers\n");
1837 } 1825 }
1826
1838 // set_device_ro(s->s_dev, 1) ; 1827 // set_device_ro(s->s_dev, 1) ;
1839 if (journal_init(s, jdev_name, old_format, commit_max_age)) { 1828 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1840 SWARN(silent, s, "sh-2022", 1829 SWARN(silent, s, "sh-2022",
1841 "unable to initialize journal space"); 1830 "unable to initialize journal space");
1842 goto error; 1831 goto error_unlocked;
1843 } else { 1832 } else {
1844 jinit_done = 1; /* once this is set, journal_release must be called 1833 jinit_done = 1; /* once this is set, journal_release must be called
1845 ** if we error out of the mount 1834 ** if we error out of the mount
1846 */ 1835 */
1847 } 1836 }
1837
1848 if (reread_meta_blocks(s)) { 1838 if (reread_meta_blocks(s)) {
1849 SWARN(silent, s, "jmacd-9", 1839 SWARN(silent, s, "jmacd-9",
1850 "unable to reread meta blocks after journal init"); 1840 "unable to reread meta blocks after journal init");
1851 goto error; 1841 goto error_unlocked;
1852 } 1842 }
1853 1843
1854 if (replay_only(s)) 1844 if (replay_only(s))
1855 goto error; 1845 goto error_unlocked;
1856 1846
1857 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { 1847 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
1858 SWARN(silent, s, "clm-7000", 1848 SWARN(silent, s, "clm-7000",
@@ -1866,9 +1856,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1866 reiserfs_init_locked_inode, (void *)(&args)); 1856 reiserfs_init_locked_inode, (void *)(&args));
1867 if (!root_inode) { 1857 if (!root_inode) {
1868 SWARN(silent, s, "jmacd-10", "get root inode failed"); 1858 SWARN(silent, s, "jmacd-10", "get root inode failed");
1869 goto error; 1859 goto error_unlocked;
1870 } 1860 }
1871 1861
1862 /*
1863 * This path was assumed to be called with the BKL held in the old days.
1864 * Now we have inherited the big reiserfs lock from it and many
1865 * reiserfs helpers called in the mount path and elsewhere require
1866 * this lock to be held even if it's not always necessary. Let's be
1867 * conservative and hold it early. The window can be reduced after
1868 * careful review of the code.
1869 */
1870 reiserfs_write_lock(s);
1871
1872 if (root_inode->i_state & I_NEW) { 1872 if (root_inode->i_state & I_NEW) {
1873 reiserfs_read_locked_inode(root_inode, &args); 1873 reiserfs_read_locked_inode(root_inode, &args);
1874 unlock_new_inode(root_inode); 1874 unlock_new_inode(root_inode);
@@ -1995,12 +1995,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1995 return (0); 1995 return (0);
1996 1996
1997error: 1997error:
1998 if (jinit_done) { /* kill the commit thread, free journal ram */ 1998 reiserfs_write_unlock(s);
1999
2000error_unlocked:
2001 /* kill the commit thread, free journal ram */
2002 if (jinit_done) {
2003 reiserfs_write_lock(s);
1999 journal_release_error(NULL, s); 2004 journal_release_error(NULL, s);
2005 reiserfs_write_unlock(s);
2000 } 2006 }
2001 2007
2002 reiserfs_write_unlock(s);
2003
2004 reiserfs_free_bitmap_cache(s); 2008 reiserfs_free_bitmap_cache(s);
2005 if (SB_BUFFER_WITH_SB(s)) 2009 if (SB_BUFFER_WITH_SB(s))
2006 brelse(SB_BUFFER_WITH_SB(s)); 2010 brelse(SB_BUFFER_WITH_SB(s));