aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-09-24 03:56:20 -0400
committerIngo Molnar <mingo@elte.hu>2008-09-24 03:56:20 -0400
commitebdd90a8cb2e3963f55499850f02ce6003558b55 (patch)
treed153f917ed41d257ddafa22f9cc2201bfddf8f9c /fs/proc
parent3c9339049df5cc3a468c11de6c4101a1ea8c3d83 (diff)
parent72d31053f62c4bc464c2783974926969614a8649 (diff)
Merge commit 'v2.6.27-rc7' into x86/pebs
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/Kconfig59
-rw-r--r--fs/proc/array.c68
-rw-r--r--fs/proc/base.c92
-rw-r--r--fs/proc/generic.c51
-rw-r--r--fs/proc/inode.c88
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c10
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/nommu.c4
-rw-r--r--fs/proc/proc_misc.c7
-rw-r--r--fs/proc/proc_sysctl.c429
-rw-r--r--fs/proc/task_mmu.c4
12 files changed, 463 insertions, 359 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..73cd7a418f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
1config PROC_FS
2 bool "/proc file system support" if EMBEDDED
3 default y
4 help
5 This is a virtual file system providing information about the status
6 of the system. "Virtual" means that it doesn't take up any space on
7 your hard disk: the files are created on the fly by the kernel when
8 you try to access them. Also, you cannot read the files with older
9 version of the program less: you need to use more or cat.
10
11 It's totally cool; for example, "cat /proc/interrupts" gives
12 information about what the different IRQs are used for at the moment
13 (there is a small number of Interrupt ReQuest lines in your computer
14 that are used by the attached devices to gain the CPU's attention --
15 often a source of trouble if two devices are mistakenly configured
16 to use the same IRQ). The program procinfo to display some
17 information about your system gathered from the /proc file system.
18
19 Before you can use the /proc file system, it has to be mounted,
20 meaning it has to be given a location in the directory hierarchy.
21 That location should be /proc. A command such as "mount -t proc proc
22 /proc" or the equivalent line in /etc/fstab does the job.
23
24 The /proc file system is explained in the file
25 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
26 ("man 5 proc").
27
28 This option will enlarge your kernel by about 67 KB. Several
29 programs depend on this, so everyone should say Y here.
30
31config PROC_KCORE
32 bool "/proc/kcore support" if !ARM
33 depends on PROC_FS && MMU
34
35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)"
37 depends on PROC_FS && CRASH_DUMP
38 default y
39 help
40 Exports the dump image of crashed kernel in ELF format.
41
42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED
44 depends on PROC_FS
45 select SYSCTL
46 default y
47 ---help---
48 The sysctl interface provides a means of dynamically changing
49 certain kernel parameters and variables on the fly without requiring
50 a recompile of the kernel or reboot of the system. The primary
51 interface is through /proc/sys. If you say Y here a tree of
52 modifiable sysctl entries will be generated beneath the
53 /proc/sys directory. They are explained in the files
54 in <file:Documentation/sysctl/>. Note that enabling this
55 option will enlarge the kernel by at least 8 KB.
56
57 As it is generally a good thing, you should say Y here unless
58 building a kernel for install/rescue disks or your system is very
59 limited in memory.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e0354..71c9be59c9c2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
80#include <linux/delayacct.h> 80#include <linux/delayacct.h>
81#include <linux/seq_file.h> 81#include <linux/seq_file.h>
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/tracehook.h>
83 84
84#include <asm/pgtable.h> 85#include <asm/pgtable.h>
85#include <asm/processor.h> 86#include <asm/processor.h>
@@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
168 rcu_read_lock(); 169 rcu_read_lock();
169 ppid = pid_alive(p) ? 170 ppid = pid_alive(p) ?
170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 171 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
171 tpid = pid_alive(p) && p->ptrace ? 172 tpid = 0;
172 task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; 173 if (pid_alive(p)) {
174 struct task_struct *tracer = tracehook_tracer_task(p);
175 if (tracer)
176 tpid = task_pid_nr_ns(tracer, ns);
177 }
173 seq_printf(m, 178 seq_printf(m,
174 "State:\t%s\n" 179 "State:\t%s\n"
175 "Tgid:\t%d\n" 180 "Tgid:\t%d\n"
@@ -332,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
332 return 0; 337 return 0;
333} 338}
334 339
335/*
336 * Use precise platform statistics if available:
337 */
338#ifdef CONFIG_VIRT_CPU_ACCOUNTING
339static cputime_t task_utime(struct task_struct *p)
340{
341 return p->utime;
342}
343
344static cputime_t task_stime(struct task_struct *p)
345{
346 return p->stime;
347}
348#else
349static cputime_t task_utime(struct task_struct *p)
350{
351 clock_t utime = cputime_to_clock_t(p->utime),
352 total = utime + cputime_to_clock_t(p->stime);
353 u64 temp;
354
355 /*
356 * Use CFS's precise accounting:
357 */
358 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
359
360 if (total) {
361 temp *= utime;
362 do_div(temp, total);
363 }
364 utime = (clock_t)temp;
365
366 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
367 return p->prev_utime;
368}
369
370static cputime_t task_stime(struct task_struct *p)
371{
372 clock_t stime;
373
374 /*
375 * Use CFS's precise accounting. (we subtract utime from
376 * the total, to make sure the total observed by userspace
377 * grows monotonically - apps rely on that):
378 */
379 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
380 cputime_to_clock_t(task_utime(p));
381
382 if (stime >= 0)
383 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
384
385 return p->prev_stime;
386}
387#endif
388
389static cputime_t task_gtime(struct task_struct *p)
390{
391 return p->gtime;
392}
393
394static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, 340static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
395 struct pid *pid, struct task_struct *task, int whole) 341 struct pid *pid, struct task_struct *task, int whole)
396{ 342{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..a28840b11b89 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -53,6 +53,7 @@
53#include <linux/time.h> 53#include <linux/time.h>
54#include <linux/proc_fs.h> 54#include <linux/proc_fs.h>
55#include <linux/stat.h> 55#include <linux/stat.h>
56#include <linux/task_io_accounting_ops.h>
56#include <linux/init.h> 57#include <linux/init.h>
57#include <linux/capability.h> 58#include <linux/capability.h>
58#include <linux/file.h> 59#include <linux/file.h>
@@ -69,6 +70,7 @@
69#include <linux/mount.h> 70#include <linux/mount.h>
70#include <linux/security.h> 71#include <linux/security.h>
71#include <linux/ptrace.h> 72#include <linux/ptrace.h>
73#include <linux/tracehook.h>
72#include <linux/cgroup.h> 74#include <linux/cgroup.h>
73#include <linux/cpuset.h> 75#include <linux/cpuset.h>
74#include <linux/audit.h> 76#include <linux/audit.h>
@@ -231,10 +233,14 @@ static int check_mem_permission(struct task_struct *task)
231 * If current is actively ptrace'ing, and would also be 233 * If current is actively ptrace'ing, and would also be
232 * permitted to freshly attach with ptrace now, permit it. 234 * permitted to freshly attach with ptrace now, permit it.
233 */ 235 */
234 if (task->parent == current && (task->ptrace & PT_PTRACED) && 236 if (task_is_stopped_or_traced(task)) {
235 task_is_stopped_or_traced(task) && 237 int match;
236 ptrace_may_access(task, PTRACE_MODE_ATTACH)) 238 rcu_read_lock();
237 return 0; 239 match = (tracehook_tracer_task(task) == current);
240 rcu_read_unlock();
241 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
242 return 0;
243 }
238 244
239 /* 245 /*
240 * Noone else is allowed. 246 * Noone else is allowed.
@@ -504,6 +510,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
504 return count; 510 return count;
505} 511}
506 512
513#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
514static int proc_pid_syscall(struct task_struct *task, char *buffer)
515{
516 long nr;
517 unsigned long args[6], sp, pc;
518
519 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
520 return sprintf(buffer, "running\n");
521
522 if (nr < 0)
523 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
524
525 return sprintf(buffer,
526 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
527 nr,
528 args[0], args[1], args[2], args[3], args[4], args[5],
529 sp, pc);
530}
531#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
532
507/************************************************************************/ 533/************************************************************************/
508/* Here the fs part begins */ 534/* Here the fs part begins */
509/************************************************************************/ 535/************************************************************************/
@@ -1834,8 +1860,7 @@ static const struct file_operations proc_fd_operations = {
1834 * /proc/pid/fd needs a special permission handler so that a process can still 1860 * /proc/pid/fd needs a special permission handler so that a process can still
1835 * access /proc/self/fd after it has executed a setuid(). 1861 * access /proc/self/fd after it has executed a setuid().
1836 */ 1862 */
1837static int proc_fd_permission(struct inode *inode, int mask, 1863static int proc_fd_permission(struct inode *inode, int mask)
1838 struct nameidata *nd)
1839{ 1864{
1840 int rv; 1865 int rv;
1841 1866
@@ -2376,29 +2401,47 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2376} 2401}
2377 2402
2378#ifdef CONFIG_TASK_IO_ACCOUNTING 2403#ifdef CONFIG_TASK_IO_ACCOUNTING
2379static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2404static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380{ 2405{
2406 struct task_io_accounting acct = task->ioac;
2407 unsigned long flags;
2408
2409 if (whole && lock_task_sighand(task, &flags)) {
2410 struct task_struct *t = task;
2411
2412 task_io_accounting_add(&acct, &task->signal->ioac);
2413 while_each_thread(task, t)
2414 task_io_accounting_add(&acct, &t->ioac);
2415
2416 unlock_task_sighand(task, &flags);
2417 }
2381 return sprintf(buffer, 2418 return sprintf(buffer,
2382#ifdef CONFIG_TASK_XACCT
2383 "rchar: %llu\n" 2419 "rchar: %llu\n"
2384 "wchar: %llu\n" 2420 "wchar: %llu\n"
2385 "syscr: %llu\n" 2421 "syscr: %llu\n"
2386 "syscw: %llu\n" 2422 "syscw: %llu\n"
2387#endif
2388 "read_bytes: %llu\n" 2423 "read_bytes: %llu\n"
2389 "write_bytes: %llu\n" 2424 "write_bytes: %llu\n"
2390 "cancelled_write_bytes: %llu\n", 2425 "cancelled_write_bytes: %llu\n",
2391#ifdef CONFIG_TASK_XACCT 2426 (unsigned long long)acct.rchar,
2392 (unsigned long long)task->rchar, 2427 (unsigned long long)acct.wchar,
2393 (unsigned long long)task->wchar, 2428 (unsigned long long)acct.syscr,
2394 (unsigned long long)task->syscr, 2429 (unsigned long long)acct.syscw,
2395 (unsigned long long)task->syscw, 2430 (unsigned long long)acct.read_bytes,
2396#endif 2431 (unsigned long long)acct.write_bytes,
2397 (unsigned long long)task->ioac.read_bytes, 2432 (unsigned long long)acct.cancelled_write_bytes);
2398 (unsigned long long)task->ioac.write_bytes, 2433}
2399 (unsigned long long)task->ioac.cancelled_write_bytes); 2434
2435static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2436{
2437 return do_io_accounting(task, buffer, 0);
2400} 2438}
2401#endif 2439
2440static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2441{
2442 return do_io_accounting(task, buffer, 1);
2443}
2444#endif /* CONFIG_TASK_IO_ACCOUNTING */
2402 2445
2403/* 2446/*
2404 * Thread groups 2447 * Thread groups
@@ -2420,6 +2463,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2420#ifdef CONFIG_SCHED_DEBUG 2463#ifdef CONFIG_SCHED_DEBUG
2421 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2464 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2422#endif 2465#endif
2466#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2467 INF("syscall", S_IRUSR, pid_syscall),
2468#endif
2423 INF("cmdline", S_IRUGO, pid_cmdline), 2469 INF("cmdline", S_IRUGO, pid_cmdline),
2424 ONE("stat", S_IRUGO, tgid_stat), 2470 ONE("stat", S_IRUGO, tgid_stat),
2425 ONE("statm", S_IRUGO, pid_statm), 2471 ONE("statm", S_IRUGO, pid_statm),
@@ -2470,7 +2516,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2516 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471#endif 2517#endif
2472#ifdef CONFIG_TASK_IO_ACCOUNTING 2518#ifdef CONFIG_TASK_IO_ACCOUNTING
2473 INF("io", S_IRUGO, pid_io_accounting), 2519 INF("io", S_IRUGO, tgid_io_accounting),
2474#endif 2520#endif
2475}; 2521};
2476 2522
@@ -2752,6 +2798,9 @@ static const struct pid_entry tid_base_stuff[] = {
2752#ifdef CONFIG_SCHED_DEBUG 2798#ifdef CONFIG_SCHED_DEBUG
2753 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2799 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2754#endif 2800#endif
2801#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2802 INF("syscall", S_IRUSR, pid_syscall),
2803#endif
2755 INF("cmdline", S_IRUGO, pid_cmdline), 2804 INF("cmdline", S_IRUGO, pid_cmdline),
2756 ONE("stat", S_IRUGO, tid_stat), 2805 ONE("stat", S_IRUGO, tid_stat),
2757 ONE("statm", S_IRUGO, pid_statm), 2806 ONE("statm", S_IRUGO, pid_statm),
@@ -2797,6 +2846,9 @@ static const struct pid_entry tid_base_stuff[] = {
2797#ifdef CONFIG_FAULT_INJECTION 2846#ifdef CONFIG_FAULT_INJECTION
2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2847 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2799#endif 2848#endif
2849#ifdef CONFIG_TASK_IO_ACCOUNTING
2850 INF("io", S_IRUGO, tid_io_accounting),
2851#endif
2800}; 2852};
2801 2853
2802static int proc_tid_base_readdir(struct file * filp, 2854static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..7821589a17d5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
300 return rtn; 300 return rtn;
301} 301}
302 302
303static DEFINE_IDR(proc_inum_idr); 303static DEFINE_IDA(proc_inum_ida);
304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
305 305
306#define PROC_DYNAMIC_FIRST 0xF0000000UL 306#define PROC_DYNAMIC_FIRST 0xF0000000U
307 307
308/* 308/*
309 * Return an inode number between PROC_DYNAMIC_FIRST and 309 * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,34 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
311 */ 311 */
312static unsigned int get_inode_number(void) 312static unsigned int get_inode_number(void)
313{ 313{
314 int i, inum = 0; 314 unsigned int i;
315 int error; 315 int error;
316 316
317retry: 317retry:
318 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 318 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
319 return 0; 319 return 0;
320 320
321 spin_lock(&proc_inum_lock); 321 spin_lock(&proc_inum_lock);
322 error = idr_get_new(&proc_inum_idr, NULL, &i); 322 error = ida_get_new(&proc_inum_ida, &i);
323 spin_unlock(&proc_inum_lock); 323 spin_unlock(&proc_inum_lock);
324 if (error == -EAGAIN) 324 if (error == -EAGAIN)
325 goto retry; 325 goto retry;
326 else if (error) 326 else if (error)
327 return 0; 327 return 0;
328 328
329 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 329 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
330 330 spin_lock(&proc_inum_lock);
331 /* inum will never be more than 0xf0ffffff, so no check 331 ida_remove(&proc_inum_ida, i);
332 * for overflow. 332 spin_unlock(&proc_inum_lock);
333 */ 333 return 0;
334 334 }
335 return inum; 335 return PROC_DYNAMIC_FIRST + i;
336} 336}
337 337
338static void release_inode_number(unsigned int inum) 338static void release_inode_number(unsigned int inum)
339{ 339{
340 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
341
342 spin_lock(&proc_inum_lock); 340 spin_lock(&proc_inum_lock);
343 idr_remove(&proc_inum_idr, id); 341 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
344 spin_unlock(&proc_inum_lock); 342 spin_unlock(&proc_inum_lock);
345} 343}
346 344
@@ -549,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
549 547
550 for (tmp = dir->subdir; tmp; tmp = tmp->next) 548 for (tmp = dir->subdir; tmp; tmp = tmp->next)
551 if (strcmp(tmp->name, dp->name) == 0) { 549 if (strcmp(tmp->name, dp->name) == 0) {
552 printk(KERN_WARNING "proc_dir_entry '%s' already " 550 printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n",
553 "registered\n", dp->name); 551 dir->name, dp->name);
554 dump_stack(); 552 dump_stack();
555 break; 553 break;
556 } 554 }
@@ -597,6 +595,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
597 ent->pde_users = 0; 595 ent->pde_users = 0;
598 spin_lock_init(&ent->pde_unload_lock); 596 spin_lock_init(&ent->pde_unload_lock);
599 ent->pde_unload_completion = NULL; 597 ent->pde_unload_completion = NULL;
598 INIT_LIST_HEAD(&ent->pde_openers);
600 out: 599 out:
601 return ent; 600 return ent;
602} 601}
@@ -789,15 +788,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
789 spin_unlock(&de->pde_unload_lock); 788 spin_unlock(&de->pde_unload_lock);
790 789
791continue_removing: 790continue_removing:
791 spin_lock(&de->pde_unload_lock);
792 while (!list_empty(&de->pde_openers)) {
793 struct pde_opener *pdeo;
794
795 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
796 list_del(&pdeo->lh);
797 spin_unlock(&de->pde_unload_lock);
798 pdeo->release(pdeo->inode, pdeo->file);
799 kfree(pdeo);
800 spin_lock(&de->pde_unload_lock);
801 }
802 spin_unlock(&de->pde_unload_lock);
803
792 if (S_ISDIR(de->mode)) 804 if (S_ISDIR(de->mode))
793 parent->nlink--; 805 parent->nlink--;
794 de->nlink = 0; 806 de->nlink = 0;
795 if (de->subdir) { 807 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
796 printk(KERN_WARNING "%s: removing non-empty directory "
797 "'%s/%s', leaking at least '%s'\n", __func__, 808 "'%s/%s', leaking at least '%s'\n", __func__,
798 de->parent->name, de->name, de->subdir->name); 809 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
800 }
801 if (atomic_dec_and_test(&de->count)) 810 if (atomic_dec_and_test(&de->count))
802 free_proc_entry(de); 811 free_proc_entry(de);
803} 812}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..8bb03f056c28 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/sysctl.h>
20 21
21#include <asm/system.h> 22#include <asm/system.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
65 module_put(de->owner); 66 module_put(de->owner);
66 de_put(de); 67 de_put(de);
67 } 68 }
69 if (PROC_I(inode)->sysctl)
70 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 71 clear_inode(inode);
69} 72}
70 73
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
84 ei->fd = 0; 87 ei->fd = 0;
85 ei->op.proc_get_link = NULL; 88 ei->op.proc_get_link = NULL;
86 ei->pde = NULL; 89 ei->pde = NULL;
90 ei->sysctl = NULL;
91 ei->sysctl_entry = NULL;
87 inode = &ei->vfs_inode; 92 inode = &ei->vfs_inode;
88 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 93 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
89 return inode; 94 return inode;
@@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode)
94 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 99 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
95} 100}
96 101
97static void init_once(struct kmem_cache * cachep, void *foo) 102static void init_once(void *foo)
98{ 103{
99 struct proc_inode *ei = (struct proc_inode *) foo; 104 struct proc_inode *ei = (struct proc_inode *) foo;
100 105
@@ -111,27 +116,25 @@ int __init proc_init_inodecache(void)
111 return 0; 116 return 0;
112} 117}
113 118
114static int proc_remount(struct super_block *sb, int *flags, char *data)
115{
116 *flags |= MS_NODIRATIME;
117 return 0;
118}
119
120static const struct super_operations proc_sops = { 119static const struct super_operations proc_sops = {
121 .alloc_inode = proc_alloc_inode, 120 .alloc_inode = proc_alloc_inode,
122 .destroy_inode = proc_destroy_inode, 121 .destroy_inode = proc_destroy_inode,
123 .drop_inode = generic_delete_inode, 122 .drop_inode = generic_delete_inode,
124 .delete_inode = proc_delete_inode, 123 .delete_inode = proc_delete_inode,
125 .statfs = simple_statfs, 124 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127}; 125};
128 126
129static void pde_users_dec(struct proc_dir_entry *pde) 127static void __pde_users_dec(struct proc_dir_entry *pde)
130{ 128{
131 spin_lock(&pde->pde_unload_lock);
132 pde->pde_users--; 129 pde->pde_users--;
133 if (pde->pde_unload_completion && pde->pde_users == 0) 130 if (pde->pde_unload_completion && pde->pde_users == 0)
134 complete(pde->pde_unload_completion); 131 complete(pde->pde_unload_completion);
132}
133
134static void pde_users_dec(struct proc_dir_entry *pde)
135{
136 spin_lock(&pde->pde_unload_lock);
137 __pde_users_dec(pde);
135 spin_unlock(&pde->pde_unload_lock); 138 spin_unlock(&pde->pde_unload_lock);
136} 139}
137 140
@@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
318 struct proc_dir_entry *pde = PDE(inode); 321 struct proc_dir_entry *pde = PDE(inode);
319 int rv = 0; 322 int rv = 0;
320 int (*open)(struct inode *, struct file *); 323 int (*open)(struct inode *, struct file *);
324 int (*release)(struct inode *, struct file *);
325 struct pde_opener *pdeo;
326
327 /*
328 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
329 * sequence. ->release won't be called because ->proc_fops will be
330 * cleared. Depending on complexity of ->release, consequences vary.
331 *
332 * We can't wait for mercy when close will be done for real, it's
333 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
334 * by hand in remove_proc_entry(). For this, save opener's credentials
335 * for later.
336 */
337 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
338 if (!pdeo)
339 return -ENOMEM;
321 340
322 spin_lock(&pde->pde_unload_lock); 341 spin_lock(&pde->pde_unload_lock);
323 if (!pde->proc_fops) { 342 if (!pde->proc_fops) {
324 spin_unlock(&pde->pde_unload_lock); 343 spin_unlock(&pde->pde_unload_lock);
344 kfree(pdeo);
325 return rv; 345 return rv;
326 } 346 }
327 pde->pde_users++; 347 pde->pde_users++;
328 open = pde->proc_fops->open; 348 open = pde->proc_fops->open;
349 release = pde->proc_fops->release;
329 spin_unlock(&pde->pde_unload_lock); 350 spin_unlock(&pde->pde_unload_lock);
330 351
331 if (open) 352 if (open)
332 rv = open(inode, file); 353 rv = open(inode, file);
333 354
334 pde_users_dec(pde); 355 spin_lock(&pde->pde_unload_lock);
356 if (rv == 0 && release) {
357 /* To know what to release. */
358 pdeo->inode = inode;
359 pdeo->file = file;
360 /* Strictly for "too late" ->release in proc_reg_release(). */
361 pdeo->release = release;
362 list_add(&pdeo->lh, &pde->pde_openers);
363 } else
364 kfree(pdeo);
365 __pde_users_dec(pde);
366 spin_unlock(&pde->pde_unload_lock);
335 return rv; 367 return rv;
336} 368}
337 369
370static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
371 struct inode *inode, struct file *file)
372{
373 struct pde_opener *pdeo;
374
375 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
376 if (pdeo->inode == inode && pdeo->file == file)
377 return pdeo;
378 }
379 return NULL;
380}
381
338static int proc_reg_release(struct inode *inode, struct file *file) 382static int proc_reg_release(struct inode *inode, struct file *file)
339{ 383{
340 struct proc_dir_entry *pde = PDE(inode); 384 struct proc_dir_entry *pde = PDE(inode);
341 int rv = 0; 385 int rv = 0;
342 int (*release)(struct inode *, struct file *); 386 int (*release)(struct inode *, struct file *);
387 struct pde_opener *pdeo;
343 388
344 spin_lock(&pde->pde_unload_lock); 389 spin_lock(&pde->pde_unload_lock);
390 pdeo = find_pde_opener(pde, inode, file);
345 if (!pde->proc_fops) { 391 if (!pde->proc_fops) {
346 spin_unlock(&pde->pde_unload_lock); 392 /*
393 * Can't simply exit, __fput() will think that everything is OK,
394 * and move on to freeing struct file. remove_proc_entry() will
395 * find slacker in opener's list and will try to do non-trivial
396 * things with struct file. Therefore, remove opener from list.
397 *
398 * But if opener is removed from list, who will ->release it?
399 */
400 if (pdeo) {
401 list_del(&pdeo->lh);
402 spin_unlock(&pde->pde_unload_lock);
403 rv = pdeo->release(inode, file);
404 kfree(pdeo);
405 } else
406 spin_unlock(&pde->pde_unload_lock);
347 return rv; 407 return rv;
348 } 408 }
349 pde->pde_users++; 409 pde->pde_users++;
350 release = pde->proc_fops->release; 410 release = pde->proc_fops->release;
411 if (pdeo) {
412 list_del(&pdeo->lh);
413 kfree(pdeo);
414 }
351 spin_unlock(&pde->pde_unload_lock); 415 spin_unlock(&pde->pde_unload_lock);
352 416
353 if (release) 417 if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
63extern const struct file_operations proc_clear_refs_operations; 63extern const struct file_operations proc_clear_refs_operations;
64extern const struct file_operations proc_pagemap_operations; 64extern const struct file_operations proc_pagemap_operations;
65extern const struct file_operations proc_net_operations; 65extern const struct file_operations proc_net_operations;
66extern const struct file_operations proc_kmsg_operations;
66extern const struct inode_operations proc_net_inode_operations; 67extern const struct inode_operations proc_net_inode_operations;
67 68
68void free_proc_entry(struct proc_dir_entry *de); 69void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
88 struct dentry *dentry); 89 struct dentry *dentry);
89int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 90int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
90 filldir_t filldir); 91 filldir_t filldir);
92
93struct pde_opener {
94 struct inode *inode;
95 struct file *file;
96 int (*release)(struct inode *, struct file *);
97 struct list_head lh;
98};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
23 23
24#define CORE_STR "CORE" 24#define CORE_STR "CORE"
25 25
26#ifndef ELF_CORE_EFLAGS
27#define ELF_CORE_EFLAGS 0
28#endif
29
26static int open_kcore(struct inode * inode, struct file * filp) 30static int open_kcore(struct inode * inode, struct file * filp)
27{ 31{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
164 elf->e_entry = 0; 168 elf->e_entry = 0;
165 elf->e_phoff = sizeof(struct elfhdr); 169 elf->e_phoff = sizeof(struct elfhdr);
166 elf->e_shoff = 0; 170 elf->e_shoff = 0;
167#if defined(CONFIG_H8300) 171 elf->e_flags = ELF_CORE_EFLAGS;
168 elf->e_flags = ELF_FLAGS;
169#else
170 elf->e_flags = 0;
171#endif
172 elf->e_ehsize = sizeof(struct elfhdr); 172 elf->e_ehsize = sizeof(struct elfhdr);
173 elf->e_phentsize= sizeof(struct elf_phdr); 173 elf->e_phentsize= sizeof(struct elf_phdr);
174 elf->e_phnum = nphdr; 174 elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#include "internal.h"
19
18extern wait_queue_head_t log_wait; 20extern wait_queue_head_t log_wait;
19 21
20extern int do_syslog(int type, char __user *bug, int count); 22extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 79ecd281d2cb..3f87d2632947 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
52 } 52 }
53 53
54 seq_printf(m, 54 seq_printf(m,
55 "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 55 "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
56 vma->vm_start, 56 vma->vm_start,
57 vma->vm_end, 57 vma->vm_end,
58 flags & VM_READ ? 'r' : '-', 58 flags & VM_READ ? 'r' : '-',
59 flags & VM_WRITE ? 'w' : '-', 59 flags & VM_WRITE ? 'w' : '-',
60 flags & VM_EXEC ? 'x' : '-', 60 flags & VM_EXEC ? 'x' : '-',
61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 61 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
62 vma->vm_pgoff << PAGE_SHIFT, 62 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
63 MAJOR(dev), MINOR(dev), ino, &len); 63 MAJOR(dev), MINOR(dev), ino, &len);
64 64
65 if (file) { 65 if (file) {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index ded969862960..29e20c6b1f7f 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -24,6 +24,7 @@
24#include <linux/tty.h> 24#include <linux/tty.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/mman.h> 26#include <linux/mman.h>
27#include <linux/quicklist.h>
27#include <linux/proc_fs.h> 28#include <linux/proc_fs.h>
28#include <linux/ioport.h> 29#include <linux/ioport.h>
29#include <linux/mm.h> 30#include <linux/mm.h>
@@ -182,6 +183,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
182 "SReclaimable: %8lu kB\n" 183 "SReclaimable: %8lu kB\n"
183 "SUnreclaim: %8lu kB\n" 184 "SUnreclaim: %8lu kB\n"
184 "PageTables: %8lu kB\n" 185 "PageTables: %8lu kB\n"
186#ifdef CONFIG_QUICKLIST
187 "Quicklists: %8lu kB\n"
188#endif
185 "NFS_Unstable: %8lu kB\n" 189 "NFS_Unstable: %8lu kB\n"
186 "Bounce: %8lu kB\n" 190 "Bounce: %8lu kB\n"
187 "WritebackTmp: %8lu kB\n" 191 "WritebackTmp: %8lu kB\n"
@@ -214,6 +218,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
214 K(global_page_state(NR_SLAB_RECLAIMABLE)), 218 K(global_page_state(NR_SLAB_RECLAIMABLE)),
215 K(global_page_state(NR_SLAB_UNRECLAIMABLE)), 219 K(global_page_state(NR_SLAB_UNRECLAIMABLE)),
216 K(global_page_state(NR_PAGETABLE)), 220 K(global_page_state(NR_PAGETABLE)),
221#ifdef CONFIG_QUICKLIST
222 K(quicklist_total_size()),
223#endif
217 K(global_page_state(NR_UNSTABLE_NFS)), 224 K(global_page_state(NR_UNSTABLE_NFS)),
218 K(global_page_state(NR_BOUNCE)), 225 K(global_page_state(NR_BOUNCE)),
219 K(global_page_state(NR_WRITEBACK_TEMP)), 226 K(global_page_state(NR_WRITEBACK_TEMP)),
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f6..f9a8b892718f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
10static struct dentry_operations proc_sys_dentry_operations; 10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations; 11static const struct file_operations proc_sys_file_operations;
12static const struct inode_operations proc_sys_inode_operations; 12static const struct inode_operations proc_sys_inode_operations;
13static const struct file_operations proc_sys_dir_file_operations;
14static const struct inode_operations proc_sys_dir_operations;
13 15
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) 16static struct inode *proc_sys_make_inode(struct super_block *sb,
15{ 17 struct ctl_table_header *head, struct ctl_table *table)
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{ 18{
33 struct inode *inode; 19 struct inode *inode;
34 struct proc_inode *dir_ei, *ei; 20 struct proc_inode *ei;
35 int depth;
36 21
37 inode = new_inode(dir->i_sb); 22 inode = new_inode(sb);
38 if (!inode) 23 if (!inode)
39 goto out; 24 goto out;
40 25
41 /* A directory is always one deeper than it's parent */ 26 sysctl_head_get(head);
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode); 27 ei = PROC_I(inode);
46 ei->fd = depth; 28 ei->sysctl = head;
29 ei->sysctl_entry = table;
30
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
50 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ 32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
51 proc_sys_refresh_inode(inode, table); 33 inode->i_mode = table->mode;
34 if (!table->child) {
35 inode->i_mode |= S_IFREG;
36 inode->i_op = &proc_sys_inode_operations;
37 inode->i_fop = &proc_sys_file_operations;
38 } else {
39 inode->i_mode |= S_IFDIR;
40 inode->i_nlink = 0;
41 inode->i_op = &proc_sys_dir_operations;
42 inode->i_fop = &proc_sys_dir_file_operations;
43 }
52out: 44out:
53 return inode; 45 return inode;
54} 46}
55 47
56static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) 48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
57{
58 for (;;) {
59 struct proc_inode *ei;
60
61 ei = PROC_I(dentry->d_inode);
62 if (ei->fd == depth)
63 break; /* found */
64
65 dentry = dentry->d_parent;
66 }
67 return dentry;
68}
69
70static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
71 struct qstr *name)
72{ 49{
73 int len; 50 int len;
74 for ( ; table->ctl_name || table->procname; table++) { 51 for ( ; p->ctl_name || p->procname; p++) {
75 52
76 if (!table->procname) 53 if (!p->procname)
77 continue; 54 continue;
78 55
79 len = strlen(table->procname); 56 len = strlen(p->procname);
80 if (len != name->len) 57 if (len != name->len)
81 continue; 58 continue;
82 59
83 if (memcmp(table->procname, name->name, len) != 0) 60 if (memcmp(p->procname, name->name, len) != 0)
84 continue; 61 continue;
85 62
86 /* I have a match */ 63 /* I have a match */
87 return table; 64 return p;
88 } 65 }
89 return NULL; 66 return NULL;
90} 67}
91 68
92static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, 69struct ctl_table_header *grab_header(struct inode *inode)
93 struct ctl_table *table)
94{ 70{
95 struct dentry *ancestor; 71 if (PROC_I(inode)->sysctl)
96 struct proc_inode *ei; 72 return sysctl_head_grab(PROC_I(inode)->sysctl);
97 int depth, i; 73 else
74 return sysctl_head_next(NULL);
75}
98 76
99 ei = PROC_I(dentry->d_inode); 77static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
100 depth = ei->fd; 78 struct nameidata *nd)
79{
80 struct ctl_table_header *head = grab_header(dir);
81 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
82 struct ctl_table_header *h = NULL;
83 struct qstr *name = &dentry->d_name;
84 struct ctl_table *p;
85 struct inode *inode;
86 struct dentry *err = ERR_PTR(-ENOENT);
101 87
102 if (depth == 0) 88 if (IS_ERR(head))
103 return table; 89 return ERR_CAST(head);
104 90
105 for (i = 1; table && (i <= depth); i++) { 91 if (table && !table->child) {
106 ancestor = proc_sys_ancestor(dentry, i); 92 WARN_ON(1);
107 table = proc_sys_lookup_table_one(table, &ancestor->d_name); 93 goto out;
108 if (table)
109 table = table->child;
110 } 94 }
111 return table;
112
113}
114static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
115 struct qstr *name,
116 struct ctl_table *table)
117{
118 table = proc_sys_lookup_table(dparent, table);
119 if (table)
120 table = proc_sys_lookup_table_one(table, name);
121 return table;
122}
123 95
124static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, 96 table = table ? table->child : head->ctl_table;
125 struct qstr *name,
126 struct ctl_table_header **ptr)
127{
128 struct ctl_table_header *head;
129 struct ctl_table *table = NULL;
130 97
131 for (head = sysctl_head_next(NULL); head; 98 p = find_in_table(table, name);
132 head = sysctl_head_next(head)) { 99 if (!p) {
133 table = proc_sys_lookup_entry(parent, name, head->ctl_table); 100 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
134 if (table) 101 if (h->attached_to != table)
135 break; 102 continue;
103 p = find_in_table(h->attached_by, name);
104 if (p)
105 break;
106 }
136 } 107 }
137 *ptr = head;
138 return table;
139}
140
141static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
142 struct nameidata *nd)
143{
144 struct ctl_table_header *head;
145 struct inode *inode;
146 struct dentry *err;
147 struct ctl_table *table;
148 108
149 err = ERR_PTR(-ENOENT); 109 if (!p)
150 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
151 if (!table)
152 goto out; 110 goto out;
153 111
154 err = ERR_PTR(-ENOMEM); 112 err = ERR_PTR(-ENOMEM);
155 inode = proc_sys_make_inode(dir, table); 113 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
114 if (h)
115 sysctl_head_finish(h);
116
156 if (!inode) 117 if (!inode)
157 goto out; 118 goto out;
158 119
@@ -168,22 +129,14 @@ out:
168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 129static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos, int write) 130 size_t count, loff_t *ppos, int write)
170{ 131{
171 struct dentry *dentry = filp->f_dentry; 132 struct inode *inode = filp->f_path.dentry->d_inode;
172 struct ctl_table_header *head; 133 struct ctl_table_header *head = grab_header(inode);
173 struct ctl_table *table; 134 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
174 ssize_t error; 135 ssize_t error;
175 size_t res; 136 size_t res;
176 137
177 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 138 if (IS_ERR(head))
178 /* Has the sysctl entry disappeared on us? */ 139 return PTR_ERR(head);
179 error = -ENOENT;
180 if (!table)
181 goto out;
182
183 /* Has the sysctl entry been replaced by a directory? */
184 error = -EISDIR;
185 if (!table->proc_handler)
186 goto out;
187 140
188 /* 141 /*
189 * At this point we know that the sysctl was not unregistered 142 * At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) 146 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 147 goto out;
195 148
149 /* if that can happen at all, it should be -EINVAL, not -EISDIR */
150 error = -EINVAL;
151 if (!table->proc_handler)
152 goto out;
153
196 /* careful: calling conventions are nasty here */ 154 /* careful: calling conventions are nasty here */
197 res = count; 155 res = count;
198 error = table->proc_handler(table, write, filp, buf, &res, ppos); 156 error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
218 176
219 177
220static int proc_sys_fill_cache(struct file *filp, void *dirent, 178static int proc_sys_fill_cache(struct file *filp, void *dirent,
221 filldir_t filldir, struct ctl_table *table) 179 filldir_t filldir,
180 struct ctl_table_header *head,
181 struct ctl_table *table)
222{ 182{
223 struct ctl_table_header *head;
224 struct ctl_table *child_table = NULL;
225 struct dentry *child, *dir = filp->f_path.dentry; 183 struct dentry *child, *dir = filp->f_path.dentry;
226 struct inode *inode; 184 struct inode *inode;
227 struct qstr qname; 185 struct qstr qname;
228 ino_t ino = 0; 186 ino_t ino = 0;
229 unsigned type = DT_UNKNOWN; 187 unsigned type = DT_UNKNOWN;
230 int ret;
231 188
232 qname.name = table->procname; 189 qname.name = table->procname;
233 qname.len = strlen(table->procname); 190 qname.len = strlen(table->procname);
234 qname.hash = full_name_hash(qname.name, qname.len); 191 qname.hash = full_name_hash(qname.name, qname.len);
235 192
236 /* Suppress duplicates.
237 * Only fill a directory entry if it is the value that
238 * an ordinary lookup of that name returns. Hide all
239 * others.
240 *
241 * If we ever cache this translation in the dcache
242 * I should do a dcache lookup first. But for now
243 * it is just simpler not to.
244 */
245 ret = 0;
246 child_table = do_proc_sys_lookup(dir, &qname, &head);
247 sysctl_head_finish(head);
248 if (child_table != table)
249 return 0;
250
251 child = d_lookup(dir, &qname); 193 child = d_lookup(dir, &qname);
252 if (!child) { 194 if (!child) {
253 struct dentry *new; 195 child = d_alloc(dir, &qname);
254 new = d_alloc(dir, &qname); 196 if (child) {
255 if (new) { 197 inode = proc_sys_make_inode(dir->d_sb, head, table);
256 inode = proc_sys_make_inode(dir->d_inode, table); 198 if (!inode) {
257 if (!inode) 199 dput(child);
258 child = ERR_PTR(-ENOMEM); 200 return -ENOMEM;
259 else { 201 } else {
260 new->d_op = &proc_sys_dentry_operations; 202 child->d_op = &proc_sys_dentry_operations;
261 d_add(new, inode); 203 d_add(child, inode);
262 } 204 }
263 if (child) 205 } else {
264 dput(new); 206 return -ENOMEM;
265 else
266 child = new;
267 } 207 }
268 } 208 }
269 if (!child || IS_ERR(child) || !child->d_inode)
270 goto end_instantiate;
271 inode = child->d_inode; 209 inode = child->d_inode;
272 if (inode) { 210 ino = inode->i_ino;
273 ino = inode->i_ino; 211 type = inode->i_mode >> 12;
274 type = inode->i_mode >> 12;
275 }
276 dput(child); 212 dput(child);
277end_instantiate: 213 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
278 if (!ino) 214}
279 ino= find_inode_number(dir, &qname); 215
280 if (!ino) 216static int scan(struct ctl_table_header *head, ctl_table *table,
281 ino = 1; 217 unsigned long *pos, struct file *file,
282 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 218 void *dirent, filldir_t filldir)
219{
220
221 for (; table->ctl_name || table->procname; table++, (*pos)++) {
222 int res;
223
224 /* Can't do anything without a proc name */
225 if (!table->procname)
226 continue;
227
228 if (*pos < file->f_pos)
229 continue;
230
231 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
232 if (res)
233 return res;
234
235 file->f_pos = *pos + 1;
236 }
237 return 0;
283} 238}
284 239
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 240static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
286{ 241{
287 struct dentry *dentry = filp->f_dentry; 242 struct dentry *dentry = filp->f_path.dentry;
288 struct inode *inode = dentry->d_inode; 243 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = NULL; 244 struct ctl_table_header *head = grab_header(inode);
290 struct ctl_table *table; 245 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
246 struct ctl_table_header *h = NULL;
291 unsigned long pos; 247 unsigned long pos;
292 int ret; 248 int ret = -EINVAL;
249
250 if (IS_ERR(head))
251 return PTR_ERR(head);
293 252
294 ret = -ENOTDIR; 253 if (table && !table->child) {
295 if (!S_ISDIR(inode->i_mode)) 254 WARN_ON(1);
296 goto out; 255 goto out;
256 }
257
258 table = table ? table->child : head->ctl_table;
297 259
298 ret = 0; 260 ret = 0;
299 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 261 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
311 } 273 }
312 pos = 2; 274 pos = 2;
313 275
314 /* - Find each instance of the directory 276 ret = scan(head, table, &pos, filp, dirent, filldir);
315 * - Read all entries in each instance 277 if (ret)
316 * - Before returning an entry to user space lookup the entry 278 goto out;
317 * by name and if I find a different entry don't return
318 * this one because it means it is a buried dup.
319 * For sysctl this should only happen for directory entries.
320 */
321 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
322 table = proc_sys_lookup_table(dentry, head->ctl_table);
323 279
324 if (!table) 280 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
281 if (h->attached_to != table)
325 continue; 282 continue;
326 283 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
327 for (; table->ctl_name || table->procname; table++, pos++) { 284 if (ret) {
328 /* Can't do anything without a proc name */ 285 sysctl_head_finish(h);
329 if (!table->procname) 286 break;
330 continue;
331
332 if (pos < filp->f_pos)
333 continue;
334
335 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
336 goto out;
337 filp->f_pos = pos + 1;
338 } 287 }
339 } 288 }
340 ret = 1; 289 ret = 1;
@@ -343,53 +292,24 @@ out:
343 return ret; 292 return ret;
344} 293}
345 294
346static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) 295static int proc_sys_permission(struct inode *inode, int mask)
347{ 296{
348 /* 297 /*
349 * sysctl entries that are not writeable, 298 * sysctl entries that are not writeable,
350 * are _NOT_ writeable, capabilities or not. 299 * are _NOT_ writeable, capabilities or not.
351 */ 300 */
352 struct ctl_table_header *head; 301 struct ctl_table_header *head = grab_header(inode);
353 struct ctl_table *table; 302 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
354 struct dentry *dentry;
355 int mode;
356 int depth;
357 int error; 303 int error;
358 304
359 head = NULL; 305 if (IS_ERR(head))
360 depth = PROC_I(inode)->fd; 306 return PTR_ERR(head);
361
362 /* First check the cached permissions, in case we don't have
363 * enough information to lookup the sysctl table entry.
364 */
365 error = -EACCES;
366 mode = inode->i_mode;
367
368 if (current->euid == 0)
369 mode >>= 6;
370 else if (in_group_p(0))
371 mode >>= 3;
372
373 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
374 error = 0;
375
376 /* If we can't get a sysctl table entry the permission
377 * checks on the cached mode will have to be enough.
378 */
379 if (!nd || !depth)
380 goto out;
381 307
382 dentry = nd->path.dentry; 308 if (!table) /* global root - r-xr-xr-x */
383 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 309 error = mask & MAY_WRITE ? -EACCES : 0;
310 else /* Use the permissions on the sysctl table entry */
311 error = sysctl_perm(head->root, table, mask);
384 312
385 /* If the entry does not exist deny permission */
386 error = -EACCES;
387 if (!table)
388 goto out;
389
390 /* Use the permissions on the sysctl table entry */
391 error = sysctl_perm(head->root, table, mask);
392out:
393 sysctl_head_finish(head); 313 sysctl_head_finish(head);
394 return error; 314 return error;
395} 315}
@@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
409 return error; 329 return error;
410} 330}
411 331
412/* I'm lazy and don't distinguish between files and directories, 332static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
413 * until access time. 333{
414 */ 334 struct inode *inode = dentry->d_inode;
335 struct ctl_table_header *head = grab_header(inode);
336 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
337
338 if (IS_ERR(head))
339 return PTR_ERR(head);
340
341 generic_fillattr(inode, stat);
342 if (table)
343 stat->mode = (stat->mode & S_IFMT) | table->mode;
344
345 sysctl_head_finish(head);
346 return 0;
347}
348
415static const struct file_operations proc_sys_file_operations = { 349static const struct file_operations proc_sys_file_operations = {
416 .read = proc_sys_read, 350 .read = proc_sys_read,
417 .write = proc_sys_write, 351 .write = proc_sys_write,
352};
353
354static const struct file_operations proc_sys_dir_file_operations = {
418 .readdir = proc_sys_readdir, 355 .readdir = proc_sys_readdir,
419}; 356};
420 357
421static const struct inode_operations proc_sys_inode_operations = { 358static const struct inode_operations proc_sys_inode_operations = {
359 .permission = proc_sys_permission,
360 .setattr = proc_sys_setattr,
361 .getattr = proc_sys_getattr,
362};
363
364static const struct inode_operations proc_sys_dir_operations = {
422 .lookup = proc_sys_lookup, 365 .lookup = proc_sys_lookup,
423 .permission = proc_sys_permission, 366 .permission = proc_sys_permission,
424 .setattr = proc_sys_setattr, 367 .setattr = proc_sys_setattr,
368 .getattr = proc_sys_getattr,
425}; 369};
426 370
427static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 371static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
428{ 372{
429 struct ctl_table_header *head; 373 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
430 struct ctl_table *table; 374}
431 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 375
432 proc_sys_refresh_inode(dentry->d_inode, table); 376static int proc_sys_delete(struct dentry *dentry)
433 sysctl_head_finish(head); 377{
434 return !!table; 378 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
379}
380
381static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
382 struct qstr *name)
383{
384 struct dentry *dentry = container_of(qstr, struct dentry, d_name);
385 if (qstr->len != name->len)
386 return 1;
387 if (memcmp(qstr->name, name->name, name->len))
388 return 1;
389 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
435} 390}
436 391
437static struct dentry_operations proc_sys_dentry_operations = { 392static struct dentry_operations proc_sys_dentry_operations = {
438 .d_revalidate = proc_sys_revalidate, 393 .d_revalidate = proc_sys_revalidate,
394 .d_delete = proc_sys_delete,
395 .d_compare = proc_sys_compare,
439}; 396};
440 397
441static struct proc_dir_entry *proc_sys_root; 398static struct proc_dir_entry *proc_sys_root;
@@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root;
443int proc_sys_init(void) 400int proc_sys_init(void)
444{ 401{
445 proc_sys_root = proc_mkdir("sys", NULL); 402 proc_sys_root = proc_mkdir("sys", NULL);
446 proc_sys_root->proc_iops = &proc_sys_inode_operations; 403 proc_sys_root->proc_iops = &proc_sys_dir_operations;
447 proc_sys_root->proc_fops = &proc_sys_file_operations; 404 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
448 proc_sys_root->nlink = 0; 405 proc_sys_root->nlink = 0;
449 return 0; 406 return 0;
450} 407}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7546a918f790..73d1891ee625 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v)
219 ino = inode->i_ino; 219 ino = inode->i_ino;
220 } 220 }
221 221
222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", 222 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
223 vma->vm_start, 223 vma->vm_start,
224 vma->vm_end, 224 vma->vm_end,
225 flags & VM_READ ? 'r' : '-', 225 flags & VM_READ ? 'r' : '-',
226 flags & VM_WRITE ? 'w' : '-', 226 flags & VM_WRITE ? 'w' : '-',
227 flags & VM_EXEC ? 'x' : '-', 227 flags & VM_EXEC ? 'x' : '-',
228 flags & VM_MAYSHARE ? 's' : 'p', 228 flags & VM_MAYSHARE ? 's' : 'p',
229 vma->vm_pgoff << PAGE_SHIFT, 229 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
230 MAJOR(dev), MINOR(dev), ino, &len); 230 MAJOR(dev), MINOR(dev), ino, &len);
231 231
232 /* 232 /*