diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /fs/proc | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Kconfig | 12 | ||||
-rw-r--r-- | fs/proc/Makefile | 3 | ||||
-rw-r--r-- | fs/proc/array.c | 305 | ||||
-rw-r--r-- | fs/proc/base.c | 1571 | ||||
-rw-r--r-- | fs/proc/fd.c | 369 | ||||
-rw-r--r-- | fs/proc/fd.h | 14 | ||||
-rw-r--r-- | fs/proc/generic.c | 75 | ||||
-rw-r--r-- | fs/proc/inode.c | 47 | ||||
-rw-r--r-- | fs/proc/internal.h | 75 | ||||
-rw-r--r-- | fs/proc/kcore.c | 10 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 198 | ||||
-rw-r--r-- | fs/proc/page.c | 8 | ||||
-rw-r--r-- | fs/proc/proc_devtree.c | 11 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 4 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 1341 | ||||
-rw-r--r-- | fs/proc/root.c | 102 | ||||
-rw-r--r-- | fs/proc/self.c | 59 | ||||
-rw-r--r-- | fs/proc/stat.c | 178 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 545 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 71 | ||||
-rw-r--r-- | fs/proc/uptime.c | 11 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 24 |
22 files changed, 1451 insertions, 3582 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 15af6222f8a..ddb83a0e15e 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig | |||
@@ -67,3 +67,15 @@ config PROC_PAGE_MONITOR | |||
67 | /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, | 67 | /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, |
68 | /proc/kpagecount, and /proc/kpageflags. Disabling these | 68 | /proc/kpagecount, and /proc/kpageflags. Disabling these |
69 | interfaces will reduce the size of the kernel by approximately 4kb. | 69 | interfaces will reduce the size of the kernel by approximately 4kb. |
70 | |||
71 | config REPORT_PRESENT_CPUS | ||
72 | default n | ||
73 | depends on PROC_FS && SMP | ||
74 | bool "Report present cpus instead of online cpus" | ||
75 | help | ||
76 | This is a workaround to report present CPUs instead of online CPUs. | ||
77 | Some power-saving implementations use CPU hotplug for power domains. | ||
78 | It is a bug to enable this on a server or other architecture that | ||
79 | uses CPU hotplug in the correct way. | ||
80 | |||
81 | |||
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 981b0560193..c1c72933592 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o | |||
8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
9 | 9 | ||
10 | proc-y += inode.o root.o base.o generic.o array.o \ | 10 | proc-y += inode.o root.o base.o generic.o array.o \ |
11 | proc_tty.o fd.o | 11 | proc_tty.o |
12 | proc-y += cmdline.o | 12 | proc-y += cmdline.o |
13 | proc-y += consoles.o | 13 | proc-y += consoles.o |
14 | proc-y += cpuinfo.o | 14 | proc-y += cpuinfo.o |
@@ -21,7 +21,6 @@ proc-y += uptime.o | |||
21 | proc-y += version.o | 21 | proc-y += version.o |
22 | proc-y += softirqs.o | 22 | proc-y += softirqs.o |
23 | proc-y += namespaces.o | 23 | proc-y += namespaces.o |
24 | proc-y += self.o | ||
25 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o | 24 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o |
26 | proc-$(CONFIG_NET) += proc_net.o | 25 | proc-$(CONFIG_NET) += proc_net.o |
27 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 26 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 6a91e6ffbcb..3a1dafd228d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -81,7 +81,6 @@ | |||
81 | #include <linux/pid_namespace.h> | 81 | #include <linux/pid_namespace.h> |
82 | #include <linux/ptrace.h> | 82 | #include <linux/ptrace.h> |
83 | #include <linux/tracehook.h> | 83 | #include <linux/tracehook.h> |
84 | #include <linux/user_namespace.h> | ||
85 | 84 | ||
86 | #include <asm/pgtable.h> | 85 | #include <asm/pgtable.h> |
87 | #include <asm/processor.h> | 86 | #include <asm/processor.h> |
@@ -162,7 +161,6 @@ static inline const char *get_task_state(struct task_struct *tsk) | |||
162 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | 161 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, |
163 | struct pid *pid, struct task_struct *p) | 162 | struct pid *pid, struct task_struct *p) |
164 | { | 163 | { |
165 | struct user_namespace *user_ns = seq_user_ns(m); | ||
166 | struct group_info *group_info; | 164 | struct group_info *group_info; |
167 | int g; | 165 | int g; |
168 | struct fdtable *fdt = NULL; | 166 | struct fdtable *fdt = NULL; |
@@ -191,14 +189,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | |||
191 | task_tgid_nr_ns(p, ns), | 189 | task_tgid_nr_ns(p, ns), |
192 | pid_nr_ns(pid, ns), | 190 | pid_nr_ns(pid, ns), |
193 | ppid, tpid, | 191 | ppid, tpid, |
194 | from_kuid_munged(user_ns, cred->uid), | 192 | cred->uid, cred->euid, cred->suid, cred->fsuid, |
195 | from_kuid_munged(user_ns, cred->euid), | 193 | cred->gid, cred->egid, cred->sgid, cred->fsgid); |
196 | from_kuid_munged(user_ns, cred->suid), | ||
197 | from_kuid_munged(user_ns, cred->fsuid), | ||
198 | from_kgid_munged(user_ns, cred->gid), | ||
199 | from_kgid_munged(user_ns, cred->egid), | ||
200 | from_kgid_munged(user_ns, cred->sgid), | ||
201 | from_kgid_munged(user_ns, cred->fsgid)); | ||
202 | 194 | ||
203 | task_lock(p); | 195 | task_lock(p); |
204 | if (p->files) | 196 | if (p->files) |
@@ -212,15 +204,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | |||
212 | group_info = cred->group_info; | 204 | group_info = cred->group_info; |
213 | task_unlock(p); | 205 | task_unlock(p); |
214 | 206 | ||
215 | for (g = 0; g < group_info->ngroups; g++) | 207 | for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) |
216 | seq_printf(m, "%d ", | 208 | seq_printf(m, "%d ", GROUP_AT(group_info, g)); |
217 | from_kgid_munged(user_ns, GROUP_AT(group_info, g))); | ||
218 | put_cred(cred); | 209 | put_cred(cred); |
219 | 210 | ||
220 | seq_putc(m, '\n'); | 211 | seq_putc(m, '\n'); |
221 | } | 212 | } |
222 | 213 | ||
223 | void render_sigset_t(struct seq_file *m, const char *header, | 214 | static void render_sigset_t(struct seq_file *m, const char *header, |
224 | sigset_t *set) | 215 | sigset_t *set) |
225 | { | 216 | { |
226 | int i; | 217 | int i; |
@@ -308,10 +299,6 @@ static void render_cap_t(struct seq_file *m, const char *header, | |||
308 | seq_putc(m, '\n'); | 299 | seq_putc(m, '\n'); |
309 | } | 300 | } |
310 | 301 | ||
311 | /* Remove non-existent capabilities */ | ||
312 | #define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \ | ||
313 | CAP_TO_MASK(CAP_LAST_CAP + 1) - 1) | ||
314 | |||
315 | static inline void task_cap(struct seq_file *m, struct task_struct *p) | 302 | static inline void task_cap(struct seq_file *m, struct task_struct *p) |
316 | { | 303 | { |
317 | const struct cred *cred; | 304 | const struct cred *cred; |
@@ -325,24 +312,12 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) | |||
325 | cap_bset = cred->cap_bset; | 312 | cap_bset = cred->cap_bset; |
326 | rcu_read_unlock(); | 313 | rcu_read_unlock(); |
327 | 314 | ||
328 | NORM_CAPS(cap_inheritable); | ||
329 | NORM_CAPS(cap_permitted); | ||
330 | NORM_CAPS(cap_effective); | ||
331 | NORM_CAPS(cap_bset); | ||
332 | |||
333 | render_cap_t(m, "CapInh:\t", &cap_inheritable); | 315 | render_cap_t(m, "CapInh:\t", &cap_inheritable); |
334 | render_cap_t(m, "CapPrm:\t", &cap_permitted); | 316 | render_cap_t(m, "CapPrm:\t", &cap_permitted); |
335 | render_cap_t(m, "CapEff:\t", &cap_effective); | 317 | render_cap_t(m, "CapEff:\t", &cap_effective); |
336 | render_cap_t(m, "CapBnd:\t", &cap_bset); | 318 | render_cap_t(m, "CapBnd:\t", &cap_bset); |
337 | } | 319 | } |
338 | 320 | ||
339 | static inline void task_seccomp(struct seq_file *m, struct task_struct *p) | ||
340 | { | ||
341 | #ifdef CONFIG_SECCOMP | ||
342 | seq_printf(m, "Seccomp:\t%d\n", p->seccomp.mode); | ||
343 | #endif | ||
344 | } | ||
345 | |||
346 | static inline void task_context_switch_counts(struct seq_file *m, | 321 | static inline void task_context_switch_counts(struct seq_file *m, |
347 | struct task_struct *p) | 322 | struct task_struct *p) |
348 | { | 323 | { |
@@ -376,7 +351,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
376 | } | 351 | } |
377 | task_sig(m, task); | 352 | task_sig(m, task); |
378 | task_cap(m, task); | 353 | task_cap(m, task); |
379 | task_seccomp(m, task); | ||
380 | task_cpus_allowed(m, task); | 354 | task_cpus_allowed(m, task); |
381 | cpuset_task_status_allowed(m, task); | 355 | cpuset_task_status_allowed(m, task); |
382 | task_context_switch_counts(m, task); | 356 | task_context_switch_counts(m, task); |
@@ -387,7 +361,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
387 | struct pid *pid, struct task_struct *task, int whole) | 361 | struct pid *pid, struct task_struct *task, int whole) |
388 | { | 362 | { |
389 | unsigned long vsize, eip, esp, wchan = ~0UL; | 363 | unsigned long vsize, eip, esp, wchan = ~0UL; |
390 | int priority, nice; | 364 | long priority, nice; |
391 | int tty_pgrp = -1, tty_nr = 0; | 365 | int tty_pgrp = -1, tty_nr = 0; |
392 | sigset_t sigign, sigcatch; | 366 | sigset_t sigign, sigcatch; |
393 | char state; | 367 | char state; |
@@ -406,7 +380,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
406 | 380 | ||
407 | state = *get_task_state(task); | 381 | state = *get_task_state(task); |
408 | vsize = eip = esp = 0; | 382 | vsize = eip = esp = 0; |
409 | permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); | 383 | permitted = ptrace_may_access(task, PTRACE_MODE_READ); |
410 | mm = get_task_mm(task); | 384 | mm = get_task_mm(task); |
411 | if (mm) { | 385 | if (mm) { |
412 | vsize = task_vsize(mm); | 386 | vsize = task_vsize(mm); |
@@ -420,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
420 | 394 | ||
421 | sigemptyset(&sigign); | 395 | sigemptyset(&sigign); |
422 | sigemptyset(&sigcatch); | 396 | sigemptyset(&sigcatch); |
423 | cutime = cstime = utime = stime = 0; | 397 | cutime = cstime = utime = stime = cputime_zero; |
424 | cgtime = gtime = 0; | 398 | cgtime = gtime = cputime_zero; |
425 | 399 | ||
426 | if (lock_task_sighand(task, &flags)) { | 400 | if (lock_task_sighand(task, &flags)) { |
427 | struct signal_struct *sig = task->signal; | 401 | struct signal_struct *sig = task->signal; |
@@ -449,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
449 | do { | 423 | do { |
450 | min_flt += t->min_flt; | 424 | min_flt += t->min_flt; |
451 | maj_flt += t->maj_flt; | 425 | maj_flt += t->maj_flt; |
452 | gtime += t->gtime; | 426 | gtime = cputime_add(gtime, t->gtime); |
453 | t = next_thread(t); | 427 | t = next_thread(t); |
454 | } while (t != task); | 428 | } while (t != task); |
455 | 429 | ||
456 | min_flt += sig->min_flt; | 430 | min_flt += sig->min_flt; |
457 | maj_flt += sig->maj_flt; | 431 | maj_flt += sig->maj_flt; |
458 | thread_group_cputime_adjusted(task, &utime, &stime); | 432 | thread_group_times(task, &utime, &stime); |
459 | gtime += sig->gtime; | 433 | gtime = cputime_add(gtime, sig->gtime); |
460 | } | 434 | } |
461 | 435 | ||
462 | sid = task_session_nr_ns(task, ns); | 436 | sid = task_session_nr_ns(task, ns); |
@@ -471,7 +445,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
471 | if (!whole) { | 445 | if (!whole) { |
472 | min_flt = task->min_flt; | 446 | min_flt = task->min_flt; |
473 | maj_flt = task->maj_flt; | 447 | maj_flt = task->maj_flt; |
474 | task_cputime_adjusted(task, &utime, &stime); | 448 | task_times(task, &utime, &stime); |
475 | gtime = task->gtime; | 449 | gtime = task->gtime; |
476 | } | 450 | } |
477 | 451 | ||
@@ -488,70 +462,56 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
488 | /* convert nsec -> ticks */ | 462 | /* convert nsec -> ticks */ |
489 | start_time = nsec_to_clock_t(start_time); | 463 | start_time = nsec_to_clock_t(start_time); |
490 | 464 | ||
491 | seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); | 465 | seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ |
492 | seq_put_decimal_ll(m, ' ', ppid); | 466 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ |
493 | seq_put_decimal_ll(m, ' ', pgid); | 467 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", |
494 | seq_put_decimal_ll(m, ' ', sid); | 468 | pid_nr_ns(pid, ns), |
495 | seq_put_decimal_ll(m, ' ', tty_nr); | 469 | tcomm, |
496 | seq_put_decimal_ll(m, ' ', tty_pgrp); | 470 | state, |
497 | seq_put_decimal_ull(m, ' ', task->flags); | 471 | ppid, |
498 | seq_put_decimal_ull(m, ' ', min_flt); | 472 | pgid, |
499 | seq_put_decimal_ull(m, ' ', cmin_flt); | 473 | sid, |
500 | seq_put_decimal_ull(m, ' ', maj_flt); | 474 | tty_nr, |
501 | seq_put_decimal_ull(m, ' ', cmaj_flt); | 475 | tty_pgrp, |
502 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); | 476 | task->flags, |
503 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); | 477 | min_flt, |
504 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); | 478 | cmin_flt, |
505 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); | 479 | maj_flt, |
506 | seq_put_decimal_ll(m, ' ', priority); | 480 | cmaj_flt, |
507 | seq_put_decimal_ll(m, ' ', nice); | 481 | cputime_to_clock_t(utime), |
508 | seq_put_decimal_ll(m, ' ', num_threads); | 482 | cputime_to_clock_t(stime), |
509 | seq_put_decimal_ull(m, ' ', 0); | 483 | cputime_to_clock_t(cutime), |
510 | seq_put_decimal_ull(m, ' ', start_time); | 484 | cputime_to_clock_t(cstime), |
511 | seq_put_decimal_ull(m, ' ', vsize); | 485 | priority, |
512 | seq_put_decimal_ull(m, ' ', mm ? get_mm_rss(mm) : 0); | 486 | nice, |
513 | seq_put_decimal_ull(m, ' ', rsslim); | 487 | num_threads, |
514 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); | 488 | start_time, |
515 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); | 489 | vsize, |
516 | seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); | 490 | mm ? get_mm_rss(mm) : 0, |
517 | seq_put_decimal_ull(m, ' ', esp); | 491 | rsslim, |
518 | seq_put_decimal_ull(m, ' ', eip); | 492 | mm ? (permitted ? mm->start_code : 1) : 0, |
519 | /* The signal information here is obsolete. | 493 | mm ? (permitted ? mm->end_code : 1) : 0, |
520 | * It must be decimal for Linux 2.0 compatibility. | 494 | (permitted && mm) ? mm->start_stack : 0, |
521 | * Use /proc/#/status for real-time signals. | 495 | esp, |
522 | */ | 496 | eip, |
523 | seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); | 497 | /* The signal information here is obsolete. |
524 | seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); | 498 | * It must be decimal for Linux 2.0 compatibility. |
525 | seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); | 499 | * Use /proc/#/status for real-time signals. |
526 | seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); | 500 | */ |
527 | seq_put_decimal_ull(m, ' ', wchan); | 501 | task->pending.signal.sig[0] & 0x7fffffffUL, |
528 | seq_put_decimal_ull(m, ' ', 0); | 502 | task->blocked.sig[0] & 0x7fffffffUL, |
529 | seq_put_decimal_ull(m, ' ', 0); | 503 | sigign .sig[0] & 0x7fffffffUL, |
530 | seq_put_decimal_ll(m, ' ', task->exit_signal); | 504 | sigcatch .sig[0] & 0x7fffffffUL, |
531 | seq_put_decimal_ll(m, ' ', task_cpu(task)); | 505 | wchan, |
532 | seq_put_decimal_ull(m, ' ', task->rt_priority); | 506 | 0UL, |
533 | seq_put_decimal_ull(m, ' ', task->policy); | 507 | 0UL, |
534 | seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); | 508 | task->exit_signal, |
535 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); | 509 | task_cpu(task), |
536 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); | 510 | task->rt_priority, |
537 | 511 | task->policy, | |
538 | if (mm && permitted) { | 512 | (unsigned long long)delayacct_blkio_ticks(task), |
539 | seq_put_decimal_ull(m, ' ', mm->start_data); | 513 | cputime_to_clock_t(gtime), |
540 | seq_put_decimal_ull(m, ' ', mm->end_data); | 514 | cputime_to_clock_t(cgtime)); |
541 | seq_put_decimal_ull(m, ' ', mm->start_brk); | ||
542 | seq_put_decimal_ull(m, ' ', mm->arg_start); | ||
543 | seq_put_decimal_ull(m, ' ', mm->arg_end); | ||
544 | seq_put_decimal_ull(m, ' ', mm->env_start); | ||
545 | seq_put_decimal_ull(m, ' ', mm->env_end); | ||
546 | } else | ||
547 | seq_printf(m, " 0 0 0 0 0 0 0"); | ||
548 | |||
549 | if (permitted) | ||
550 | seq_put_decimal_ll(m, ' ', task->exit_code); | ||
551 | else | ||
552 | seq_put_decimal_ll(m, ' ', 0); | ||
553 | |||
554 | seq_putc(m, '\n'); | ||
555 | if (mm) | 515 | if (mm) |
556 | mmput(mm); | 516 | mmput(mm); |
557 | return 0; | 517 | return 0; |
@@ -579,143 +539,8 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | |||
579 | size = task_statm(mm, &shared, &text, &data, &resident); | 539 | size = task_statm(mm, &shared, &text, &data, &resident); |
580 | mmput(mm); | 540 | mmput(mm); |
581 | } | 541 | } |
582 | /* | 542 | seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", |
583 | * For quick read, open code by putting numbers directly | 543 | size, resident, shared, text, data); |
584 | * expected format is | ||
585 | * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", | ||
586 | * size, resident, shared, text, data); | ||
587 | */ | ||
588 | seq_put_decimal_ull(m, 0, size); | ||
589 | seq_put_decimal_ull(m, ' ', resident); | ||
590 | seq_put_decimal_ull(m, ' ', shared); | ||
591 | seq_put_decimal_ull(m, ' ', text); | ||
592 | seq_put_decimal_ull(m, ' ', 0); | ||
593 | seq_put_decimal_ull(m, ' ', data); | ||
594 | seq_put_decimal_ull(m, ' ', 0); | ||
595 | seq_putc(m, '\n'); | ||
596 | |||
597 | return 0; | ||
598 | } | ||
599 | |||
600 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
601 | static struct pid * | ||
602 | get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) | ||
603 | { | ||
604 | struct task_struct *start, *task; | ||
605 | struct pid *pid = NULL; | ||
606 | |||
607 | read_lock(&tasklist_lock); | ||
608 | |||
609 | start = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
610 | if (!start) | ||
611 | goto out; | ||
612 | |||
613 | /* | ||
614 | * Lets try to continue searching first, this gives | ||
615 | * us significant speedup on children-rich processes. | ||
616 | */ | ||
617 | if (pid_prev) { | ||
618 | task = pid_task(pid_prev, PIDTYPE_PID); | ||
619 | if (task && task->real_parent == start && | ||
620 | !(list_empty(&task->sibling))) { | ||
621 | if (list_is_last(&task->sibling, &start->children)) | ||
622 | goto out; | ||
623 | task = list_first_entry(&task->sibling, | ||
624 | struct task_struct, sibling); | ||
625 | pid = get_pid(task_pid(task)); | ||
626 | goto out; | ||
627 | } | ||
628 | } | ||
629 | |||
630 | /* | ||
631 | * Slow search case. | ||
632 | * | ||
633 | * We might miss some children here if children | ||
634 | * are exited while we were not holding the lock, | ||
635 | * but it was never promised to be accurate that | ||
636 | * much. | ||
637 | * | ||
638 | * "Just suppose that the parent sleeps, but N children | ||
639 | * exit after we printed their tids. Now the slow paths | ||
640 | * skips N extra children, we miss N tasks." (c) | ||
641 | * | ||
642 | * So one need to stop or freeze the leader and all | ||
643 | * its children to get a precise result. | ||
644 | */ | ||
645 | list_for_each_entry(task, &start->children, sibling) { | ||
646 | if (pos-- == 0) { | ||
647 | pid = get_pid(task_pid(task)); | ||
648 | break; | ||
649 | } | ||
650 | } | ||
651 | |||
652 | out: | ||
653 | read_unlock(&tasklist_lock); | ||
654 | return pid; | ||
655 | } | ||
656 | |||
657 | static int children_seq_show(struct seq_file *seq, void *v) | ||
658 | { | ||
659 | struct inode *inode = seq->private; | ||
660 | pid_t pid; | ||
661 | 544 | ||
662 | pid = pid_nr_ns(v, inode->i_sb->s_fs_info); | ||
663 | return seq_printf(seq, "%d ", pid); | ||
664 | } | ||
665 | |||
666 | static void *children_seq_start(struct seq_file *seq, loff_t *pos) | ||
667 | { | ||
668 | return get_children_pid(seq->private, NULL, *pos); | ||
669 | } | ||
670 | |||
671 | static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
672 | { | ||
673 | struct pid *pid; | ||
674 | |||
675 | pid = get_children_pid(seq->private, v, *pos + 1); | ||
676 | put_pid(v); | ||
677 | |||
678 | ++*pos; | ||
679 | return pid; | ||
680 | } | ||
681 | |||
682 | static void children_seq_stop(struct seq_file *seq, void *v) | ||
683 | { | ||
684 | put_pid(v); | ||
685 | } | ||
686 | |||
687 | static const struct seq_operations children_seq_ops = { | ||
688 | .start = children_seq_start, | ||
689 | .next = children_seq_next, | ||
690 | .stop = children_seq_stop, | ||
691 | .show = children_seq_show, | ||
692 | }; | ||
693 | |||
694 | static int children_seq_open(struct inode *inode, struct file *file) | ||
695 | { | ||
696 | struct seq_file *m; | ||
697 | int ret; | ||
698 | |||
699 | ret = seq_open(file, &children_seq_ops); | ||
700 | if (ret) | ||
701 | return ret; | ||
702 | |||
703 | m = file->private_data; | ||
704 | m->private = inode; | ||
705 | |||
706 | return ret; | ||
707 | } | ||
708 | |||
709 | int children_seq_release(struct inode *inode, struct file *file) | ||
710 | { | ||
711 | seq_release(inode, file); | ||
712 | return 0; | 545 | return 0; |
713 | } | 546 | } |
714 | |||
715 | const struct file_operations proc_tid_children_operations = { | ||
716 | .open = children_seq_open, | ||
717 | .read = seq_read, | ||
718 | .llseek = seq_lseek, | ||
719 | .release = children_seq_release, | ||
720 | }; | ||
721 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index 9b43ff77a51..fc03d161a1d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -81,16 +81,12 @@ | |||
81 | #include <linux/oom.h> | 81 | #include <linux/oom.h> |
82 | #include <linux/elf.h> | 82 | #include <linux/elf.h> |
83 | #include <linux/pid_namespace.h> | 83 | #include <linux/pid_namespace.h> |
84 | #include <linux/user_namespace.h> | ||
85 | #include <linux/fs_struct.h> | 84 | #include <linux/fs_struct.h> |
86 | #include <linux/slab.h> | 85 | #include <linux/slab.h> |
87 | #include <linux/flex_array.h> | ||
88 | #ifdef CONFIG_HARDWALL | 86 | #ifdef CONFIG_HARDWALL |
89 | #include <asm/hardwall.h> | 87 | #include <asm/hardwall.h> |
90 | #endif | 88 | #endif |
91 | #include <trace/events/oom.h> | ||
92 | #include "internal.h" | 89 | #include "internal.h" |
93 | #include "fd.h" | ||
94 | 90 | ||
95 | /* NOTE: | 91 | /* NOTE: |
96 | * Implementing inode permission operations in /proc is almost | 92 | * Implementing inode permission operations in /proc is almost |
@@ -105,7 +101,7 @@ | |||
105 | struct pid_entry { | 101 | struct pid_entry { |
106 | char *name; | 102 | char *name; |
107 | int len; | 103 | int len; |
108 | umode_t mode; | 104 | mode_t mode; |
109 | const struct inode_operations *iop; | 105 | const struct inode_operations *iop; |
110 | const struct file_operations *fop; | 106 | const struct file_operations *fop; |
111 | union proc_op op; | 107 | union proc_op op; |
@@ -137,6 +133,12 @@ struct pid_entry { | |||
137 | NULL, &proc_single_file_operations, \ | 133 | NULL, &proc_single_file_operations, \ |
138 | { .proc_show = show } ) | 134 | { .proc_show = show } ) |
139 | 135 | ||
136 | /* ANDROID is for special files in /proc. */ | ||
137 | #define ANDROID(NAME, MODE, OTYPE) \ | ||
138 | NOD(NAME, (S_IFREG|(MODE)), \ | ||
139 | &proc_##OTYPE##_inode_operations, \ | ||
140 | &proc_##OTYPE##_operations, {}) | ||
141 | |||
140 | /* | 142 | /* |
141 | * Count the number of hardlinks for the pid_entry table, excluding the . | 143 | * Count the number of hardlinks for the pid_entry table, excluding the . |
142 | * and .. links. | 144 | * and .. links. |
@@ -169,9 +171,9 @@ static int get_task_root(struct task_struct *task, struct path *root) | |||
169 | return result; | 171 | return result; |
170 | } | 172 | } |
171 | 173 | ||
172 | static int proc_cwd_link(struct dentry *dentry, struct path *path) | 174 | static int proc_cwd_link(struct inode *inode, struct path *path) |
173 | { | 175 | { |
174 | struct task_struct *task = get_proc_task(dentry->d_inode); | 176 | struct task_struct *task = get_proc_task(inode); |
175 | int result = -ENOENT; | 177 | int result = -ENOENT; |
176 | 178 | ||
177 | if (task) { | 179 | if (task) { |
@@ -186,9 +188,9 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path) | |||
186 | return result; | 188 | return result; |
187 | } | 189 | } |
188 | 190 | ||
189 | static int proc_root_link(struct dentry *dentry, struct path *path) | 191 | static int proc_root_link(struct inode *inode, struct path *path) |
190 | { | 192 | { |
191 | struct task_struct *task = get_proc_task(dentry->d_inode); | 193 | struct task_struct *task = get_proc_task(inode); |
192 | int result = -ENOENT; | 194 | int result = -ENOENT; |
193 | 195 | ||
194 | if (task) { | 196 | if (task) { |
@@ -198,6 +200,85 @@ static int proc_root_link(struct dentry *dentry, struct path *path) | |||
198 | return result; | 200 | return result; |
199 | } | 201 | } |
200 | 202 | ||
203 | static struct mm_struct *__check_mem_permission(struct task_struct *task) | ||
204 | { | ||
205 | struct mm_struct *mm; | ||
206 | |||
207 | mm = get_task_mm(task); | ||
208 | if (!mm) | ||
209 | return ERR_PTR(-EINVAL); | ||
210 | |||
211 | /* | ||
212 | * A task can always look at itself, in case it chooses | ||
213 | * to use system calls instead of load instructions. | ||
214 | */ | ||
215 | if (task == current) | ||
216 | return mm; | ||
217 | |||
218 | /* | ||
219 | * If current is actively ptrace'ing, and would also be | ||
220 | * permitted to freshly attach with ptrace now, permit it. | ||
221 | */ | ||
222 | if (task_is_stopped_or_traced(task)) { | ||
223 | int match; | ||
224 | rcu_read_lock(); | ||
225 | match = (ptrace_parent(task) == current); | ||
226 | rcu_read_unlock(); | ||
227 | if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) | ||
228 | return mm; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * No one else is allowed. | ||
233 | */ | ||
234 | mmput(mm); | ||
235 | return ERR_PTR(-EPERM); | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * If current may access user memory in @task return a reference to the | ||
240 | * corresponding mm, otherwise ERR_PTR. | ||
241 | */ | ||
242 | static struct mm_struct *check_mem_permission(struct task_struct *task) | ||
243 | { | ||
244 | struct mm_struct *mm; | ||
245 | int err; | ||
246 | |||
247 | /* | ||
248 | * Avoid racing if task exec's as we might get a new mm but validate | ||
249 | * against old credentials. | ||
250 | */ | ||
251 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); | ||
252 | if (err) | ||
253 | return ERR_PTR(err); | ||
254 | |||
255 | mm = __check_mem_permission(task); | ||
256 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
257 | |||
258 | return mm; | ||
259 | } | ||
260 | |||
261 | struct mm_struct *mm_for_maps(struct task_struct *task) | ||
262 | { | ||
263 | struct mm_struct *mm; | ||
264 | int err; | ||
265 | |||
266 | err = mutex_lock_killable(&task->signal->cred_guard_mutex); | ||
267 | if (err) | ||
268 | return ERR_PTR(err); | ||
269 | |||
270 | mm = get_task_mm(task); | ||
271 | if (mm && mm != current->mm && | ||
272 | !ptrace_may_access(task, PTRACE_MODE_READ) && | ||
273 | !capable(CAP_SYS_RESOURCE)) { | ||
274 | mmput(mm); | ||
275 | mm = ERR_PTR(-EACCES); | ||
276 | } | ||
277 | mutex_unlock(&task->signal->cred_guard_mutex); | ||
278 | |||
279 | return mm; | ||
280 | } | ||
281 | |||
201 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) | 282 | static int proc_pid_cmdline(struct task_struct *task, char * buffer) |
202 | { | 283 | { |
203 | int res = 0; | 284 | int res = 0; |
@@ -237,7 +318,7 @@ out: | |||
237 | 318 | ||
238 | static int proc_pid_auxv(struct task_struct *task, char *buffer) | 319 | static int proc_pid_auxv(struct task_struct *task, char *buffer) |
239 | { | 320 | { |
240 | struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); | 321 | struct mm_struct *mm = mm_for_maps(task); |
241 | int res = PTR_ERR(mm); | 322 | int res = PTR_ERR(mm); |
242 | if (mm && !IS_ERR(mm)) { | 323 | if (mm && !IS_ERR(mm)) { |
243 | unsigned int nwords = 0; | 324 | unsigned int nwords = 0; |
@@ -405,13 +486,12 @@ static const struct file_operations proc_lstats_operations = { | |||
405 | 486 | ||
406 | static int proc_oom_score(struct task_struct *task, char *buffer) | 487 | static int proc_oom_score(struct task_struct *task, char *buffer) |
407 | { | 488 | { |
408 | unsigned long totalpages = totalram_pages + total_swap_pages; | ||
409 | unsigned long points = 0; | 489 | unsigned long points = 0; |
410 | 490 | ||
411 | read_lock(&tasklist_lock); | 491 | read_lock(&tasklist_lock); |
412 | if (pid_alive(task)) | 492 | if (pid_alive(task)) |
413 | points = oom_badness(task, NULL, NULL, totalpages) * | 493 | points = oom_badness(task, NULL, NULL, |
414 | 1000 / totalpages; | 494 | totalram_pages + total_swap_pages); |
415 | read_unlock(&tasklist_lock); | 495 | read_unlock(&tasklist_lock); |
416 | return sprintf(buffer, "%lu\n", points); | 496 | return sprintf(buffer, "%lu\n", points); |
417 | } | 497 | } |
@@ -542,59 +622,134 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) | |||
542 | if (error) | 622 | if (error) |
543 | return error; | 623 | return error; |
544 | 624 | ||
625 | if ((attr->ia_valid & ATTR_SIZE) && | ||
626 | attr->ia_size != i_size_read(inode)) { | ||
627 | error = vmtruncate(inode, attr->ia_size); | ||
628 | if (error) | ||
629 | return error; | ||
630 | } | ||
631 | |||
545 | setattr_copy(inode, attr); | 632 | setattr_copy(inode, attr); |
546 | mark_inode_dirty(inode); | 633 | mark_inode_dirty(inode); |
547 | return 0; | 634 | return 0; |
548 | } | 635 | } |
549 | 636 | ||
550 | /* | 637 | static const struct inode_operations proc_def_inode_operations = { |
551 | * May current process learn task's sched/cmdline info (for hide_pid_min=1) | 638 | .setattr = proc_setattr, |
552 | * or euid/egid (for hide_pid_min=2)? | 639 | }; |
553 | */ | 640 | |
554 | static bool has_pid_permissions(struct pid_namespace *pid, | 641 | static int mounts_open_common(struct inode *inode, struct file *file, |
555 | struct task_struct *task, | 642 | const struct seq_operations *op) |
556 | int hide_pid_min) | ||
557 | { | 643 | { |
558 | if (pid->hide_pid < hide_pid_min) | 644 | struct task_struct *task = get_proc_task(inode); |
559 | return true; | 645 | struct nsproxy *nsp; |
560 | if (in_group_p(pid->pid_gid)) | 646 | struct mnt_namespace *ns = NULL; |
561 | return true; | 647 | struct path root; |
562 | return ptrace_may_access(task, PTRACE_MODE_READ); | 648 | struct proc_mounts *p; |
649 | int ret = -EINVAL; | ||
650 | |||
651 | if (task) { | ||
652 | rcu_read_lock(); | ||
653 | nsp = task_nsproxy(task); | ||
654 | if (nsp) { | ||
655 | ns = nsp->mnt_ns; | ||
656 | if (ns) | ||
657 | get_mnt_ns(ns); | ||
658 | } | ||
659 | rcu_read_unlock(); | ||
660 | if (ns && get_task_root(task, &root) == 0) | ||
661 | ret = 0; | ||
662 | put_task_struct(task); | ||
663 | } | ||
664 | |||
665 | if (!ns) | ||
666 | goto err; | ||
667 | if (ret) | ||
668 | goto err_put_ns; | ||
669 | |||
670 | ret = -ENOMEM; | ||
671 | p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); | ||
672 | if (!p) | ||
673 | goto err_put_path; | ||
674 | |||
675 | file->private_data = &p->m; | ||
676 | ret = seq_open(file, op); | ||
677 | if (ret) | ||
678 | goto err_free; | ||
679 | |||
680 | p->m.private = p; | ||
681 | p->ns = ns; | ||
682 | p->root = root; | ||
683 | p->m.poll_event = ns->event; | ||
684 | |||
685 | return 0; | ||
686 | |||
687 | err_free: | ||
688 | kfree(p); | ||
689 | err_put_path: | ||
690 | path_put(&root); | ||
691 | err_put_ns: | ||
692 | put_mnt_ns(ns); | ||
693 | err: | ||
694 | return ret; | ||
563 | } | 695 | } |
564 | 696 | ||
697 | static int mounts_release(struct inode *inode, struct file *file) | ||
698 | { | ||
699 | struct proc_mounts *p = file->private_data; | ||
700 | path_put(&p->root); | ||
701 | put_mnt_ns(p->ns); | ||
702 | return seq_release(inode, file); | ||
703 | } | ||
565 | 704 | ||
566 | static int proc_pid_permission(struct inode *inode, int mask) | 705 | static unsigned mounts_poll(struct file *file, poll_table *wait) |
567 | { | 706 | { |
568 | struct pid_namespace *pid = inode->i_sb->s_fs_info; | 707 | struct proc_mounts *p = file->private_data; |
569 | struct task_struct *task; | 708 | unsigned res = POLLIN | POLLRDNORM; |
570 | bool has_perms; | ||
571 | 709 | ||
572 | task = get_proc_task(inode); | 710 | poll_wait(file, &p->ns->poll, wait); |
573 | if (!task) | 711 | if (mnt_had_events(p)) |
574 | return -ESRCH; | 712 | res |= POLLERR | POLLPRI; |
575 | has_perms = has_pid_permissions(pid, task, 1); | ||
576 | put_task_struct(task); | ||
577 | 713 | ||
578 | if (!has_perms) { | 714 | return res; |
579 | if (pid->hide_pid == 2) { | 715 | } |
580 | /* | ||
581 | * Let's make getdents(), stat(), and open() | ||
582 | * consistent with each other. If a process | ||
583 | * may not stat() a file, it shouldn't be seen | ||
584 | * in procfs at all. | ||
585 | */ | ||
586 | return -ENOENT; | ||
587 | } | ||
588 | 716 | ||
589 | return -EPERM; | 717 | static int mounts_open(struct inode *inode, struct file *file) |
590 | } | 718 | { |
591 | return generic_permission(inode, mask); | 719 | return mounts_open_common(inode, file, &mounts_op); |
592 | } | 720 | } |
593 | 721 | ||
722 | static const struct file_operations proc_mounts_operations = { | ||
723 | .open = mounts_open, | ||
724 | .read = seq_read, | ||
725 | .llseek = seq_lseek, | ||
726 | .release = mounts_release, | ||
727 | .poll = mounts_poll, | ||
728 | }; | ||
594 | 729 | ||
730 | static int mountinfo_open(struct inode *inode, struct file *file) | ||
731 | { | ||
732 | return mounts_open_common(inode, file, &mountinfo_op); | ||
733 | } | ||
595 | 734 | ||
596 | static const struct inode_operations proc_def_inode_operations = { | 735 | static const struct file_operations proc_mountinfo_operations = { |
597 | .setattr = proc_setattr, | 736 | .open = mountinfo_open, |
737 | .read = seq_read, | ||
738 | .llseek = seq_lseek, | ||
739 | .release = mounts_release, | ||
740 | .poll = mounts_poll, | ||
741 | }; | ||
742 | |||
743 | static int mountstats_open(struct inode *inode, struct file *file) | ||
744 | { | ||
745 | return mounts_open_common(inode, file, &mountstats_op); | ||
746 | } | ||
747 | |||
748 | static const struct file_operations proc_mountstats_operations = { | ||
749 | .open = mountstats_open, | ||
750 | .read = seq_read, | ||
751 | .llseek = seq_lseek, | ||
752 | .release = mounts_release, | ||
598 | }; | 753 | }; |
599 | 754 | ||
600 | #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ | 755 | #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ |
@@ -666,105 +821,139 @@ static const struct file_operations proc_single_file_operations = { | |||
666 | .release = single_release, | 821 | .release = single_release, |
667 | }; | 822 | }; |
668 | 823 | ||
669 | static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) | 824 | static int mem_open(struct inode* inode, struct file* file) |
825 | { | ||
826 | file->private_data = (void*)((long)current->self_exec_id); | ||
827 | /* OK to pass negative loff_t, we can catch out-of-range */ | ||
828 | file->f_mode |= FMODE_UNSIGNED_OFFSET; | ||
829 | return 0; | ||
830 | } | ||
831 | |||
832 | static ssize_t mem_read(struct file * file, char __user * buf, | ||
833 | size_t count, loff_t *ppos) | ||
670 | { | 834 | { |
671 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 835 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
836 | char *page; | ||
837 | unsigned long src = *ppos; | ||
838 | int ret = -ESRCH; | ||
672 | struct mm_struct *mm; | 839 | struct mm_struct *mm; |
673 | 840 | ||
674 | if (!task) | 841 | if (!task) |
675 | return -ESRCH; | 842 | goto out_no_task; |
676 | 843 | ||
677 | mm = mm_access(task, mode); | 844 | ret = -ENOMEM; |
678 | put_task_struct(task); | 845 | page = (char *)__get_free_page(GFP_TEMPORARY); |
846 | if (!page) | ||
847 | goto out; | ||
679 | 848 | ||
849 | mm = check_mem_permission(task); | ||
850 | ret = PTR_ERR(mm); | ||
680 | if (IS_ERR(mm)) | 851 | if (IS_ERR(mm)) |
681 | return PTR_ERR(mm); | 852 | goto out_free; |
682 | |||
683 | if (mm) { | ||
684 | /* ensure this mm_struct can't be freed */ | ||
685 | atomic_inc(&mm->mm_count); | ||
686 | /* but do not pin its memory */ | ||
687 | mmput(mm); | ||
688 | } | ||
689 | 853 | ||
690 | file->private_data = mm; | 854 | ret = -EIO; |
855 | |||
856 | if (file->private_data != (void*)((long)current->self_exec_id)) | ||
857 | goto out_put; | ||
691 | 858 | ||
692 | return 0; | 859 | ret = 0; |
693 | } | 860 | |
861 | while (count > 0) { | ||
862 | int this_len, retval; | ||
694 | 863 | ||
695 | static int mem_open(struct inode *inode, struct file *file) | 864 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; |
696 | { | 865 | retval = access_remote_vm(mm, src, page, this_len, 0); |
697 | int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); | 866 | if (!retval) { |
867 | if (!ret) | ||
868 | ret = -EIO; | ||
869 | break; | ||
870 | } | ||
698 | 871 | ||
699 | /* OK to pass negative loff_t, we can catch out-of-range */ | 872 | if (copy_to_user(buf, page, retval)) { |
700 | file->f_mode |= FMODE_UNSIGNED_OFFSET; | 873 | ret = -EFAULT; |
874 | break; | ||
875 | } | ||
876 | |||
877 | ret += retval; | ||
878 | src += retval; | ||
879 | buf += retval; | ||
880 | count -= retval; | ||
881 | } | ||
882 | *ppos = src; | ||
701 | 883 | ||
884 | out_put: | ||
885 | mmput(mm); | ||
886 | out_free: | ||
887 | free_page((unsigned long) page); | ||
888 | out: | ||
889 | put_task_struct(task); | ||
890 | out_no_task: | ||
702 | return ret; | 891 | return ret; |
703 | } | 892 | } |
704 | 893 | ||
705 | static ssize_t mem_rw(struct file *file, char __user *buf, | 894 | #define mem_write NULL |
706 | size_t count, loff_t *ppos, int write) | 895 | |
896 | #ifndef mem_write | ||
897 | /* This is a security hazard */ | ||
898 | static ssize_t mem_write(struct file * file, const char __user *buf, | ||
899 | size_t count, loff_t *ppos) | ||
707 | { | 900 | { |
708 | struct mm_struct *mm = file->private_data; | 901 | int copied; |
709 | unsigned long addr = *ppos; | ||
710 | ssize_t copied; | ||
711 | char *page; | 902 | char *page; |
903 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | ||
904 | unsigned long dst = *ppos; | ||
905 | struct mm_struct *mm; | ||
712 | 906 | ||
713 | if (!mm) | 907 | copied = -ESRCH; |
714 | return 0; | 908 | if (!task) |
909 | goto out_no_task; | ||
715 | 910 | ||
911 | copied = -ENOMEM; | ||
716 | page = (char *)__get_free_page(GFP_TEMPORARY); | 912 | page = (char *)__get_free_page(GFP_TEMPORARY); |
717 | if (!page) | 913 | if (!page) |
718 | return -ENOMEM; | 914 | goto out_task; |
719 | 915 | ||
720 | copied = 0; | 916 | mm = check_mem_permission(task); |
721 | if (!atomic_inc_not_zero(&mm->mm_users)) | 917 | copied = PTR_ERR(mm); |
722 | goto free; | 918 | if (IS_ERR(mm)) |
919 | goto out_free; | ||
723 | 920 | ||
921 | copied = -EIO; | ||
922 | if (file->private_data != (void *)((long)current->self_exec_id)) | ||
923 | goto out_mm; | ||
924 | |||
925 | copied = 0; | ||
724 | while (count > 0) { | 926 | while (count > 0) { |
725 | int this_len = min_t(int, count, PAGE_SIZE); | 927 | int this_len, retval; |
726 | 928 | ||
727 | if (write && copy_from_user(page, buf, this_len)) { | 929 | this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; |
930 | if (copy_from_user(page, buf, this_len)) { | ||
728 | copied = -EFAULT; | 931 | copied = -EFAULT; |
729 | break; | 932 | break; |
730 | } | 933 | } |
731 | 934 | retval = access_remote_vm(mm, dst, page, this_len, 1); | |
732 | this_len = access_remote_vm(mm, addr, page, this_len, write); | 935 | if (!retval) { |
733 | if (!this_len) { | ||
734 | if (!copied) | 936 | if (!copied) |
735 | copied = -EIO; | 937 | copied = -EIO; |
736 | break; | 938 | break; |
737 | } | 939 | } |
738 | 940 | copied += retval; | |
739 | if (!write && copy_to_user(buf, page, this_len)) { | 941 | buf += retval; |
740 | copied = -EFAULT; | 942 | dst += retval; |
741 | break; | 943 | count -= retval; |
742 | } | ||
743 | |||
744 | buf += this_len; | ||
745 | addr += this_len; | ||
746 | copied += this_len; | ||
747 | count -= this_len; | ||
748 | } | 944 | } |
749 | *ppos = addr; | 945 | *ppos = dst; |
750 | 946 | ||
947 | out_mm: | ||
751 | mmput(mm); | 948 | mmput(mm); |
752 | free: | 949 | out_free: |
753 | free_page((unsigned long) page); | 950 | free_page((unsigned long) page); |
951 | out_task: | ||
952 | put_task_struct(task); | ||
953 | out_no_task: | ||
754 | return copied; | 954 | return copied; |
755 | } | 955 | } |
756 | 956 | #endif | |
757 | static ssize_t mem_read(struct file *file, char __user *buf, | ||
758 | size_t count, loff_t *ppos) | ||
759 | { | ||
760 | return mem_rw(file, buf, count, ppos, 0); | ||
761 | } | ||
762 | |||
763 | static ssize_t mem_write(struct file *file, const char __user *buf, | ||
764 | size_t count, loff_t *ppos) | ||
765 | { | ||
766 | return mem_rw(file, (char __user*)buf, count, ppos, 1); | ||
767 | } | ||
768 | 957 | ||
769 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) | 958 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) |
770 | { | 959 | { |
@@ -782,58 +971,49 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) | |||
782 | return file->f_pos; | 971 | return file->f_pos; |
783 | } | 972 | } |
784 | 973 | ||
785 | static int mem_release(struct inode *inode, struct file *file) | ||
786 | { | ||
787 | struct mm_struct *mm = file->private_data; | ||
788 | if (mm) | ||
789 | mmdrop(mm); | ||
790 | return 0; | ||
791 | } | ||
792 | |||
793 | static const struct file_operations proc_mem_operations = { | 974 | static const struct file_operations proc_mem_operations = { |
794 | .llseek = mem_lseek, | 975 | .llseek = mem_lseek, |
795 | .read = mem_read, | 976 | .read = mem_read, |
796 | .write = mem_write, | 977 | .write = mem_write, |
797 | .open = mem_open, | 978 | .open = mem_open, |
798 | .release = mem_release, | ||
799 | }; | 979 | }; |
800 | 980 | ||
801 | static int environ_open(struct inode *inode, struct file *file) | ||
802 | { | ||
803 | return __mem_open(inode, file, PTRACE_MODE_READ); | ||
804 | } | ||
805 | |||
806 | static ssize_t environ_read(struct file *file, char __user *buf, | 981 | static ssize_t environ_read(struct file *file, char __user *buf, |
807 | size_t count, loff_t *ppos) | 982 | size_t count, loff_t *ppos) |
808 | { | 983 | { |
984 | struct task_struct *task = get_proc_task(file->f_dentry->d_inode); | ||
809 | char *page; | 985 | char *page; |
810 | unsigned long src = *ppos; | 986 | unsigned long src = *ppos; |
811 | int ret = 0; | 987 | int ret = -ESRCH; |
812 | struct mm_struct *mm = file->private_data; | 988 | struct mm_struct *mm; |
813 | 989 | ||
814 | if (!mm) | 990 | if (!task) |
815 | return 0; | 991 | goto out_no_task; |
816 | 992 | ||
993 | ret = -ENOMEM; | ||
817 | page = (char *)__get_free_page(GFP_TEMPORARY); | 994 | page = (char *)__get_free_page(GFP_TEMPORARY); |
818 | if (!page) | 995 | if (!page) |
819 | return -ENOMEM; | 996 | goto out; |
997 | |||
998 | |||
999 | mm = mm_for_maps(task); | ||
1000 | ret = PTR_ERR(mm); | ||
1001 | if (!mm || IS_ERR(mm)) | ||
1002 | goto out_free; | ||
820 | 1003 | ||
821 | ret = 0; | 1004 | ret = 0; |
822 | if (!atomic_inc_not_zero(&mm->mm_users)) | ||
823 | goto free; | ||
824 | while (count > 0) { | 1005 | while (count > 0) { |
825 | size_t this_len, max_len; | 1006 | int this_len, retval, max_len; |
826 | int retval; | ||
827 | |||
828 | if (src >= (mm->env_end - mm->env_start)) | ||
829 | break; | ||
830 | 1007 | ||
831 | this_len = mm->env_end - (mm->env_start + src); | 1008 | this_len = mm->env_end - (mm->env_start + src); |
832 | 1009 | ||
833 | max_len = min_t(size_t, PAGE_SIZE, count); | 1010 | if (this_len <= 0) |
834 | this_len = min(max_len, this_len); | 1011 | break; |
1012 | |||
1013 | max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; | ||
1014 | this_len = (this_len > max_len) ? max_len : this_len; | ||
835 | 1015 | ||
836 | retval = access_remote_vm(mm, (mm->env_start + src), | 1016 | retval = access_process_vm(task, (mm->env_start + src), |
837 | page, this_len, 0); | 1017 | page, this_len, 0); |
838 | 1018 | ||
839 | if (retval <= 0) { | 1019 | if (retval <= 0) { |
@@ -852,50 +1032,51 @@ static ssize_t environ_read(struct file *file, char __user *buf, | |||
852 | count -= retval; | 1032 | count -= retval; |
853 | } | 1033 | } |
854 | *ppos = src; | 1034 | *ppos = src; |
855 | mmput(mm); | ||
856 | 1035 | ||
857 | free: | 1036 | mmput(mm); |
1037 | out_free: | ||
858 | free_page((unsigned long) page); | 1038 | free_page((unsigned long) page); |
1039 | out: | ||
1040 | put_task_struct(task); | ||
1041 | out_no_task: | ||
859 | return ret; | 1042 | return ret; |
860 | } | 1043 | } |
861 | 1044 | ||
862 | static const struct file_operations proc_environ_operations = { | 1045 | static const struct file_operations proc_environ_operations = { |
863 | .open = environ_open, | ||
864 | .read = environ_read, | 1046 | .read = environ_read, |
865 | .llseek = generic_file_llseek, | 1047 | .llseek = generic_file_llseek, |
866 | .release = mem_release, | ||
867 | }; | 1048 | }; |
868 | 1049 | ||
869 | static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, | 1050 | static ssize_t oom_adjust_read(struct file *file, char __user *buf, |
870 | loff_t *ppos) | 1051 | size_t count, loff_t *ppos) |
871 | { | 1052 | { |
872 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 1053 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
873 | char buffer[PROC_NUMBUF]; | 1054 | char buffer[PROC_NUMBUF]; |
874 | int oom_adj = OOM_ADJUST_MIN; | ||
875 | size_t len; | 1055 | size_t len; |
1056 | int oom_adjust = OOM_DISABLE; | ||
876 | unsigned long flags; | 1057 | unsigned long flags; |
877 | 1058 | ||
878 | if (!task) | 1059 | if (!task) |
879 | return -ESRCH; | 1060 | return -ESRCH; |
1061 | |||
880 | if (lock_task_sighand(task, &flags)) { | 1062 | if (lock_task_sighand(task, &flags)) { |
881 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) | 1063 | oom_adjust = task->signal->oom_adj; |
882 | oom_adj = OOM_ADJUST_MAX; | ||
883 | else | ||
884 | oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / | ||
885 | OOM_SCORE_ADJ_MAX; | ||
886 | unlock_task_sighand(task, &flags); | 1064 | unlock_task_sighand(task, &flags); |
887 | } | 1065 | } |
1066 | |||
888 | put_task_struct(task); | 1067 | put_task_struct(task); |
889 | len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); | 1068 | |
1069 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | ||
1070 | |||
890 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | 1071 | return simple_read_from_buffer(buf, count, ppos, buffer, len); |
891 | } | 1072 | } |
892 | 1073 | ||
893 | static ssize_t oom_adj_write(struct file *file, const char __user *buf, | 1074 | static ssize_t oom_adjust_write(struct file *file, const char __user *buf, |
894 | size_t count, loff_t *ppos) | 1075 | size_t count, loff_t *ppos) |
895 | { | 1076 | { |
896 | struct task_struct *task; | 1077 | struct task_struct *task; |
897 | char buffer[PROC_NUMBUF]; | 1078 | char buffer[PROC_NUMBUF]; |
898 | int oom_adj; | 1079 | int oom_adjust; |
899 | unsigned long flags; | 1080 | unsigned long flags; |
900 | int err; | 1081 | int err; |
901 | 1082 | ||
@@ -907,11 +1088,11 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, | |||
907 | goto out; | 1088 | goto out; |
908 | } | 1089 | } |
909 | 1090 | ||
910 | err = kstrtoint(strstrip(buffer), 0, &oom_adj); | 1091 | err = kstrtoint(strstrip(buffer), 0, &oom_adjust); |
911 | if (err) | 1092 | if (err) |
912 | goto out; | 1093 | goto out; |
913 | if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && | 1094 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && |
914 | oom_adj != OOM_DISABLE) { | 1095 | oom_adjust != OOM_DISABLE) { |
915 | err = -EINVAL; | 1096 | err = -EINVAL; |
916 | goto out; | 1097 | goto out; |
917 | } | 1098 | } |
@@ -933,31 +1114,35 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf, | |||
933 | goto err_task_lock; | 1114 | goto err_task_lock; |
934 | } | 1115 | } |
935 | 1116 | ||
936 | /* | 1117 | if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { |
937 | * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum | ||
938 | * value is always attainable. | ||
939 | */ | ||
940 | if (oom_adj == OOM_ADJUST_MAX) | ||
941 | oom_adj = OOM_SCORE_ADJ_MAX; | ||
942 | else | ||
943 | oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; | ||
944 | |||
945 | if (oom_adj < task->signal->oom_score_adj && | ||
946 | !capable(CAP_SYS_RESOURCE)) { | ||
947 | err = -EACCES; | 1118 | err = -EACCES; |
948 | goto err_sighand; | 1119 | goto err_sighand; |
949 | } | 1120 | } |
950 | 1121 | ||
1122 | if (oom_adjust != task->signal->oom_adj) { | ||
1123 | if (oom_adjust == OOM_DISABLE) | ||
1124 | atomic_inc(&task->mm->oom_disable_count); | ||
1125 | if (task->signal->oom_adj == OOM_DISABLE) | ||
1126 | atomic_dec(&task->mm->oom_disable_count); | ||
1127 | } | ||
1128 | |||
951 | /* | 1129 | /* |
952 | * /proc/pid/oom_adj is provided for legacy purposes, ask users to use | 1130 | * Warn that /proc/pid/oom_adj is deprecated, see |
953 | * /proc/pid/oom_score_adj instead. | 1131 | * Documentation/feature-removal-schedule.txt. |
954 | */ | 1132 | */ |
955 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", | 1133 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", |
956 | current->comm, task_pid_nr(current), task_pid_nr(task), | 1134 | current->comm, task_pid_nr(current), task_pid_nr(task), |
957 | task_pid_nr(task)); | 1135 | task_pid_nr(task)); |
958 | 1136 | task->signal->oom_adj = oom_adjust; | |
959 | task->signal->oom_score_adj = oom_adj; | 1137 | /* |
960 | trace_oom_score_adj_update(task); | 1138 | * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum |
1139 | * value is always attainable. | ||
1140 | */ | ||
1141 | if (task->signal->oom_adj == OOM_ADJUST_MAX) | ||
1142 | task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; | ||
1143 | else | ||
1144 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / | ||
1145 | -OOM_DISABLE; | ||
961 | err_sighand: | 1146 | err_sighand: |
962 | unlock_task_sighand(task, &flags); | 1147 | unlock_task_sighand(task, &flags); |
963 | err_task_lock: | 1148 | err_task_lock: |
@@ -967,9 +1152,41 @@ out: | |||
967 | return err < 0 ? err : count; | 1152 | return err < 0 ? err : count; |
968 | } | 1153 | } |
969 | 1154 | ||
970 | static const struct file_operations proc_oom_adj_operations = { | 1155 | static int oom_adjust_permission(struct inode *inode, int mask) |
971 | .read = oom_adj_read, | 1156 | { |
972 | .write = oom_adj_write, | 1157 | uid_t uid; |
1158 | struct task_struct *p; | ||
1159 | |||
1160 | if (mask & MAY_NOT_BLOCK) | ||
1161 | return -ECHILD; | ||
1162 | |||
1163 | p = get_proc_task(inode); | ||
1164 | if(p) { | ||
1165 | uid = task_uid(p); | ||
1166 | put_task_struct(p); | ||
1167 | } | ||
1168 | |||
1169 | /* | ||
1170 | * System Server (uid == 1000) is granted access to oom_adj of all | ||
1171 | * android applications (uid > 10000) as and services (uid >= 1000) | ||
1172 | */ | ||
1173 | if (p && (current_fsuid() == 1000) && (uid >= 1000)) { | ||
1174 | if (inode->i_mode >> 6 & mask) { | ||
1175 | return 0; | ||
1176 | } | ||
1177 | } | ||
1178 | |||
1179 | /* Fall back to default. */ | ||
1180 | return generic_permission(inode, mask); | ||
1181 | } | ||
1182 | |||
1183 | static const struct inode_operations proc_oom_adjust_inode_operations = { | ||
1184 | .permission = oom_adjust_permission, | ||
1185 | }; | ||
1186 | |||
1187 | static const struct file_operations proc_oom_adjust_operations = { | ||
1188 | .read = oom_adjust_read, | ||
1189 | .write = oom_adjust_write, | ||
973 | .llseek = generic_file_llseek, | 1190 | .llseek = generic_file_llseek, |
974 | }; | 1191 | }; |
975 | 1192 | ||
@@ -978,7 +1195,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, | |||
978 | { | 1195 | { |
979 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 1196 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
980 | char buffer[PROC_NUMBUF]; | 1197 | char buffer[PROC_NUMBUF]; |
981 | short oom_score_adj = OOM_SCORE_ADJ_MIN; | 1198 | int oom_score_adj = OOM_SCORE_ADJ_MIN; |
982 | unsigned long flags; | 1199 | unsigned long flags; |
983 | size_t len; | 1200 | size_t len; |
984 | 1201 | ||
@@ -989,7 +1206,7 @@ static ssize_t oom_score_adj_read(struct file *file, char __user *buf, | |||
989 | unlock_task_sighand(task, &flags); | 1206 | unlock_task_sighand(task, &flags); |
990 | } | 1207 | } |
991 | put_task_struct(task); | 1208 | put_task_struct(task); |
992 | len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); | 1209 | len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); |
993 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | 1210 | return simple_read_from_buffer(buf, count, ppos, buffer, len); |
994 | } | 1211 | } |
995 | 1212 | ||
@@ -1036,17 +1253,30 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1036 | goto err_task_lock; | 1253 | goto err_task_lock; |
1037 | } | 1254 | } |
1038 | 1255 | ||
1039 | if ((short)oom_score_adj < task->signal->oom_score_adj_min && | 1256 | if (oom_score_adj < task->signal->oom_score_adj_min && |
1040 | !capable(CAP_SYS_RESOURCE)) { | 1257 | !capable(CAP_SYS_RESOURCE)) { |
1041 | err = -EACCES; | 1258 | err = -EACCES; |
1042 | goto err_sighand; | 1259 | goto err_sighand; |
1043 | } | 1260 | } |
1044 | 1261 | ||
1045 | task->signal->oom_score_adj = (short)oom_score_adj; | 1262 | if (oom_score_adj != task->signal->oom_score_adj) { |
1263 | if (oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1264 | atomic_inc(&task->mm->oom_disable_count); | ||
1265 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1266 | atomic_dec(&task->mm->oom_disable_count); | ||
1267 | } | ||
1268 | task->signal->oom_score_adj = oom_score_adj; | ||
1046 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) | 1269 | if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) |
1047 | task->signal->oom_score_adj_min = (short)oom_score_adj; | 1270 | task->signal->oom_score_adj_min = oom_score_adj; |
1048 | trace_oom_score_adj_update(task); | 1271 | /* |
1049 | 1272 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | |
1273 | * always attainable. | ||
1274 | */ | ||
1275 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1276 | task->signal->oom_adj = OOM_DISABLE; | ||
1277 | else | ||
1278 | task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / | ||
1279 | OOM_SCORE_ADJ_MAX; | ||
1050 | err_sighand: | 1280 | err_sighand: |
1051 | unlock_task_sighand(task, &flags); | 1281 | unlock_task_sighand(task, &flags); |
1052 | err_task_lock: | 1282 | err_task_lock: |
@@ -1075,8 +1305,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | |||
1075 | if (!task) | 1305 | if (!task) |
1076 | return -ESRCH; | 1306 | return -ESRCH; |
1077 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", | 1307 | length = scnprintf(tmpbuf, TMPBUFLEN, "%u", |
1078 | from_kuid(file->f_cred->user_ns, | 1308 | audit_get_loginuid(task)); |
1079 | audit_get_loginuid(task))); | ||
1080 | put_task_struct(task); | 1309 | put_task_struct(task); |
1081 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); | 1310 | return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); |
1082 | } | 1311 | } |
@@ -1088,7 +1317,9 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1088 | char *page, *tmp; | 1317 | char *page, *tmp; |
1089 | ssize_t length; | 1318 | ssize_t length; |
1090 | uid_t loginuid; | 1319 | uid_t loginuid; |
1091 | kuid_t kloginuid; | 1320 | |
1321 | if (!capable(CAP_AUDIT_CONTROL)) | ||
1322 | return -EPERM; | ||
1092 | 1323 | ||
1093 | rcu_read_lock(); | 1324 | rcu_read_lock(); |
1094 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { | 1325 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { |
@@ -1118,13 +1349,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1118 | goto out_free_page; | 1349 | goto out_free_page; |
1119 | 1350 | ||
1120 | } | 1351 | } |
1121 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); | 1352 | length = audit_set_loginuid(current, loginuid); |
1122 | if (!uid_valid(kloginuid)) { | ||
1123 | length = -EINVAL; | ||
1124 | goto out_free_page; | ||
1125 | } | ||
1126 | |||
1127 | length = audit_set_loginuid(kloginuid); | ||
1128 | if (likely(length == 0)) | 1353 | if (likely(length == 0)) |
1129 | length = count; | 1354 | length = count; |
1130 | 1355 | ||
@@ -1308,7 +1533,8 @@ sched_autogroup_write(struct file *file, const char __user *buf, | |||
1308 | if (!p) | 1533 | if (!p) |
1309 | return -ESRCH; | 1534 | return -ESRCH; |
1310 | 1535 | ||
1311 | err = proc_sched_autogroup_set_nice(p, nice); | 1536 | err = nice; |
1537 | err = proc_sched_autogroup_set_nice(p, &err); | ||
1312 | if (err) | 1538 | if (err) |
1313 | count = err; | 1539 | count = err; |
1314 | 1540 | ||
@@ -1398,13 +1624,13 @@ static const struct file_operations proc_pid_set_comm_operations = { | |||
1398 | .release = single_release, | 1624 | .release = single_release, |
1399 | }; | 1625 | }; |
1400 | 1626 | ||
1401 | static int proc_exe_link(struct dentry *dentry, struct path *exe_path) | 1627 | static int proc_exe_link(struct inode *inode, struct path *exe_path) |
1402 | { | 1628 | { |
1403 | struct task_struct *task; | 1629 | struct task_struct *task; |
1404 | struct mm_struct *mm; | 1630 | struct mm_struct *mm; |
1405 | struct file *exe_file; | 1631 | struct file *exe_file; |
1406 | 1632 | ||
1407 | task = get_proc_task(dentry->d_inode); | 1633 | task = get_proc_task(inode); |
1408 | if (!task) | 1634 | if (!task) |
1409 | return -ENOENT; | 1635 | return -ENOENT; |
1410 | mm = get_task_mm(task); | 1636 | mm = get_task_mm(task); |
@@ -1425,19 +1651,16 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) | |||
1425 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) | 1651 | static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) |
1426 | { | 1652 | { |
1427 | struct inode *inode = dentry->d_inode; | 1653 | struct inode *inode = dentry->d_inode; |
1428 | struct path path; | ||
1429 | int error = -EACCES; | 1654 | int error = -EACCES; |
1430 | 1655 | ||
1656 | /* We don't need a base pointer in the /proc filesystem */ | ||
1657 | path_put(&nd->path); | ||
1658 | |||
1431 | /* Are we allowed to snoop on the tasks file descriptors? */ | 1659 | /* Are we allowed to snoop on the tasks file descriptors? */ |
1432 | if (!proc_fd_access_allowed(inode)) | 1660 | if (!proc_fd_access_allowed(inode)) |
1433 | goto out; | 1661 | goto out; |
1434 | 1662 | ||
1435 | error = PROC_I(inode)->op.proc_get_link(dentry, &path); | 1663 | error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); |
1436 | if (error) | ||
1437 | goto out; | ||
1438 | |||
1439 | nd_jump_link(nd, &path); | ||
1440 | return NULL; | ||
1441 | out: | 1664 | out: |
1442 | return ERR_PTR(error); | 1665 | return ERR_PTR(error); |
1443 | } | 1666 | } |
@@ -1476,7 +1699,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b | |||
1476 | if (!proc_fd_access_allowed(inode)) | 1699 | if (!proc_fd_access_allowed(inode)) |
1477 | goto out; | 1700 | goto out; |
1478 | 1701 | ||
1479 | error = PROC_I(inode)->op.proc_get_link(dentry, &path); | 1702 | error = PROC_I(inode)->op.proc_get_link(inode, &path); |
1480 | if (error) | 1703 | if (error) |
1481 | goto out; | 1704 | goto out; |
1482 | 1705 | ||
@@ -1486,7 +1709,7 @@ out: | |||
1486 | return error; | 1709 | return error; |
1487 | } | 1710 | } |
1488 | 1711 | ||
1489 | const struct inode_operations proc_pid_link_inode_operations = { | 1712 | static const struct inode_operations proc_pid_link_inode_operations = { |
1490 | .readlink = proc_pid_readlink, | 1713 | .readlink = proc_pid_readlink, |
1491 | .follow_link = proc_pid_follow_link, | 1714 | .follow_link = proc_pid_follow_link, |
1492 | .setattr = proc_setattr, | 1715 | .setattr = proc_setattr, |
@@ -1495,6 +1718,21 @@ const struct inode_operations proc_pid_link_inode_operations = { | |||
1495 | 1718 | ||
1496 | /* building an inode */ | 1719 | /* building an inode */ |
1497 | 1720 | ||
1721 | static int task_dumpable(struct task_struct *task) | ||
1722 | { | ||
1723 | int dumpable = 0; | ||
1724 | struct mm_struct *mm; | ||
1725 | |||
1726 | task_lock(task); | ||
1727 | mm = task->mm; | ||
1728 | if (mm) | ||
1729 | dumpable = get_dumpable(mm); | ||
1730 | task_unlock(task); | ||
1731 | if(dumpable == 1) | ||
1732 | return 1; | ||
1733 | return 0; | ||
1734 | } | ||
1735 | |||
1498 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | 1736 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) |
1499 | { | 1737 | { |
1500 | struct inode * inode; | 1738 | struct inode * inode; |
@@ -1542,23 +1780,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
1542 | struct inode *inode = dentry->d_inode; | 1780 | struct inode *inode = dentry->d_inode; |
1543 | struct task_struct *task; | 1781 | struct task_struct *task; |
1544 | const struct cred *cred; | 1782 | const struct cred *cred; |
1545 | struct pid_namespace *pid = dentry->d_sb->s_fs_info; | ||
1546 | 1783 | ||
1547 | generic_fillattr(inode, stat); | 1784 | generic_fillattr(inode, stat); |
1548 | 1785 | ||
1549 | rcu_read_lock(); | 1786 | rcu_read_lock(); |
1550 | stat->uid = GLOBAL_ROOT_UID; | 1787 | stat->uid = 0; |
1551 | stat->gid = GLOBAL_ROOT_GID; | 1788 | stat->gid = 0; |
1552 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 1789 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
1553 | if (task) { | 1790 | if (task) { |
1554 | if (!has_pid_permissions(pid, task, 2)) { | ||
1555 | rcu_read_unlock(); | ||
1556 | /* | ||
1557 | * This doesn't prevent learning whether PID exists, | ||
1558 | * it only makes getattr() consistent with readdir(). | ||
1559 | */ | ||
1560 | return -ENOENT; | ||
1561 | } | ||
1562 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || | 1791 | if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || |
1563 | task_dumpable(task)) { | 1792 | task_dumpable(task)) { |
1564 | cred = __task_cred(task); | 1793 | cred = __task_cred(task); |
@@ -1587,13 +1816,13 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
1587 | * made this apply to all per process world readable and executable | 1816 | * made this apply to all per process world readable and executable |
1588 | * directories. | 1817 | * directories. |
1589 | */ | 1818 | */ |
1590 | int pid_revalidate(struct dentry *dentry, unsigned int flags) | 1819 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
1591 | { | 1820 | { |
1592 | struct inode *inode; | 1821 | struct inode *inode; |
1593 | struct task_struct *task; | 1822 | struct task_struct *task; |
1594 | const struct cred *cred; | 1823 | const struct cred *cred; |
1595 | 1824 | ||
1596 | if (flags & LOOKUP_RCU) | 1825 | if (nd && nd->flags & LOOKUP_RCU) |
1597 | return -ECHILD; | 1826 | return -ECHILD; |
1598 | 1827 | ||
1599 | inode = dentry->d_inode; | 1828 | inode = dentry->d_inode; |
@@ -1608,8 +1837,8 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) | |||
1608 | inode->i_gid = cred->egid; | 1837 | inode->i_gid = cred->egid; |
1609 | rcu_read_unlock(); | 1838 | rcu_read_unlock(); |
1610 | } else { | 1839 | } else { |
1611 | inode->i_uid = GLOBAL_ROOT_UID; | 1840 | inode->i_uid = 0; |
1612 | inode->i_gid = GLOBAL_ROOT_GID; | 1841 | inode->i_gid = 0; |
1613 | } | 1842 | } |
1614 | inode->i_mode &= ~(S_ISUID | S_ISGID); | 1843 | inode->i_mode &= ~(S_ISUID | S_ISGID); |
1615 | security_task_to_inode(task, inode); | 1844 | security_task_to_inode(task, inode); |
@@ -1620,6 +1849,15 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) | |||
1620 | return 0; | 1849 | return 0; |
1621 | } | 1850 | } |
1622 | 1851 | ||
1852 | static int pid_delete_dentry(const struct dentry * dentry) | ||
1853 | { | ||
1854 | /* Is the task we represent dead? | ||
1855 | * If so, then don't put the dentry on the lru list, | ||
1856 | * kill it immediately. | ||
1857 | */ | ||
1858 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
1859 | } | ||
1860 | |||
1623 | const struct dentry_operations pid_dentry_operations = | 1861 | const struct dentry_operations pid_dentry_operations = |
1624 | { | 1862 | { |
1625 | .d_revalidate = pid_revalidate, | 1863 | .d_revalidate = pid_revalidate, |
@@ -1682,337 +1920,375 @@ end_instantiate: | |||
1682 | return filldir(dirent, name, len, filp->f_pos, ino, type); | 1920 | return filldir(dirent, name, len, filp->f_pos, ino, type); |
1683 | } | 1921 | } |
1684 | 1922 | ||
1685 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1923 | static unsigned name_to_int(struct dentry *dentry) |
1924 | { | ||
1925 | const char *name = dentry->d_name.name; | ||
1926 | int len = dentry->d_name.len; | ||
1927 | unsigned n = 0; | ||
1686 | 1928 | ||
1687 | /* | 1929 | if (len > 1 && *name == '0') |
1688 | * dname_to_vma_addr - maps a dentry name into two unsigned longs | 1930 | goto out; |
1689 | * which represent vma start and end addresses. | 1931 | while (len-- > 0) { |
1690 | */ | 1932 | unsigned c = *name++ - '0'; |
1691 | static int dname_to_vma_addr(struct dentry *dentry, | 1933 | if (c > 9) |
1692 | unsigned long *start, unsigned long *end) | 1934 | goto out; |
1935 | if (n >= (~0U-9)/10) | ||
1936 | goto out; | ||
1937 | n *= 10; | ||
1938 | n += c; | ||
1939 | } | ||
1940 | return n; | ||
1941 | out: | ||
1942 | return ~0U; | ||
1943 | } | ||
1944 | |||
1945 | #define PROC_FDINFO_MAX 64 | ||
1946 | |||
1947 | static int proc_fd_info(struct inode *inode, struct path *path, char *info) | ||
1693 | { | 1948 | { |
1694 | if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) | 1949 | struct task_struct *task = get_proc_task(inode); |
1695 | return -EINVAL; | 1950 | struct files_struct *files = NULL; |
1951 | struct file *file; | ||
1952 | int fd = proc_fd(inode); | ||
1696 | 1953 | ||
1697 | return 0; | 1954 | if (task) { |
1955 | files = get_files_struct(task); | ||
1956 | put_task_struct(task); | ||
1957 | } | ||
1958 | if (files) { | ||
1959 | /* | ||
1960 | * We are not taking a ref to the file structure, so we must | ||
1961 | * hold ->file_lock. | ||
1962 | */ | ||
1963 | spin_lock(&files->file_lock); | ||
1964 | file = fcheck_files(files, fd); | ||
1965 | if (file) { | ||
1966 | unsigned int f_flags; | ||
1967 | struct fdtable *fdt; | ||
1968 | |||
1969 | fdt = files_fdtable(files); | ||
1970 | f_flags = file->f_flags & ~O_CLOEXEC; | ||
1971 | if (FD_ISSET(fd, fdt->close_on_exec)) | ||
1972 | f_flags |= O_CLOEXEC; | ||
1973 | |||
1974 | if (path) { | ||
1975 | *path = file->f_path; | ||
1976 | path_get(&file->f_path); | ||
1977 | } | ||
1978 | if (info) | ||
1979 | snprintf(info, PROC_FDINFO_MAX, | ||
1980 | "pos:\t%lli\n" | ||
1981 | "flags:\t0%o\n", | ||
1982 | (long long) file->f_pos, | ||
1983 | f_flags); | ||
1984 | spin_unlock(&files->file_lock); | ||
1985 | put_files_struct(files); | ||
1986 | return 0; | ||
1987 | } | ||
1988 | spin_unlock(&files->file_lock); | ||
1989 | put_files_struct(files); | ||
1990 | } | ||
1991 | return -ENOENT; | ||
1698 | } | 1992 | } |
1699 | 1993 | ||
1700 | static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) | 1994 | static int proc_fd_link(struct inode *inode, struct path *path) |
1701 | { | 1995 | { |
1702 | unsigned long vm_start, vm_end; | 1996 | return proc_fd_info(inode, path, NULL); |
1703 | bool exact_vma_exists = false; | 1997 | } |
1704 | struct mm_struct *mm = NULL; | 1998 | |
1999 | static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
2000 | { | ||
2001 | struct inode *inode; | ||
1705 | struct task_struct *task; | 2002 | struct task_struct *task; |
2003 | int fd; | ||
2004 | struct files_struct *files; | ||
1706 | const struct cred *cred; | 2005 | const struct cred *cred; |
1707 | struct inode *inode; | ||
1708 | int status = 0; | ||
1709 | 2006 | ||
1710 | if (flags & LOOKUP_RCU) | 2007 | if (nd && nd->flags & LOOKUP_RCU) |
1711 | return -ECHILD; | 2008 | return -ECHILD; |
1712 | 2009 | ||
1713 | if (!capable(CAP_SYS_ADMIN)) { | ||
1714 | status = -EACCES; | ||
1715 | goto out_notask; | ||
1716 | } | ||
1717 | |||
1718 | inode = dentry->d_inode; | 2010 | inode = dentry->d_inode; |
1719 | task = get_proc_task(inode); | 2011 | task = get_proc_task(inode); |
1720 | if (!task) | 2012 | fd = proc_fd(inode); |
1721 | goto out_notask; | ||
1722 | |||
1723 | mm = mm_access(task, PTRACE_MODE_READ); | ||
1724 | if (IS_ERR_OR_NULL(mm)) | ||
1725 | goto out; | ||
1726 | |||
1727 | if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { | ||
1728 | down_read(&mm->mmap_sem); | ||
1729 | exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); | ||
1730 | up_read(&mm->mmap_sem); | ||
1731 | } | ||
1732 | |||
1733 | mmput(mm); | ||
1734 | 2013 | ||
1735 | if (exact_vma_exists) { | 2014 | if (task) { |
1736 | if (task_dumpable(task)) { | 2015 | files = get_files_struct(task); |
2016 | if (files) { | ||
1737 | rcu_read_lock(); | 2017 | rcu_read_lock(); |
1738 | cred = __task_cred(task); | 2018 | if (fcheck_files(files, fd)) { |
1739 | inode->i_uid = cred->euid; | 2019 | rcu_read_unlock(); |
1740 | inode->i_gid = cred->egid; | 2020 | put_files_struct(files); |
2021 | if (task_dumpable(task)) { | ||
2022 | rcu_read_lock(); | ||
2023 | cred = __task_cred(task); | ||
2024 | inode->i_uid = cred->euid; | ||
2025 | inode->i_gid = cred->egid; | ||
2026 | rcu_read_unlock(); | ||
2027 | } else { | ||
2028 | inode->i_uid = 0; | ||
2029 | inode->i_gid = 0; | ||
2030 | } | ||
2031 | inode->i_mode &= ~(S_ISUID | S_ISGID); | ||
2032 | security_task_to_inode(task, inode); | ||
2033 | put_task_struct(task); | ||
2034 | return 1; | ||
2035 | } | ||
1741 | rcu_read_unlock(); | 2036 | rcu_read_unlock(); |
1742 | } else { | 2037 | put_files_struct(files); |
1743 | inode->i_uid = GLOBAL_ROOT_UID; | ||
1744 | inode->i_gid = GLOBAL_ROOT_GID; | ||
1745 | } | 2038 | } |
1746 | security_task_to_inode(task, inode); | 2039 | put_task_struct(task); |
1747 | status = 1; | ||
1748 | } | 2040 | } |
1749 | 2041 | d_drop(dentry); | |
1750 | out: | 2042 | return 0; |
1751 | put_task_struct(task); | ||
1752 | |||
1753 | out_notask: | ||
1754 | if (status <= 0) | ||
1755 | d_drop(dentry); | ||
1756 | |||
1757 | return status; | ||
1758 | } | 2043 | } |
1759 | 2044 | ||
1760 | static const struct dentry_operations tid_map_files_dentry_operations = { | 2045 | static const struct dentry_operations tid_fd_dentry_operations = |
1761 | .d_revalidate = map_files_d_revalidate, | ||
1762 | .d_delete = pid_delete_dentry, | ||
1763 | }; | ||
1764 | |||
1765 | static int proc_map_files_get_link(struct dentry *dentry, struct path *path) | ||
1766 | { | 2046 | { |
1767 | unsigned long vm_start, vm_end; | 2047 | .d_revalidate = tid_fd_revalidate, |
1768 | struct vm_area_struct *vma; | 2048 | .d_delete = pid_delete_dentry, |
1769 | struct task_struct *task; | ||
1770 | struct mm_struct *mm; | ||
1771 | int rc; | ||
1772 | |||
1773 | rc = -ENOENT; | ||
1774 | task = get_proc_task(dentry->d_inode); | ||
1775 | if (!task) | ||
1776 | goto out; | ||
1777 | |||
1778 | mm = get_task_mm(task); | ||
1779 | put_task_struct(task); | ||
1780 | if (!mm) | ||
1781 | goto out; | ||
1782 | |||
1783 | rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); | ||
1784 | if (rc) | ||
1785 | goto out_mmput; | ||
1786 | |||
1787 | down_read(&mm->mmap_sem); | ||
1788 | vma = find_exact_vma(mm, vm_start, vm_end); | ||
1789 | if (vma && vma->vm_file) { | ||
1790 | *path = vma->vm_file->f_path; | ||
1791 | path_get(path); | ||
1792 | rc = 0; | ||
1793 | } | ||
1794 | up_read(&mm->mmap_sem); | ||
1795 | |||
1796 | out_mmput: | ||
1797 | mmput(mm); | ||
1798 | out: | ||
1799 | return rc; | ||
1800 | } | ||
1801 | |||
1802 | struct map_files_info { | ||
1803 | fmode_t mode; | ||
1804 | unsigned long len; | ||
1805 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | ||
1806 | }; | 2049 | }; |
1807 | 2050 | ||
1808 | static struct dentry * | 2051 | static struct dentry *proc_fd_instantiate(struct inode *dir, |
1809 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | 2052 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
1810 | struct task_struct *task, const void *ptr) | ||
1811 | { | 2053 | { |
1812 | fmode_t mode = (fmode_t)(unsigned long)ptr; | 2054 | unsigned fd = *(const unsigned *)ptr; |
1813 | struct proc_inode *ei; | 2055 | struct file *file; |
1814 | struct inode *inode; | 2056 | struct files_struct *files; |
2057 | struct inode *inode; | ||
2058 | struct proc_inode *ei; | ||
2059 | struct dentry *error = ERR_PTR(-ENOENT); | ||
1815 | 2060 | ||
1816 | inode = proc_pid_make_inode(dir->i_sb, task); | 2061 | inode = proc_pid_make_inode(dir->i_sb, task); |
1817 | if (!inode) | 2062 | if (!inode) |
1818 | return ERR_PTR(-ENOENT); | 2063 | goto out; |
1819 | |||
1820 | ei = PROC_I(inode); | 2064 | ei = PROC_I(inode); |
1821 | ei->op.proc_get_link = proc_map_files_get_link; | 2065 | ei->fd = fd; |
1822 | 2066 | files = get_files_struct(task); | |
1823 | inode->i_op = &proc_pid_link_inode_operations; | 2067 | if (!files) |
1824 | inode->i_size = 64; | 2068 | goto out_iput; |
1825 | inode->i_mode = S_IFLNK; | 2069 | inode->i_mode = S_IFLNK; |
1826 | 2070 | ||
1827 | if (mode & FMODE_READ) | 2071 | /* |
1828 | inode->i_mode |= S_IRUSR; | 2072 | * We are not taking a ref to the file structure, so we must |
1829 | if (mode & FMODE_WRITE) | 2073 | * hold ->file_lock. |
1830 | inode->i_mode |= S_IWUSR; | 2074 | */ |
2075 | spin_lock(&files->file_lock); | ||
2076 | file = fcheck_files(files, fd); | ||
2077 | if (!file) | ||
2078 | goto out_unlock; | ||
2079 | if (file->f_mode & FMODE_READ) | ||
2080 | inode->i_mode |= S_IRUSR | S_IXUSR; | ||
2081 | if (file->f_mode & FMODE_WRITE) | ||
2082 | inode->i_mode |= S_IWUSR | S_IXUSR; | ||
2083 | spin_unlock(&files->file_lock); | ||
2084 | put_files_struct(files); | ||
1831 | 2085 | ||
1832 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | 2086 | inode->i_op = &proc_pid_link_inode_operations; |
2087 | inode->i_size = 64; | ||
2088 | ei->op.proc_get_link = proc_fd_link; | ||
2089 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
1833 | d_add(dentry, inode); | 2090 | d_add(dentry, inode); |
2091 | /* Close the race of the process dying before we return the dentry */ | ||
2092 | if (tid_fd_revalidate(dentry, NULL)) | ||
2093 | error = NULL; | ||
1834 | 2094 | ||
1835 | return NULL; | 2095 | out: |
2096 | return error; | ||
2097 | out_unlock: | ||
2098 | spin_unlock(&files->file_lock); | ||
2099 | put_files_struct(files); | ||
2100 | out_iput: | ||
2101 | iput(inode); | ||
2102 | goto out; | ||
1836 | } | 2103 | } |
1837 | 2104 | ||
1838 | static struct dentry *proc_map_files_lookup(struct inode *dir, | 2105 | static struct dentry *proc_lookupfd_common(struct inode *dir, |
1839 | struct dentry *dentry, unsigned int flags) | 2106 | struct dentry *dentry, |
2107 | instantiate_t instantiate) | ||
1840 | { | 2108 | { |
1841 | unsigned long vm_start, vm_end; | 2109 | struct task_struct *task = get_proc_task(dir); |
1842 | struct vm_area_struct *vma; | 2110 | unsigned fd = name_to_int(dentry); |
1843 | struct task_struct *task; | 2111 | struct dentry *result = ERR_PTR(-ENOENT); |
1844 | struct dentry *result; | ||
1845 | struct mm_struct *mm; | ||
1846 | |||
1847 | result = ERR_PTR(-EACCES); | ||
1848 | if (!capable(CAP_SYS_ADMIN)) | ||
1849 | goto out; | ||
1850 | 2112 | ||
1851 | result = ERR_PTR(-ENOENT); | ||
1852 | task = get_proc_task(dir); | ||
1853 | if (!task) | 2113 | if (!task) |
2114 | goto out_no_task; | ||
2115 | if (fd == ~0U) | ||
1854 | goto out; | 2116 | goto out; |
1855 | 2117 | ||
1856 | result = ERR_PTR(-EACCES); | 2118 | result = instantiate(dir, dentry, task, &fd); |
1857 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2119 | out: |
1858 | goto out_put_task; | 2120 | put_task_struct(task); |
2121 | out_no_task: | ||
2122 | return result; | ||
2123 | } | ||
1859 | 2124 | ||
1860 | result = ERR_PTR(-ENOENT); | 2125 | static int proc_readfd_common(struct file * filp, void * dirent, |
1861 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) | 2126 | filldir_t filldir, instantiate_t instantiate) |
1862 | goto out_put_task; | 2127 | { |
2128 | struct dentry *dentry = filp->f_path.dentry; | ||
2129 | struct inode *inode = dentry->d_inode; | ||
2130 | struct task_struct *p = get_proc_task(inode); | ||
2131 | unsigned int fd, ino; | ||
2132 | int retval; | ||
2133 | struct files_struct * files; | ||
1863 | 2134 | ||
1864 | mm = get_task_mm(task); | 2135 | retval = -ENOENT; |
1865 | if (!mm) | 2136 | if (!p) |
1866 | goto out_put_task; | 2137 | goto out_no_task; |
2138 | retval = 0; | ||
1867 | 2139 | ||
1868 | down_read(&mm->mmap_sem); | 2140 | fd = filp->f_pos; |
1869 | vma = find_exact_vma(mm, vm_start, vm_end); | 2141 | switch (fd) { |
1870 | if (!vma) | 2142 | case 0: |
1871 | goto out_no_vma; | 2143 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) |
2144 | goto out; | ||
2145 | filp->f_pos++; | ||
2146 | case 1: | ||
2147 | ino = parent_ino(dentry); | ||
2148 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
2149 | goto out; | ||
2150 | filp->f_pos++; | ||
2151 | default: | ||
2152 | files = get_files_struct(p); | ||
2153 | if (!files) | ||
2154 | goto out; | ||
2155 | rcu_read_lock(); | ||
2156 | for (fd = filp->f_pos-2; | ||
2157 | fd < files_fdtable(files)->max_fds; | ||
2158 | fd++, filp->f_pos++) { | ||
2159 | char name[PROC_NUMBUF]; | ||
2160 | int len; | ||
1872 | 2161 | ||
1873 | if (vma->vm_file) | 2162 | if (!fcheck_files(files, fd)) |
1874 | result = proc_map_files_instantiate(dir, dentry, task, | 2163 | continue; |
1875 | (void *)(unsigned long)vma->vm_file->f_mode); | 2164 | rcu_read_unlock(); |
1876 | 2165 | ||
1877 | out_no_vma: | 2166 | len = snprintf(name, sizeof(name), "%d", fd); |
1878 | up_read(&mm->mmap_sem); | 2167 | if (proc_fill_cache(filp, dirent, filldir, |
1879 | mmput(mm); | 2168 | name, len, instantiate, |
1880 | out_put_task: | 2169 | p, &fd) < 0) { |
1881 | put_task_struct(task); | 2170 | rcu_read_lock(); |
2171 | break; | ||
2172 | } | ||
2173 | rcu_read_lock(); | ||
2174 | } | ||
2175 | rcu_read_unlock(); | ||
2176 | put_files_struct(files); | ||
2177 | } | ||
1882 | out: | 2178 | out: |
1883 | return result; | 2179 | put_task_struct(p); |
2180 | out_no_task: | ||
2181 | return retval; | ||
1884 | } | 2182 | } |
1885 | 2183 | ||
1886 | static const struct inode_operations proc_map_files_inode_operations = { | 2184 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, |
1887 | .lookup = proc_map_files_lookup, | 2185 | struct nameidata *nd) |
1888 | .permission = proc_fd_permission, | ||
1889 | .setattr = proc_setattr, | ||
1890 | }; | ||
1891 | |||
1892 | static int | ||
1893 | proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
1894 | { | 2186 | { |
1895 | struct dentry *dentry = filp->f_path.dentry; | 2187 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); |
1896 | struct inode *inode = dentry->d_inode; | 2188 | } |
1897 | struct vm_area_struct *vma; | ||
1898 | struct task_struct *task; | ||
1899 | struct mm_struct *mm; | ||
1900 | ino_t ino; | ||
1901 | int ret; | ||
1902 | 2189 | ||
1903 | ret = -EACCES; | 2190 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) |
1904 | if (!capable(CAP_SYS_ADMIN)) | 2191 | { |
1905 | goto out; | 2192 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); |
2193 | } | ||
1906 | 2194 | ||
1907 | ret = -ENOENT; | 2195 | static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, |
1908 | task = get_proc_task(inode); | 2196 | size_t len, loff_t *ppos) |
1909 | if (!task) | 2197 | { |
1910 | goto out; | 2198 | char tmp[PROC_FDINFO_MAX]; |
2199 | int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); | ||
2200 | if (!err) | ||
2201 | err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); | ||
2202 | return err; | ||
2203 | } | ||
1911 | 2204 | ||
1912 | ret = -EACCES; | 2205 | static const struct file_operations proc_fdinfo_file_operations = { |
1913 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2206 | .open = nonseekable_open, |
1914 | goto out_put_task; | 2207 | .read = proc_fdinfo_read, |
2208 | .llseek = no_llseek, | ||
2209 | }; | ||
1915 | 2210 | ||
1916 | ret = 0; | 2211 | static const struct file_operations proc_fd_operations = { |
1917 | switch (filp->f_pos) { | 2212 | .read = generic_read_dir, |
1918 | case 0: | 2213 | .readdir = proc_readfd, |
1919 | ino = inode->i_ino; | 2214 | .llseek = default_llseek, |
1920 | if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) | 2215 | }; |
1921 | goto out_put_task; | ||
1922 | filp->f_pos++; | ||
1923 | case 1: | ||
1924 | ino = parent_ino(dentry); | ||
1925 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
1926 | goto out_put_task; | ||
1927 | filp->f_pos++; | ||
1928 | default: | ||
1929 | { | ||
1930 | unsigned long nr_files, pos, i; | ||
1931 | struct flex_array *fa = NULL; | ||
1932 | struct map_files_info info; | ||
1933 | struct map_files_info *p; | ||
1934 | 2216 | ||
1935 | mm = get_task_mm(task); | 2217 | /* |
1936 | if (!mm) | 2218 | * /proc/pid/fd needs a special permission handler so that a process can still |
1937 | goto out_put_task; | 2219 | * access /proc/self/fd after it has executed a setuid(). |
1938 | down_read(&mm->mmap_sem); | 2220 | */ |
2221 | static int proc_fd_permission(struct inode *inode, int mask) | ||
2222 | { | ||
2223 | int rv = generic_permission(inode, mask); | ||
2224 | if (rv == 0) | ||
2225 | return 0; | ||
2226 | if (task_pid(current) == proc_pid(inode)) | ||
2227 | rv = 0; | ||
2228 | return rv; | ||
2229 | } | ||
1939 | 2230 | ||
1940 | nr_files = 0; | 2231 | /* |
2232 | * proc directories can do almost nothing.. | ||
2233 | */ | ||
2234 | static const struct inode_operations proc_fd_inode_operations = { | ||
2235 | .lookup = proc_lookupfd, | ||
2236 | .permission = proc_fd_permission, | ||
2237 | .setattr = proc_setattr, | ||
2238 | }; | ||
1941 | 2239 | ||
1942 | /* | 2240 | static struct dentry *proc_fdinfo_instantiate(struct inode *dir, |
1943 | * We need two passes here: | 2241 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
1944 | * | 2242 | { |
1945 | * 1) Collect vmas of mapped files with mmap_sem taken | 2243 | unsigned fd = *(unsigned *)ptr; |
1946 | * 2) Release mmap_sem and instantiate entries | 2244 | struct inode *inode; |
1947 | * | 2245 | struct proc_inode *ei; |
1948 | * otherwise we get lockdep complained, since filldir() | 2246 | struct dentry *error = ERR_PTR(-ENOENT); |
1949 | * routine might require mmap_sem taken in might_fault(). | ||
1950 | */ | ||
1951 | 2247 | ||
1952 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { | 2248 | inode = proc_pid_make_inode(dir->i_sb, task); |
1953 | if (vma->vm_file && ++pos > filp->f_pos) | 2249 | if (!inode) |
1954 | nr_files++; | 2250 | goto out; |
1955 | } | 2251 | ei = PROC_I(inode); |
2252 | ei->fd = fd; | ||
2253 | inode->i_mode = S_IFREG | S_IRUSR; | ||
2254 | inode->i_fop = &proc_fdinfo_file_operations; | ||
2255 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
2256 | d_add(dentry, inode); | ||
2257 | /* Close the race of the process dying before we return the dentry */ | ||
2258 | if (tid_fd_revalidate(dentry, NULL)) | ||
2259 | error = NULL; | ||
1956 | 2260 | ||
1957 | if (nr_files) { | 2261 | out: |
1958 | fa = flex_array_alloc(sizeof(info), nr_files, | 2262 | return error; |
1959 | GFP_KERNEL); | 2263 | } |
1960 | if (!fa || flex_array_prealloc(fa, 0, nr_files, | ||
1961 | GFP_KERNEL)) { | ||
1962 | ret = -ENOMEM; | ||
1963 | if (fa) | ||
1964 | flex_array_free(fa); | ||
1965 | up_read(&mm->mmap_sem); | ||
1966 | mmput(mm); | ||
1967 | goto out_put_task; | ||
1968 | } | ||
1969 | for (i = 0, vma = mm->mmap, pos = 2; vma; | ||
1970 | vma = vma->vm_next) { | ||
1971 | if (!vma->vm_file) | ||
1972 | continue; | ||
1973 | if (++pos <= filp->f_pos) | ||
1974 | continue; | ||
1975 | 2264 | ||
1976 | info.mode = vma->vm_file->f_mode; | 2265 | static struct dentry *proc_lookupfdinfo(struct inode *dir, |
1977 | info.len = snprintf(info.name, | 2266 | struct dentry *dentry, |
1978 | sizeof(info.name), "%lx-%lx", | 2267 | struct nameidata *nd) |
1979 | vma->vm_start, vma->vm_end); | 2268 | { |
1980 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) | 2269 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); |
1981 | BUG(); | 2270 | } |
1982 | } | ||
1983 | } | ||
1984 | up_read(&mm->mmap_sem); | ||
1985 | |||
1986 | for (i = 0; i < nr_files; i++) { | ||
1987 | p = flex_array_get(fa, i); | ||
1988 | ret = proc_fill_cache(filp, dirent, filldir, | ||
1989 | p->name, p->len, | ||
1990 | proc_map_files_instantiate, | ||
1991 | task, | ||
1992 | (void *)(unsigned long)p->mode); | ||
1993 | if (ret) | ||
1994 | break; | ||
1995 | filp->f_pos++; | ||
1996 | } | ||
1997 | if (fa) | ||
1998 | flex_array_free(fa); | ||
1999 | mmput(mm); | ||
2000 | } | ||
2001 | } | ||
2002 | 2271 | ||
2003 | out_put_task: | 2272 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) |
2004 | put_task_struct(task); | 2273 | { |
2005 | out: | 2274 | return proc_readfd_common(filp, dirent, filldir, |
2006 | return ret; | 2275 | proc_fdinfo_instantiate); |
2007 | } | 2276 | } |
2008 | 2277 | ||
2009 | static const struct file_operations proc_map_files_operations = { | 2278 | static const struct file_operations proc_fdinfo_operations = { |
2010 | .read = generic_read_dir, | 2279 | .read = generic_read_dir, |
2011 | .readdir = proc_map_files_readdir, | 2280 | .readdir = proc_readfdinfo, |
2012 | .llseek = default_llseek, | 2281 | .llseek = default_llseek, |
2013 | }; | 2282 | }; |
2014 | 2283 | ||
2015 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 2284 | /* |
2285 | * proc directories can do almost nothing.. | ||
2286 | */ | ||
2287 | static const struct inode_operations proc_fdinfo_inode_operations = { | ||
2288 | .lookup = proc_lookupfdinfo, | ||
2289 | .setattr = proc_setattr, | ||
2290 | }; | ||
2291 | |||
2016 | 2292 | ||
2017 | static struct dentry *proc_pident_instantiate(struct inode *dir, | 2293 | static struct dentry *proc_pident_instantiate(struct inode *dir, |
2018 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2294 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
@@ -2029,7 +2305,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, | |||
2029 | ei = PROC_I(inode); | 2305 | ei = PROC_I(inode); |
2030 | inode->i_mode = p->mode; | 2306 | inode->i_mode = p->mode; |
2031 | if (S_ISDIR(inode->i_mode)) | 2307 | if (S_ISDIR(inode->i_mode)) |
2032 | set_nlink(inode, 2); /* Use getattr to fix if necessary */ | 2308 | inode->i_nlink = 2; /* Use getattr to fix if necessary */ |
2033 | if (p->iop) | 2309 | if (p->iop) |
2034 | inode->i_op = p->iop; | 2310 | inode->i_op = p->iop; |
2035 | if (p->fop) | 2311 | if (p->fop) |
@@ -2038,7 +2314,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, | |||
2038 | d_set_d_op(dentry, &pid_dentry_operations); | 2314 | d_set_d_op(dentry, &pid_dentry_operations); |
2039 | d_add(dentry, inode); | 2315 | d_add(dentry, inode); |
2040 | /* Close the race of the process dying before we return the dentry */ | 2316 | /* Close the race of the process dying before we return the dentry */ |
2041 | if (pid_revalidate(dentry, 0)) | 2317 | if (pid_revalidate(dentry, NULL)) |
2042 | error = NULL; | 2318 | error = NULL; |
2043 | out: | 2319 | out: |
2044 | return error; | 2320 | return error; |
@@ -2238,7 +2514,7 @@ static const struct file_operations proc_attr_dir_operations = { | |||
2238 | }; | 2514 | }; |
2239 | 2515 | ||
2240 | static struct dentry *proc_attr_dir_lookup(struct inode *dir, | 2516 | static struct dentry *proc_attr_dir_lookup(struct inode *dir, |
2241 | struct dentry *dentry, unsigned int flags) | 2517 | struct dentry *dentry, struct nameidata *nd) |
2242 | { | 2518 | { |
2243 | return proc_pident_lookup(dir, dentry, | 2519 | return proc_pident_lookup(dir, dentry, |
2244 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); | 2520 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); |
@@ -2338,6 +2614,145 @@ static const struct file_operations proc_coredump_filter_operations = { | |||
2338 | }; | 2614 | }; |
2339 | #endif | 2615 | #endif |
2340 | 2616 | ||
2617 | /* | ||
2618 | * /proc/self: | ||
2619 | */ | ||
2620 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | ||
2621 | int buflen) | ||
2622 | { | ||
2623 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
2624 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
2625 | char tmp[PROC_NUMBUF]; | ||
2626 | if (!tgid) | ||
2627 | return -ENOENT; | ||
2628 | sprintf(tmp, "%d", tgid); | ||
2629 | return vfs_readlink(dentry,buffer,buflen,tmp); | ||
2630 | } | ||
2631 | |||
2632 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
2633 | { | ||
2634 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
2635 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
2636 | char *name = ERR_PTR(-ENOENT); | ||
2637 | if (tgid) { | ||
2638 | name = __getname(); | ||
2639 | if (!name) | ||
2640 | name = ERR_PTR(-ENOMEM); | ||
2641 | else | ||
2642 | sprintf(name, "%d", tgid); | ||
2643 | } | ||
2644 | nd_set_link(nd, name); | ||
2645 | return NULL; | ||
2646 | } | ||
2647 | |||
2648 | static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, | ||
2649 | void *cookie) | ||
2650 | { | ||
2651 | char *s = nd_get_link(nd); | ||
2652 | if (!IS_ERR(s)) | ||
2653 | __putname(s); | ||
2654 | } | ||
2655 | |||
2656 | static const struct inode_operations proc_self_inode_operations = { | ||
2657 | .readlink = proc_self_readlink, | ||
2658 | .follow_link = proc_self_follow_link, | ||
2659 | .put_link = proc_self_put_link, | ||
2660 | }; | ||
2661 | |||
2662 | /* | ||
2663 | * proc base | ||
2664 | * | ||
2665 | * These are the directory entries in the root directory of /proc | ||
2666 | * that properly belong to the /proc filesystem, as they describe | ||
2667 | * describe something that is process related. | ||
2668 | */ | ||
2669 | static const struct pid_entry proc_base_stuff[] = { | ||
2670 | NOD("self", S_IFLNK|S_IRWXUGO, | ||
2671 | &proc_self_inode_operations, NULL, {}), | ||
2672 | }; | ||
2673 | |||
2674 | static struct dentry *proc_base_instantiate(struct inode *dir, | ||
2675 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
2676 | { | ||
2677 | const struct pid_entry *p = ptr; | ||
2678 | struct inode *inode; | ||
2679 | struct proc_inode *ei; | ||
2680 | struct dentry *error; | ||
2681 | |||
2682 | /* Allocate the inode */ | ||
2683 | error = ERR_PTR(-ENOMEM); | ||
2684 | inode = new_inode(dir->i_sb); | ||
2685 | if (!inode) | ||
2686 | goto out; | ||
2687 | |||
2688 | /* Initialize the inode */ | ||
2689 | ei = PROC_I(inode); | ||
2690 | inode->i_ino = get_next_ino(); | ||
2691 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
2692 | |||
2693 | /* | ||
2694 | * grab the reference to the task. | ||
2695 | */ | ||
2696 | ei->pid = get_task_pid(task, PIDTYPE_PID); | ||
2697 | if (!ei->pid) | ||
2698 | goto out_iput; | ||
2699 | |||
2700 | inode->i_mode = p->mode; | ||
2701 | if (S_ISDIR(inode->i_mode)) | ||
2702 | inode->i_nlink = 2; | ||
2703 | if (S_ISLNK(inode->i_mode)) | ||
2704 | inode->i_size = 64; | ||
2705 | if (p->iop) | ||
2706 | inode->i_op = p->iop; | ||
2707 | if (p->fop) | ||
2708 | inode->i_fop = p->fop; | ||
2709 | ei->op = p->op; | ||
2710 | d_add(dentry, inode); | ||
2711 | error = NULL; | ||
2712 | out: | ||
2713 | return error; | ||
2714 | out_iput: | ||
2715 | iput(inode); | ||
2716 | goto out; | ||
2717 | } | ||
2718 | |||
2719 | static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) | ||
2720 | { | ||
2721 | struct dentry *error; | ||
2722 | struct task_struct *task = get_proc_task(dir); | ||
2723 | const struct pid_entry *p, *last; | ||
2724 | |||
2725 | error = ERR_PTR(-ENOENT); | ||
2726 | |||
2727 | if (!task) | ||
2728 | goto out_no_task; | ||
2729 | |||
2730 | /* Lookup the directory entry */ | ||
2731 | last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; | ||
2732 | for (p = proc_base_stuff; p <= last; p++) { | ||
2733 | if (p->len != dentry->d_name.len) | ||
2734 | continue; | ||
2735 | if (!memcmp(dentry->d_name.name, p->name, p->len)) | ||
2736 | break; | ||
2737 | } | ||
2738 | if (p > last) | ||
2739 | goto out; | ||
2740 | |||
2741 | error = proc_base_instantiate(dir, dentry, task, p); | ||
2742 | |||
2743 | out: | ||
2744 | put_task_struct(task); | ||
2745 | out_no_task: | ||
2746 | return error; | ||
2747 | } | ||
2748 | |||
2749 | static int proc_base_fill_cache(struct file *filp, void *dirent, | ||
2750 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) | ||
2751 | { | ||
2752 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | ||
2753 | proc_base_instantiate, task, p); | ||
2754 | } | ||
2755 | |||
2341 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 2756 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
2342 | static int do_io_accounting(struct task_struct *task, char *buffer, int whole) | 2757 | static int do_io_accounting(struct task_struct *task, char *buffer, int whole) |
2343 | { | 2758 | { |
@@ -2394,87 +2809,6 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) | |||
2394 | } | 2809 | } |
2395 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ | 2810 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ |
2396 | 2811 | ||
2397 | #ifdef CONFIG_USER_NS | ||
2398 | static int proc_id_map_open(struct inode *inode, struct file *file, | ||
2399 | struct seq_operations *seq_ops) | ||
2400 | { | ||
2401 | struct user_namespace *ns = NULL; | ||
2402 | struct task_struct *task; | ||
2403 | struct seq_file *seq; | ||
2404 | int ret = -EINVAL; | ||
2405 | |||
2406 | task = get_proc_task(inode); | ||
2407 | if (task) { | ||
2408 | rcu_read_lock(); | ||
2409 | ns = get_user_ns(task_cred_xxx(task, user_ns)); | ||
2410 | rcu_read_unlock(); | ||
2411 | put_task_struct(task); | ||
2412 | } | ||
2413 | if (!ns) | ||
2414 | goto err; | ||
2415 | |||
2416 | ret = seq_open(file, seq_ops); | ||
2417 | if (ret) | ||
2418 | goto err_put_ns; | ||
2419 | |||
2420 | seq = file->private_data; | ||
2421 | seq->private = ns; | ||
2422 | |||
2423 | return 0; | ||
2424 | err_put_ns: | ||
2425 | put_user_ns(ns); | ||
2426 | err: | ||
2427 | return ret; | ||
2428 | } | ||
2429 | |||
2430 | static int proc_id_map_release(struct inode *inode, struct file *file) | ||
2431 | { | ||
2432 | struct seq_file *seq = file->private_data; | ||
2433 | struct user_namespace *ns = seq->private; | ||
2434 | put_user_ns(ns); | ||
2435 | return seq_release(inode, file); | ||
2436 | } | ||
2437 | |||
2438 | static int proc_uid_map_open(struct inode *inode, struct file *file) | ||
2439 | { | ||
2440 | return proc_id_map_open(inode, file, &proc_uid_seq_operations); | ||
2441 | } | ||
2442 | |||
2443 | static int proc_gid_map_open(struct inode *inode, struct file *file) | ||
2444 | { | ||
2445 | return proc_id_map_open(inode, file, &proc_gid_seq_operations); | ||
2446 | } | ||
2447 | |||
2448 | static int proc_projid_map_open(struct inode *inode, struct file *file) | ||
2449 | { | ||
2450 | return proc_id_map_open(inode, file, &proc_projid_seq_operations); | ||
2451 | } | ||
2452 | |||
2453 | static const struct file_operations proc_uid_map_operations = { | ||
2454 | .open = proc_uid_map_open, | ||
2455 | .write = proc_uid_map_write, | ||
2456 | .read = seq_read, | ||
2457 | .llseek = seq_lseek, | ||
2458 | .release = proc_id_map_release, | ||
2459 | }; | ||
2460 | |||
2461 | static const struct file_operations proc_gid_map_operations = { | ||
2462 | .open = proc_gid_map_open, | ||
2463 | .write = proc_gid_map_write, | ||
2464 | .read = seq_read, | ||
2465 | .llseek = seq_lseek, | ||
2466 | .release = proc_id_map_release, | ||
2467 | }; | ||
2468 | |||
2469 | static const struct file_operations proc_projid_map_operations = { | ||
2470 | .open = proc_projid_map_open, | ||
2471 | .write = proc_projid_map_write, | ||
2472 | .read = seq_read, | ||
2473 | .llseek = seq_lseek, | ||
2474 | .release = proc_id_map_release, | ||
2475 | }; | ||
2476 | #endif /* CONFIG_USER_NS */ | ||
2477 | |||
2478 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | 2812 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, |
2479 | struct pid *pid, struct task_struct *task) | 2813 | struct pid *pid, struct task_struct *task) |
2480 | { | 2814 | { |
@@ -2495,9 +2829,6 @@ static const struct inode_operations proc_task_inode_operations; | |||
2495 | static const struct pid_entry tgid_base_stuff[] = { | 2829 | static const struct pid_entry tgid_base_stuff[] = { |
2496 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), | 2830 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), |
2497 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 2831 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
2498 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
2499 | DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), | ||
2500 | #endif | ||
2501 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 2832 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2502 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | 2833 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), |
2503 | #ifdef CONFIG_NET | 2834 | #ifdef CONFIG_NET |
@@ -2521,9 +2852,9 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2521 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 2852 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
2522 | ONE("stat", S_IRUGO, proc_tgid_stat), | 2853 | ONE("stat", S_IRUGO, proc_tgid_stat), |
2523 | ONE("statm", S_IRUGO, proc_pid_statm), | 2854 | ONE("statm", S_IRUGO, proc_pid_statm), |
2524 | REG("maps", S_IRUGO, proc_pid_maps_operations), | 2855 | REG("maps", S_IRUGO, proc_maps_operations), |
2525 | #ifdef CONFIG_NUMA | 2856 | #ifdef CONFIG_NUMA |
2526 | REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), | 2857 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), |
2527 | #endif | 2858 | #endif |
2528 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), | 2859 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), |
2529 | LNK("cwd", proc_cwd_link), | 2860 | LNK("cwd", proc_cwd_link), |
@@ -2534,7 +2865,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2534 | REG("mountstats", S_IRUSR, proc_mountstats_operations), | 2865 | REG("mountstats", S_IRUSR, proc_mountstats_operations), |
2535 | #ifdef CONFIG_PROC_PAGE_MONITOR | 2866 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2536 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 2867 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
2537 | REG("smaps", S_IRUGO, proc_pid_smaps_operations), | 2868 | REG("smaps", S_IRUGO, proc_smaps_operations), |
2538 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 2869 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
2539 | #endif | 2870 | #endif |
2540 | #ifdef CONFIG_SECURITY | 2871 | #ifdef CONFIG_SECURITY |
@@ -2559,7 +2890,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2559 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 2890 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
2560 | #endif | 2891 | #endif |
2561 | INF("oom_score", S_IRUGO, proc_oom_score), | 2892 | INF("oom_score", S_IRUGO, proc_oom_score), |
2562 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), | 2893 | ANDROID("oom_adj",S_IRUGO|S_IWUSR, oom_adjust), |
2563 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 2894 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
2564 | #ifdef CONFIG_AUDITSYSCALL | 2895 | #ifdef CONFIG_AUDITSYSCALL |
2565 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 2896 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
@@ -2577,11 +2908,6 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2577 | #ifdef CONFIG_HARDWALL | 2908 | #ifdef CONFIG_HARDWALL |
2578 | INF("hardwall", S_IRUGO, proc_pid_hardwall), | 2909 | INF("hardwall", S_IRUGO, proc_pid_hardwall), |
2579 | #endif | 2910 | #endif |
2580 | #ifdef CONFIG_USER_NS | ||
2581 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), | ||
2582 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), | ||
2583 | REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), | ||
2584 | #endif | ||
2585 | }; | 2911 | }; |
2586 | 2912 | ||
2587 | static int proc_tgid_base_readdir(struct file * filp, | 2913 | static int proc_tgid_base_readdir(struct file * filp, |
@@ -2597,8 +2923,7 @@ static const struct file_operations proc_tgid_base_operations = { | |||
2597 | .llseek = default_llseek, | 2923 | .llseek = default_llseek, |
2598 | }; | 2924 | }; |
2599 | 2925 | ||
2600 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 2926 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
2601 | { | ||
2602 | return proc_pident_lookup(dir, dentry, | 2927 | return proc_pident_lookup(dir, dentry, |
2603 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); | 2928 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); |
2604 | } | 2929 | } |
@@ -2607,7 +2932,6 @@ static const struct inode_operations proc_tgid_base_inode_operations = { | |||
2607 | .lookup = proc_tgid_base_lookup, | 2932 | .lookup = proc_tgid_base_lookup, |
2608 | .getattr = pid_getattr, | 2933 | .getattr = pid_getattr, |
2609 | .setattr = proc_setattr, | 2934 | .setattr = proc_setattr, |
2610 | .permission = proc_pid_permission, | ||
2611 | }; | 2935 | }; |
2612 | 2936 | ||
2613 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) | 2937 | static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) |
@@ -2692,6 +3016,10 @@ void proc_flush_task(struct task_struct *task) | |||
2692 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, | 3016 | proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, |
2693 | tgid->numbers[i].nr); | 3017 | tgid->numbers[i].nr); |
2694 | } | 3018 | } |
3019 | |||
3020 | upid = &pid->numbers[pid->level]; | ||
3021 | if (upid->nr == 1) | ||
3022 | pid_ns_release_proc(upid->ns); | ||
2695 | } | 3023 | } |
2696 | 3024 | ||
2697 | static struct dentry *proc_pid_instantiate(struct inode *dir, | 3025 | static struct dentry *proc_pid_instantiate(struct inode *dir, |
@@ -2710,26 +3038,30 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, | |||
2710 | inode->i_fop = &proc_tgid_base_operations; | 3038 | inode->i_fop = &proc_tgid_base_operations; |
2711 | inode->i_flags|=S_IMMUTABLE; | 3039 | inode->i_flags|=S_IMMUTABLE; |
2712 | 3040 | ||
2713 | set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff, | 3041 | inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, |
2714 | ARRAY_SIZE(tgid_base_stuff))); | 3042 | ARRAY_SIZE(tgid_base_stuff)); |
2715 | 3043 | ||
2716 | d_set_d_op(dentry, &pid_dentry_operations); | 3044 | d_set_d_op(dentry, &pid_dentry_operations); |
2717 | 3045 | ||
2718 | d_add(dentry, inode); | 3046 | d_add(dentry, inode); |
2719 | /* Close the race of the process dying before we return the dentry */ | 3047 | /* Close the race of the process dying before we return the dentry */ |
2720 | if (pid_revalidate(dentry, 0)) | 3048 | if (pid_revalidate(dentry, NULL)) |
2721 | error = NULL; | 3049 | error = NULL; |
2722 | out: | 3050 | out: |
2723 | return error; | 3051 | return error; |
2724 | } | 3052 | } |
2725 | 3053 | ||
2726 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 3054 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2727 | { | 3055 | { |
2728 | struct dentry *result = NULL; | 3056 | struct dentry *result; |
2729 | struct task_struct *task; | 3057 | struct task_struct *task; |
2730 | unsigned tgid; | 3058 | unsigned tgid; |
2731 | struct pid_namespace *ns; | 3059 | struct pid_namespace *ns; |
2732 | 3060 | ||
3061 | result = proc_base_lookup(dir, dentry); | ||
3062 | if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) | ||
3063 | goto out; | ||
3064 | |||
2733 | tgid = name_to_int(dentry); | 3065 | tgid = name_to_int(dentry); |
2734 | if (tgid == ~0U) | 3066 | if (tgid == ~0U) |
2735 | goto out; | 3067 | goto out; |
@@ -2792,7 +3124,7 @@ retry: | |||
2792 | return iter; | 3124 | return iter; |
2793 | } | 3125 | } |
2794 | 3126 | ||
2795 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY) | 3127 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) |
2796 | 3128 | ||
2797 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 3129 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
2798 | struct tgid_iter iter) | 3130 | struct tgid_iter iter) |
@@ -2803,21 +3135,27 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi | |||
2803 | proc_pid_instantiate, iter.task, NULL); | 3135 | proc_pid_instantiate, iter.task, NULL); |
2804 | } | 3136 | } |
2805 | 3137 | ||
2806 | static int fake_filldir(void *buf, const char *name, int namelen, | ||
2807 | loff_t offset, u64 ino, unsigned d_type) | ||
2808 | { | ||
2809 | return 0; | ||
2810 | } | ||
2811 | |||
2812 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 3138 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2813 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3139 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) |
2814 | { | 3140 | { |
3141 | unsigned int nr; | ||
3142 | struct task_struct *reaper; | ||
2815 | struct tgid_iter iter; | 3143 | struct tgid_iter iter; |
2816 | struct pid_namespace *ns; | 3144 | struct pid_namespace *ns; |
2817 | filldir_t __filldir; | ||
2818 | 3145 | ||
2819 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) | 3146 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2820 | goto out; | 3147 | goto out_no_task; |
3148 | nr = filp->f_pos - FIRST_PROCESS_ENTRY; | ||
3149 | |||
3150 | reaper = get_proc_task(filp->f_path.dentry->d_inode); | ||
3151 | if (!reaper) | ||
3152 | goto out_no_task; | ||
3153 | |||
3154 | for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { | ||
3155 | const struct pid_entry *p = &proc_base_stuff[nr]; | ||
3156 | if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) | ||
3157 | goto out; | ||
3158 | } | ||
2821 | 3159 | ||
2822 | ns = filp->f_dentry->d_sb->s_fs_info; | 3160 | ns = filp->f_dentry->d_sb->s_fs_info; |
2823 | iter.task = NULL; | 3161 | iter.task = NULL; |
@@ -2825,19 +3163,16 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2825 | for (iter = next_tgid(ns, iter); | 3163 | for (iter = next_tgid(ns, iter); |
2826 | iter.task; | 3164 | iter.task; |
2827 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 3165 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
2828 | if (has_pid_permissions(ns, iter.task, 2)) | ||
2829 | __filldir = filldir; | ||
2830 | else | ||
2831 | __filldir = fake_filldir; | ||
2832 | |||
2833 | filp->f_pos = iter.tgid + TGID_OFFSET; | 3166 | filp->f_pos = iter.tgid + TGID_OFFSET; |
2834 | if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { | 3167 | if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { |
2835 | put_task_struct(iter.task); | 3168 | put_task_struct(iter.task); |
2836 | goto out; | 3169 | goto out; |
2837 | } | 3170 | } |
2838 | } | 3171 | } |
2839 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; | 3172 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; |
2840 | out: | 3173 | out: |
3174 | put_task_struct(reaper); | ||
3175 | out_no_task: | ||
2841 | return 0; | 3176 | return 0; |
2842 | } | 3177 | } |
2843 | 3178 | ||
@@ -2863,12 +3198,9 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2863 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 3198 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
2864 | ONE("stat", S_IRUGO, proc_tid_stat), | 3199 | ONE("stat", S_IRUGO, proc_tid_stat), |
2865 | ONE("statm", S_IRUGO, proc_pid_statm), | 3200 | ONE("statm", S_IRUGO, proc_pid_statm), |
2866 | REG("maps", S_IRUGO, proc_tid_maps_operations), | 3201 | REG("maps", S_IRUGO, proc_maps_operations), |
2867 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
2868 | REG("children", S_IRUGO, proc_tid_children_operations), | ||
2869 | #endif | ||
2870 | #ifdef CONFIG_NUMA | 3202 | #ifdef CONFIG_NUMA |
2871 | REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), | 3203 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), |
2872 | #endif | 3204 | #endif |
2873 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), | 3205 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), |
2874 | LNK("cwd", proc_cwd_link), | 3206 | LNK("cwd", proc_cwd_link), |
@@ -2878,7 +3210,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2878 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), | 3210 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), |
2879 | #ifdef CONFIG_PROC_PAGE_MONITOR | 3211 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2880 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 3212 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
2881 | REG("smaps", S_IRUGO, proc_tid_smaps_operations), | 3213 | REG("smaps", S_IRUGO, proc_smaps_operations), |
2882 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 3214 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
2883 | #endif | 3215 | #endif |
2884 | #ifdef CONFIG_SECURITY | 3216 | #ifdef CONFIG_SECURITY |
@@ -2903,7 +3235,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2903 | REG("cgroup", S_IRUGO, proc_cgroup_operations), | 3235 | REG("cgroup", S_IRUGO, proc_cgroup_operations), |
2904 | #endif | 3236 | #endif |
2905 | INF("oom_score", S_IRUGO, proc_oom_score), | 3237 | INF("oom_score", S_IRUGO, proc_oom_score), |
2906 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), | 3238 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), |
2907 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | 3239 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), |
2908 | #ifdef CONFIG_AUDITSYSCALL | 3240 | #ifdef CONFIG_AUDITSYSCALL |
2909 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 3241 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
@@ -2918,11 +3250,6 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2918 | #ifdef CONFIG_HARDWALL | 3250 | #ifdef CONFIG_HARDWALL |
2919 | INF("hardwall", S_IRUGO, proc_pid_hardwall), | 3251 | INF("hardwall", S_IRUGO, proc_pid_hardwall), |
2920 | #endif | 3252 | #endif |
2921 | #ifdef CONFIG_USER_NS | ||
2922 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), | ||
2923 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), | ||
2924 | REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), | ||
2925 | #endif | ||
2926 | }; | 3253 | }; |
2927 | 3254 | ||
2928 | static int proc_tid_base_readdir(struct file * filp, | 3255 | static int proc_tid_base_readdir(struct file * filp, |
@@ -2932,8 +3259,7 @@ static int proc_tid_base_readdir(struct file * filp, | |||
2932 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | 3259 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); |
2933 | } | 3260 | } |
2934 | 3261 | ||
2935 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 3262 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
2936 | { | ||
2937 | return proc_pident_lookup(dir, dentry, | 3263 | return proc_pident_lookup(dir, dentry, |
2938 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); | 3264 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); |
2939 | } | 3265 | } |
@@ -2964,20 +3290,20 @@ static struct dentry *proc_task_instantiate(struct inode *dir, | |||
2964 | inode->i_fop = &proc_tid_base_operations; | 3290 | inode->i_fop = &proc_tid_base_operations; |
2965 | inode->i_flags|=S_IMMUTABLE; | 3291 | inode->i_flags|=S_IMMUTABLE; |
2966 | 3292 | ||
2967 | set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff, | 3293 | inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, |
2968 | ARRAY_SIZE(tid_base_stuff))); | 3294 | ARRAY_SIZE(tid_base_stuff)); |
2969 | 3295 | ||
2970 | d_set_d_op(dentry, &pid_dentry_operations); | 3296 | d_set_d_op(dentry, &pid_dentry_operations); |
2971 | 3297 | ||
2972 | d_add(dentry, inode); | 3298 | d_add(dentry, inode); |
2973 | /* Close the race of the process dying before we return the dentry */ | 3299 | /* Close the race of the process dying before we return the dentry */ |
2974 | if (pid_revalidate(dentry, 0)) | 3300 | if (pid_revalidate(dentry, NULL)) |
2975 | error = NULL; | 3301 | error = NULL; |
2976 | out: | 3302 | out: |
2977 | return error; | 3303 | return error; |
2978 | } | 3304 | } |
2979 | 3305 | ||
2980 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 3306 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2981 | { | 3307 | { |
2982 | struct dentry *result = ERR_PTR(-ENOENT); | 3308 | struct dentry *result = ERR_PTR(-ENOENT); |
2983 | struct task_struct *task; | 3309 | struct task_struct *task; |
@@ -3173,7 +3499,6 @@ static const struct inode_operations proc_task_inode_operations = { | |||
3173 | .lookup = proc_task_lookup, | 3499 | .lookup = proc_task_lookup, |
3174 | .getattr = proc_task_getattr, | 3500 | .getattr = proc_task_getattr, |
3175 | .setattr = proc_setattr, | 3501 | .setattr = proc_setattr, |
3176 | .permission = proc_pid_permission, | ||
3177 | }; | 3502 | }; |
3178 | 3503 | ||
3179 | static const struct file_operations proc_task_operations = { | 3504 | static const struct file_operations proc_task_operations = { |
diff --git a/fs/proc/fd.c b/fs/proc/fd.c deleted file mode 100644 index d7a4a28ef63..00000000000 --- a/fs/proc/fd.c +++ /dev/null | |||
@@ -1,369 +0,0 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/dcache.h> | ||
4 | #include <linux/path.h> | ||
5 | #include <linux/fdtable.h> | ||
6 | #include <linux/namei.h> | ||
7 | #include <linux/pid.h> | ||
8 | #include <linux/security.h> | ||
9 | #include <linux/file.h> | ||
10 | #include <linux/seq_file.h> | ||
11 | |||
12 | #include <linux/proc_fs.h> | ||
13 | |||
14 | #include "internal.h" | ||
15 | #include "fd.h" | ||
16 | |||
17 | static int seq_show(struct seq_file *m, void *v) | ||
18 | { | ||
19 | struct files_struct *files = NULL; | ||
20 | int f_flags = 0, ret = -ENOENT; | ||
21 | struct file *file = NULL; | ||
22 | struct task_struct *task; | ||
23 | |||
24 | task = get_proc_task(m->private); | ||
25 | if (!task) | ||
26 | return -ENOENT; | ||
27 | |||
28 | files = get_files_struct(task); | ||
29 | put_task_struct(task); | ||
30 | |||
31 | if (files) { | ||
32 | int fd = proc_fd(m->private); | ||
33 | |||
34 | spin_lock(&files->file_lock); | ||
35 | file = fcheck_files(files, fd); | ||
36 | if (file) { | ||
37 | struct fdtable *fdt = files_fdtable(files); | ||
38 | |||
39 | f_flags = file->f_flags; | ||
40 | if (close_on_exec(fd, fdt)) | ||
41 | f_flags |= O_CLOEXEC; | ||
42 | |||
43 | get_file(file); | ||
44 | ret = 0; | ||
45 | } | ||
46 | spin_unlock(&files->file_lock); | ||
47 | put_files_struct(files); | ||
48 | } | ||
49 | |||
50 | if (!ret) { | ||
51 | seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", | ||
52 | (long long)file->f_pos, f_flags); | ||
53 | if (file->f_op->show_fdinfo) | ||
54 | ret = file->f_op->show_fdinfo(m, file); | ||
55 | fput(file); | ||
56 | } | ||
57 | |||
58 | return ret; | ||
59 | } | ||
60 | |||
61 | static int seq_fdinfo_open(struct inode *inode, struct file *file) | ||
62 | { | ||
63 | return single_open(file, seq_show, inode); | ||
64 | } | ||
65 | |||
66 | static const struct file_operations proc_fdinfo_file_operations = { | ||
67 | .open = seq_fdinfo_open, | ||
68 | .read = seq_read, | ||
69 | .llseek = seq_lseek, | ||
70 | .release = single_release, | ||
71 | }; | ||
72 | |||
73 | static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) | ||
74 | { | ||
75 | struct files_struct *files; | ||
76 | struct task_struct *task; | ||
77 | const struct cred *cred; | ||
78 | struct inode *inode; | ||
79 | int fd; | ||
80 | |||
81 | if (flags & LOOKUP_RCU) | ||
82 | return -ECHILD; | ||
83 | |||
84 | inode = dentry->d_inode; | ||
85 | task = get_proc_task(inode); | ||
86 | fd = proc_fd(inode); | ||
87 | |||
88 | if (task) { | ||
89 | files = get_files_struct(task); | ||
90 | if (files) { | ||
91 | struct file *file; | ||
92 | |||
93 | rcu_read_lock(); | ||
94 | file = fcheck_files(files, fd); | ||
95 | if (file) { | ||
96 | unsigned f_mode = file->f_mode; | ||
97 | |||
98 | rcu_read_unlock(); | ||
99 | put_files_struct(files); | ||
100 | |||
101 | if (task_dumpable(task)) { | ||
102 | rcu_read_lock(); | ||
103 | cred = __task_cred(task); | ||
104 | inode->i_uid = cred->euid; | ||
105 | inode->i_gid = cred->egid; | ||
106 | rcu_read_unlock(); | ||
107 | } else { | ||
108 | inode->i_uid = GLOBAL_ROOT_UID; | ||
109 | inode->i_gid = GLOBAL_ROOT_GID; | ||
110 | } | ||
111 | |||
112 | if (S_ISLNK(inode->i_mode)) { | ||
113 | unsigned i_mode = S_IFLNK; | ||
114 | if (f_mode & FMODE_READ) | ||
115 | i_mode |= S_IRUSR | S_IXUSR; | ||
116 | if (f_mode & FMODE_WRITE) | ||
117 | i_mode |= S_IWUSR | S_IXUSR; | ||
118 | inode->i_mode = i_mode; | ||
119 | } | ||
120 | |||
121 | security_task_to_inode(task, inode); | ||
122 | put_task_struct(task); | ||
123 | return 1; | ||
124 | } | ||
125 | rcu_read_unlock(); | ||
126 | put_files_struct(files); | ||
127 | } | ||
128 | put_task_struct(task); | ||
129 | } | ||
130 | |||
131 | d_drop(dentry); | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static const struct dentry_operations tid_fd_dentry_operations = { | ||
136 | .d_revalidate = tid_fd_revalidate, | ||
137 | .d_delete = pid_delete_dentry, | ||
138 | }; | ||
139 | |||
140 | static int proc_fd_link(struct dentry *dentry, struct path *path) | ||
141 | { | ||
142 | struct files_struct *files = NULL; | ||
143 | struct task_struct *task; | ||
144 | int ret = -ENOENT; | ||
145 | |||
146 | task = get_proc_task(dentry->d_inode); | ||
147 | if (task) { | ||
148 | files = get_files_struct(task); | ||
149 | put_task_struct(task); | ||
150 | } | ||
151 | |||
152 | if (files) { | ||
153 | int fd = proc_fd(dentry->d_inode); | ||
154 | struct file *fd_file; | ||
155 | |||
156 | spin_lock(&files->file_lock); | ||
157 | fd_file = fcheck_files(files, fd); | ||
158 | if (fd_file) { | ||
159 | *path = fd_file->f_path; | ||
160 | path_get(&fd_file->f_path); | ||
161 | ret = 0; | ||
162 | } | ||
163 | spin_unlock(&files->file_lock); | ||
164 | put_files_struct(files); | ||
165 | } | ||
166 | |||
167 | return ret; | ||
168 | } | ||
169 | |||
170 | static struct dentry * | ||
171 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | ||
172 | struct task_struct *task, const void *ptr) | ||
173 | { | ||
174 | struct dentry *error = ERR_PTR(-ENOENT); | ||
175 | unsigned fd = (unsigned long)ptr; | ||
176 | struct proc_inode *ei; | ||
177 | struct inode *inode; | ||
178 | |||
179 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
180 | if (!inode) | ||
181 | goto out; | ||
182 | |||
183 | ei = PROC_I(inode); | ||
184 | ei->fd = fd; | ||
185 | |||
186 | inode->i_mode = S_IFLNK; | ||
187 | inode->i_op = &proc_pid_link_inode_operations; | ||
188 | inode->i_size = 64; | ||
189 | |||
190 | ei->op.proc_get_link = proc_fd_link; | ||
191 | |||
192 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
193 | d_add(dentry, inode); | ||
194 | |||
195 | /* Close the race of the process dying before we return the dentry */ | ||
196 | if (tid_fd_revalidate(dentry, 0)) | ||
197 | error = NULL; | ||
198 | out: | ||
199 | return error; | ||
200 | } | ||
201 | |||
202 | static struct dentry *proc_lookupfd_common(struct inode *dir, | ||
203 | struct dentry *dentry, | ||
204 | instantiate_t instantiate) | ||
205 | { | ||
206 | struct task_struct *task = get_proc_task(dir); | ||
207 | struct dentry *result = ERR_PTR(-ENOENT); | ||
208 | unsigned fd = name_to_int(dentry); | ||
209 | |||
210 | if (!task) | ||
211 | goto out_no_task; | ||
212 | if (fd == ~0U) | ||
213 | goto out; | ||
214 | |||
215 | result = instantiate(dir, dentry, task, (void *)(unsigned long)fd); | ||
216 | out: | ||
217 | put_task_struct(task); | ||
218 | out_no_task: | ||
219 | return result; | ||
220 | } | ||
221 | |||
222 | static int proc_readfd_common(struct file * filp, void * dirent, | ||
223 | filldir_t filldir, instantiate_t instantiate) | ||
224 | { | ||
225 | struct dentry *dentry = filp->f_path.dentry; | ||
226 | struct inode *inode = dentry->d_inode; | ||
227 | struct task_struct *p = get_proc_task(inode); | ||
228 | struct files_struct *files; | ||
229 | unsigned int fd, ino; | ||
230 | int retval; | ||
231 | |||
232 | retval = -ENOENT; | ||
233 | if (!p) | ||
234 | goto out_no_task; | ||
235 | retval = 0; | ||
236 | |||
237 | fd = filp->f_pos; | ||
238 | switch (fd) { | ||
239 | case 0: | ||
240 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
241 | goto out; | ||
242 | filp->f_pos++; | ||
243 | case 1: | ||
244 | ino = parent_ino(dentry); | ||
245 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
246 | goto out; | ||
247 | filp->f_pos++; | ||
248 | default: | ||
249 | files = get_files_struct(p); | ||
250 | if (!files) | ||
251 | goto out; | ||
252 | rcu_read_lock(); | ||
253 | for (fd = filp->f_pos - 2; | ||
254 | fd < files_fdtable(files)->max_fds; | ||
255 | fd++, filp->f_pos++) { | ||
256 | char name[PROC_NUMBUF]; | ||
257 | int len; | ||
258 | int rv; | ||
259 | |||
260 | if (!fcheck_files(files, fd)) | ||
261 | continue; | ||
262 | rcu_read_unlock(); | ||
263 | |||
264 | len = snprintf(name, sizeof(name), "%d", fd); | ||
265 | rv = proc_fill_cache(filp, dirent, filldir, | ||
266 | name, len, instantiate, p, | ||
267 | (void *)(unsigned long)fd); | ||
268 | if (rv < 0) | ||
269 | goto out_fd_loop; | ||
270 | rcu_read_lock(); | ||
271 | } | ||
272 | rcu_read_unlock(); | ||
273 | out_fd_loop: | ||
274 | put_files_struct(files); | ||
275 | } | ||
276 | out: | ||
277 | put_task_struct(p); | ||
278 | out_no_task: | ||
279 | return retval; | ||
280 | } | ||
281 | |||
282 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | ||
283 | { | ||
284 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | ||
285 | } | ||
286 | |||
287 | const struct file_operations proc_fd_operations = { | ||
288 | .read = generic_read_dir, | ||
289 | .readdir = proc_readfd, | ||
290 | .llseek = default_llseek, | ||
291 | }; | ||
292 | |||
293 | static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, | ||
294 | unsigned int flags) | ||
295 | { | ||
296 | return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * /proc/pid/fd needs a special permission handler so that a process can still | ||
301 | * access /proc/self/fd after it has executed a setuid(). | ||
302 | */ | ||
303 | int proc_fd_permission(struct inode *inode, int mask) | ||
304 | { | ||
305 | int rv = generic_permission(inode, mask); | ||
306 | if (rv == 0) | ||
307 | return 0; | ||
308 | if (task_pid(current) == proc_pid(inode)) | ||
309 | rv = 0; | ||
310 | return rv; | ||
311 | } | ||
312 | |||
313 | const struct inode_operations proc_fd_inode_operations = { | ||
314 | .lookup = proc_lookupfd, | ||
315 | .permission = proc_fd_permission, | ||
316 | .setattr = proc_setattr, | ||
317 | }; | ||
318 | |||
319 | static struct dentry * | ||
320 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | ||
321 | struct task_struct *task, const void *ptr) | ||
322 | { | ||
323 | struct dentry *error = ERR_PTR(-ENOENT); | ||
324 | unsigned fd = (unsigned long)ptr; | ||
325 | struct proc_inode *ei; | ||
326 | struct inode *inode; | ||
327 | |||
328 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
329 | if (!inode) | ||
330 | goto out; | ||
331 | |||
332 | ei = PROC_I(inode); | ||
333 | ei->fd = fd; | ||
334 | |||
335 | inode->i_mode = S_IFREG | S_IRUSR; | ||
336 | inode->i_fop = &proc_fdinfo_file_operations; | ||
337 | |||
338 | d_set_d_op(dentry, &tid_fd_dentry_operations); | ||
339 | d_add(dentry, inode); | ||
340 | |||
341 | /* Close the race of the process dying before we return the dentry */ | ||
342 | if (tid_fd_revalidate(dentry, 0)) | ||
343 | error = NULL; | ||
344 | out: | ||
345 | return error; | ||
346 | } | ||
347 | |||
348 | static struct dentry * | ||
349 | proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) | ||
350 | { | ||
351 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | ||
352 | } | ||
353 | |||
354 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | ||
355 | { | ||
356 | return proc_readfd_common(filp, dirent, filldir, | ||
357 | proc_fdinfo_instantiate); | ||
358 | } | ||
359 | |||
360 | const struct inode_operations proc_fdinfo_inode_operations = { | ||
361 | .lookup = proc_lookupfdinfo, | ||
362 | .setattr = proc_setattr, | ||
363 | }; | ||
364 | |||
365 | const struct file_operations proc_fdinfo_operations = { | ||
366 | .read = generic_read_dir, | ||
367 | .readdir = proc_readfdinfo, | ||
368 | .llseek = default_llseek, | ||
369 | }; | ||
diff --git a/fs/proc/fd.h b/fs/proc/fd.h deleted file mode 100644 index cbb1d47deda..00000000000 --- a/fs/proc/fd.h +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | #ifndef __PROCFS_FD_H__ | ||
2 | #define __PROCFS_FD_H__ | ||
3 | |||
4 | #include <linux/fs.h> | ||
5 | |||
6 | extern const struct file_operations proc_fd_operations; | ||
7 | extern const struct inode_operations proc_fd_inode_operations; | ||
8 | |||
9 | extern const struct file_operations proc_fdinfo_operations; | ||
10 | extern const struct inode_operations proc_fdinfo_inode_operations; | ||
11 | |||
12 | extern int proc_fd_permission(struct inode *inode, int mask); | ||
13 | |||
14 | #endif /* __PROCFS_FD_H__ */ | ||
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 76ddae83daa..9d99131d0d6 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -261,9 +261,16 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) | |||
261 | if (error) | 261 | if (error) |
262 | return error; | 262 | return error; |
263 | 263 | ||
264 | if ((iattr->ia_valid & ATTR_SIZE) && | ||
265 | iattr->ia_size != i_size_read(inode)) { | ||
266 | error = vmtruncate(inode, iattr->ia_size); | ||
267 | if (error) | ||
268 | return error; | ||
269 | } | ||
270 | |||
264 | setattr_copy(inode, iattr); | 271 | setattr_copy(inode, iattr); |
265 | mark_inode_dirty(inode); | 272 | mark_inode_dirty(inode); |
266 | 273 | ||
267 | de->uid = inode->i_uid; | 274 | de->uid = inode->i_uid; |
268 | de->gid = inode->i_gid; | 275 | de->gid = inode->i_gid; |
269 | de->mode = inode->i_mode; | 276 | de->mode = inode->i_mode; |
@@ -276,7 +283,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
276 | struct inode *inode = dentry->d_inode; | 283 | struct inode *inode = dentry->d_inode; |
277 | struct proc_dir_entry *de = PROC_I(inode)->pde; | 284 | struct proc_dir_entry *de = PROC_I(inode)->pde; |
278 | if (de && de->nlink) | 285 | if (de && de->nlink) |
279 | set_nlink(inode, de->nlink); | 286 | inode->i_nlink = de->nlink; |
280 | 287 | ||
281 | generic_fillattr(inode, stat); | 288 | generic_fillattr(inode, stat); |
282 | return 0; | 289 | return 0; |
@@ -343,39 +350,37 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ | |||
343 | * Return an inode number between PROC_DYNAMIC_FIRST and | 350 | * Return an inode number between PROC_DYNAMIC_FIRST and |
344 | * 0xffffffff, or zero on failure. | 351 | * 0xffffffff, or zero on failure. |
345 | */ | 352 | */ |
346 | int proc_alloc_inum(unsigned int *inum) | 353 | static unsigned int get_inode_number(void) |
347 | { | 354 | { |
348 | unsigned int i; | 355 | unsigned int i; |
349 | int error; | 356 | int error; |
350 | 357 | ||
351 | retry: | 358 | retry: |
352 | if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) | 359 | if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) |
353 | return -ENOMEM; | 360 | return 0; |
354 | 361 | ||
355 | spin_lock_irq(&proc_inum_lock); | 362 | spin_lock(&proc_inum_lock); |
356 | error = ida_get_new(&proc_inum_ida, &i); | 363 | error = ida_get_new(&proc_inum_ida, &i); |
357 | spin_unlock_irq(&proc_inum_lock); | 364 | spin_unlock(&proc_inum_lock); |
358 | if (error == -EAGAIN) | 365 | if (error == -EAGAIN) |
359 | goto retry; | 366 | goto retry; |
360 | else if (error) | 367 | else if (error) |
361 | return error; | 368 | return 0; |
362 | 369 | ||
363 | if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { | 370 | if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { |
364 | spin_lock_irq(&proc_inum_lock); | 371 | spin_lock(&proc_inum_lock); |
365 | ida_remove(&proc_inum_ida, i); | 372 | ida_remove(&proc_inum_ida, i); |
366 | spin_unlock_irq(&proc_inum_lock); | 373 | spin_unlock(&proc_inum_lock); |
367 | return -ENOSPC; | 374 | return 0; |
368 | } | 375 | } |
369 | *inum = PROC_DYNAMIC_FIRST + i; | 376 | return PROC_DYNAMIC_FIRST + i; |
370 | return 0; | ||
371 | } | 377 | } |
372 | 378 | ||
373 | void proc_free_inum(unsigned int inum) | 379 | static void release_inode_number(unsigned int inum) |
374 | { | 380 | { |
375 | unsigned long flags; | 381 | spin_lock(&proc_inum_lock); |
376 | spin_lock_irqsave(&proc_inum_lock, flags); | ||
377 | ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); | 382 | ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); |
378 | spin_unlock_irqrestore(&proc_inum_lock, flags); | 383 | spin_unlock(&proc_inum_lock); |
379 | } | 384 | } |
380 | 385 | ||
381 | static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) | 386 | static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) |
@@ -422,7 +427,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, | |||
422 | if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { | 427 | if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { |
423 | pde_get(de); | 428 | pde_get(de); |
424 | spin_unlock(&proc_subdir_lock); | 429 | spin_unlock(&proc_subdir_lock); |
425 | error = -ENOMEM; | 430 | error = -EINVAL; |
426 | inode = proc_get_inode(dir->i_sb, de); | 431 | inode = proc_get_inode(dir->i_sb, de); |
427 | goto out_unlock; | 432 | goto out_unlock; |
428 | } | 433 | } |
@@ -441,7 +446,7 @@ out_unlock: | |||
441 | } | 446 | } |
442 | 447 | ||
443 | struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, | 448 | struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, |
444 | unsigned int flags) | 449 | struct nameidata *nd) |
445 | { | 450 | { |
446 | return proc_lookup_de(PDE(dir), dir, dentry); | 451 | return proc_lookup_de(PDE(dir), dir, dentry); |
447 | } | 452 | } |
@@ -549,12 +554,13 @@ static const struct inode_operations proc_dir_inode_operations = { | |||
549 | 554 | ||
550 | static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) | 555 | static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) |
551 | { | 556 | { |
557 | unsigned int i; | ||
552 | struct proc_dir_entry *tmp; | 558 | struct proc_dir_entry *tmp; |
553 | int ret; | ||
554 | 559 | ||
555 | ret = proc_alloc_inum(&dp->low_ino); | 560 | i = get_inode_number(); |
556 | if (ret) | 561 | if (i == 0) |
557 | return ret; | 562 | return -EAGAIN; |
563 | dp->low_ino = i; | ||
558 | 564 | ||
559 | if (S_ISDIR(dp->mode)) { | 565 | if (S_ISDIR(dp->mode)) { |
560 | if (dp->proc_iops == NULL) { | 566 | if (dp->proc_iops == NULL) { |
@@ -591,7 +597,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
591 | 597 | ||
592 | static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | 598 | static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, |
593 | const char *name, | 599 | const char *name, |
594 | umode_t mode, | 600 | mode_t mode, |
595 | nlink_t nlink) | 601 | nlink_t nlink) |
596 | { | 602 | { |
597 | struct proc_dir_entry *ent = NULL; | 603 | struct proc_dir_entry *ent = NULL; |
@@ -599,8 +605,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
599 | unsigned int len; | 605 | unsigned int len; |
600 | 606 | ||
601 | /* make sure name is valid */ | 607 | /* make sure name is valid */ |
602 | if (!name || !strlen(name)) | 608 | if (!name || !strlen(name)) goto out; |
603 | goto out; | ||
604 | 609 | ||
605 | if (xlate_proc_name(name, parent, &fn) != 0) | 610 | if (xlate_proc_name(name, parent, &fn) != 0) |
606 | goto out; | 611 | goto out; |
@@ -611,18 +616,20 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
611 | 616 | ||
612 | len = strlen(fn); | 617 | len = strlen(fn); |
613 | 618 | ||
614 | ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); | 619 | ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); |
615 | if (!ent) | 620 | if (!ent) goto out; |
616 | goto out; | ||
617 | 621 | ||
622 | memset(ent, 0, sizeof(struct proc_dir_entry)); | ||
618 | memcpy(ent->name, fn, len + 1); | 623 | memcpy(ent->name, fn, len + 1); |
619 | ent->namelen = len; | 624 | ent->namelen = len; |
620 | ent->mode = mode; | 625 | ent->mode = mode; |
621 | ent->nlink = nlink; | 626 | ent->nlink = nlink; |
622 | atomic_set(&ent->count, 1); | 627 | atomic_set(&ent->count, 1); |
628 | ent->pde_users = 0; | ||
623 | spin_lock_init(&ent->pde_unload_lock); | 629 | spin_lock_init(&ent->pde_unload_lock); |
630 | ent->pde_unload_completion = NULL; | ||
624 | INIT_LIST_HEAD(&ent->pde_openers); | 631 | INIT_LIST_HEAD(&ent->pde_openers); |
625 | out: | 632 | out: |
626 | return ent; | 633 | return ent; |
627 | } | 634 | } |
628 | 635 | ||
@@ -652,7 +659,7 @@ struct proc_dir_entry *proc_symlink(const char *name, | |||
652 | } | 659 | } |
653 | EXPORT_SYMBOL(proc_symlink); | 660 | EXPORT_SYMBOL(proc_symlink); |
654 | 661 | ||
655 | struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, | 662 | struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, |
656 | struct proc_dir_entry *parent) | 663 | struct proc_dir_entry *parent) |
657 | { | 664 | { |
658 | struct proc_dir_entry *ent; | 665 | struct proc_dir_entry *ent; |
@@ -692,7 +699,7 @@ struct proc_dir_entry *proc_mkdir(const char *name, | |||
692 | } | 699 | } |
693 | EXPORT_SYMBOL(proc_mkdir); | 700 | EXPORT_SYMBOL(proc_mkdir); |
694 | 701 | ||
695 | struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode, | 702 | struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, |
696 | struct proc_dir_entry *parent) | 703 | struct proc_dir_entry *parent) |
697 | { | 704 | { |
698 | struct proc_dir_entry *ent; | 705 | struct proc_dir_entry *ent; |
@@ -721,7 +728,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode, | |||
721 | } | 728 | } |
722 | EXPORT_SYMBOL(create_proc_entry); | 729 | EXPORT_SYMBOL(create_proc_entry); |
723 | 730 | ||
724 | struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, | 731 | struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, |
725 | struct proc_dir_entry *parent, | 732 | struct proc_dir_entry *parent, |
726 | const struct file_operations *proc_fops, | 733 | const struct file_operations *proc_fops, |
727 | void *data) | 734 | void *data) |
@@ -758,7 +765,7 @@ EXPORT_SYMBOL(proc_create_data); | |||
758 | 765 | ||
759 | static void free_proc_entry(struct proc_dir_entry *de) | 766 | static void free_proc_entry(struct proc_dir_entry *de) |
760 | { | 767 | { |
761 | proc_free_inum(de->low_ino); | 768 | release_inode_number(de->low_ino); |
762 | 769 | ||
763 | if (S_ISLNK(de->mode)) | 770 | if (S_ISLNK(de->mode)) |
764 | kfree(de->data); | 771 | kfree(de->data); |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 439ae688650..7ed72d6c1c6 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -7,7 +7,6 @@ | |||
7 | #include <linux/time.h> | 7 | #include <linux/time.h> |
8 | #include <linux/proc_fs.h> | 8 | #include <linux/proc_fs.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/pid_namespace.h> | ||
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | #include <linux/string.h> | 11 | #include <linux/string.h> |
13 | #include <linux/stat.h> | 12 | #include <linux/stat.h> |
@@ -18,10 +17,9 @@ | |||
18 | #include <linux/init.h> | 17 | #include <linux/init.h> |
19 | #include <linux/module.h> | 18 | #include <linux/module.h> |
20 | #include <linux/sysctl.h> | 19 | #include <linux/sysctl.h> |
21 | #include <linux/seq_file.h> | ||
22 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
23 | #include <linux/mount.h> | ||
24 | 21 | ||
22 | #include <asm/system.h> | ||
25 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
26 | 24 | ||
27 | #include "internal.h" | 25 | #include "internal.h" |
@@ -31,10 +29,9 @@ static void proc_evict_inode(struct inode *inode) | |||
31 | struct proc_dir_entry *de; | 29 | struct proc_dir_entry *de; |
32 | struct ctl_table_header *head; | 30 | struct ctl_table_header *head; |
33 | const struct proc_ns_operations *ns_ops; | 31 | const struct proc_ns_operations *ns_ops; |
34 | void *ns; | ||
35 | 32 | ||
36 | truncate_inode_pages(&inode->i_data, 0); | 33 | truncate_inode_pages(&inode->i_data, 0); |
37 | clear_inode(inode); | 34 | end_writeback(inode); |
38 | 35 | ||
39 | /* Stop tracking associated processes */ | 36 | /* Stop tracking associated processes */ |
40 | put_pid(PROC_I(inode)->pid); | 37 | put_pid(PROC_I(inode)->pid); |
@@ -50,9 +47,8 @@ static void proc_evict_inode(struct inode *inode) | |||
50 | } | 47 | } |
51 | /* Release any associated namespace */ | 48 | /* Release any associated namespace */ |
52 | ns_ops = PROC_I(inode)->ns_ops; | 49 | ns_ops = PROC_I(inode)->ns_ops; |
53 | ns = PROC_I(inode)->ns; | 50 | if (ns_ops && ns_ops->put) |
54 | if (ns_ops && ns) | 51 | ns_ops->put(PROC_I(inode)->ns); |
55 | ns_ops->put(ns); | ||
56 | } | 52 | } |
57 | 53 | ||
58 | static struct kmem_cache * proc_inode_cachep; | 54 | static struct kmem_cache * proc_inode_cachep; |
@@ -81,6 +77,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) | |||
81 | static void proc_i_callback(struct rcu_head *head) | 77 | static void proc_i_callback(struct rcu_head *head) |
82 | { | 78 | { |
83 | struct inode *inode = container_of(head, struct inode, i_rcu); | 79 | struct inode *inode = container_of(head, struct inode, i_rcu); |
80 | INIT_LIST_HEAD(&inode->i_dentry); | ||
84 | kmem_cache_free(proc_inode_cachep, PROC_I(inode)); | 81 | kmem_cache_free(proc_inode_cachep, PROC_I(inode)); |
85 | } | 82 | } |
86 | 83 | ||
@@ -105,27 +102,12 @@ void __init proc_init_inodecache(void) | |||
105 | init_once); | 102 | init_once); |
106 | } | 103 | } |
107 | 104 | ||
108 | static int proc_show_options(struct seq_file *seq, struct dentry *root) | ||
109 | { | ||
110 | struct super_block *sb = root->d_sb; | ||
111 | struct pid_namespace *pid = sb->s_fs_info; | ||
112 | |||
113 | if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) | ||
114 | seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); | ||
115 | if (pid->hide_pid != 0) | ||
116 | seq_printf(seq, ",hidepid=%u", pid->hide_pid); | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static const struct super_operations proc_sops = { | 105 | static const struct super_operations proc_sops = { |
122 | .alloc_inode = proc_alloc_inode, | 106 | .alloc_inode = proc_alloc_inode, |
123 | .destroy_inode = proc_destroy_inode, | 107 | .destroy_inode = proc_destroy_inode, |
124 | .drop_inode = generic_delete_inode, | 108 | .drop_inode = generic_delete_inode, |
125 | .evict_inode = proc_evict_inode, | 109 | .evict_inode = proc_evict_inode, |
126 | .statfs = simple_statfs, | 110 | .statfs = simple_statfs, |
127 | .remount_fs = proc_remount, | ||
128 | .show_options = proc_show_options, | ||
129 | }; | 111 | }; |
130 | 112 | ||
131 | static void __pde_users_dec(struct proc_dir_entry *pde) | 113 | static void __pde_users_dec(struct proc_dir_entry *pde) |
@@ -452,6 +434,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
452 | return NULL; | 434 | return NULL; |
453 | if (inode->i_state & I_NEW) { | 435 | if (inode->i_state & I_NEW) { |
454 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 436 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
437 | PROC_I(inode)->fd = 0; | ||
455 | PROC_I(inode)->pde = de; | 438 | PROC_I(inode)->pde = de; |
456 | 439 | ||
457 | if (de->mode) { | 440 | if (de->mode) { |
@@ -462,7 +445,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
462 | if (de->size) | 445 | if (de->size) |
463 | inode->i_size = de->size; | 446 | inode->i_size = de->size; |
464 | if (de->nlink) | 447 | if (de->nlink) |
465 | set_nlink(inode, de->nlink); | 448 | inode->i_nlink = de->nlink; |
466 | if (de->proc_iops) | 449 | if (de->proc_iops) |
467 | inode->i_op = de->proc_iops; | 450 | inode->i_op = de->proc_iops; |
468 | if (de->proc_fops) { | 451 | if (de->proc_fops) { |
@@ -486,6 +469,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
486 | 469 | ||
487 | int proc_fill_super(struct super_block *s) | 470 | int proc_fill_super(struct super_block *s) |
488 | { | 471 | { |
472 | struct inode * root_inode; | ||
473 | |||
489 | s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; | 474 | s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; |
490 | s->s_blocksize = 1024; | 475 | s->s_blocksize = 1024; |
491 | s->s_blocksize_bits = 10; | 476 | s->s_blocksize_bits = 10; |
@@ -494,11 +479,19 @@ int proc_fill_super(struct super_block *s) | |||
494 | s->s_time_gran = 1; | 479 | s->s_time_gran = 1; |
495 | 480 | ||
496 | pde_get(&proc_root); | 481 | pde_get(&proc_root); |
497 | s->s_root = d_make_root(proc_get_inode(s, &proc_root)); | 482 | root_inode = proc_get_inode(s, &proc_root); |
498 | if (s->s_root) | 483 | if (!root_inode) |
499 | return 0; | 484 | goto out_no_root; |
485 | root_inode->i_uid = 0; | ||
486 | root_inode->i_gid = 0; | ||
487 | s->s_root = d_alloc_root(root_inode); | ||
488 | if (!s->s_root) | ||
489 | goto out_no_root; | ||
490 | return 0; | ||
500 | 491 | ||
492 | out_no_root: | ||
501 | printk("proc_read_super: get root inode failed\n"); | 493 | printk("proc_read_super: get root inode failed\n"); |
494 | iput(root_inode); | ||
502 | pde_put(&proc_root); | 495 | pde_put(&proc_root); |
503 | return -ENOMEM; | 496 | return -ENOMEM; |
504 | } | 497 | } |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 252544c0520..7838e5cfec1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -9,19 +9,13 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
14 | struct ctl_table_header; | ||
15 | struct mempolicy; | ||
16 | 13 | ||
17 | extern struct proc_dir_entry proc_root; | 14 | extern struct proc_dir_entry proc_root; |
18 | extern void proc_self_init(void); | ||
19 | #ifdef CONFIG_PROC_SYSCTL | 15 | #ifdef CONFIG_PROC_SYSCTL |
20 | extern int proc_sys_init(void); | 16 | extern int proc_sys_init(void); |
21 | extern void sysctl_head_put(struct ctl_table_header *head); | ||
22 | #else | 17 | #else |
23 | static inline void proc_sys_init(void) { } | 18 | static inline void proc_sys_init(void) { } |
24 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
25 | #endif | 19 | #endif |
26 | #ifdef CONFIG_NET | 20 | #ifdef CONFIG_NET |
27 | extern int proc_net_init(void); | 21 | extern int proc_net_init(void); |
@@ -34,6 +28,8 @@ struct vmalloc_info { | |||
34 | unsigned long largest_chunk; | 28 | unsigned long largest_chunk; |
35 | }; | 29 | }; |
36 | 30 | ||
31 | extern struct mm_struct *mm_for_maps(struct task_struct *); | ||
32 | |||
37 | #ifdef CONFIG_MMU | 33 | #ifdef CONFIG_MMU |
38 | #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) | 34 | #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) |
39 | extern void get_vmalloc_info(struct vmalloc_info *vmi); | 35 | extern void get_vmalloc_info(struct vmalloc_info *vmi); |
@@ -57,18 +53,13 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | |||
57 | struct pid *pid, struct task_struct *task); | 53 | struct pid *pid, struct task_struct *task); |
58 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | 54 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); |
59 | 55 | ||
60 | extern const struct file_operations proc_tid_children_operations; | 56 | extern const struct file_operations proc_maps_operations; |
61 | extern const struct file_operations proc_pid_maps_operations; | 57 | extern const struct file_operations proc_numa_maps_operations; |
62 | extern const struct file_operations proc_tid_maps_operations; | 58 | extern const struct file_operations proc_smaps_operations; |
63 | extern const struct file_operations proc_pid_numa_maps_operations; | ||
64 | extern const struct file_operations proc_tid_numa_maps_operations; | ||
65 | extern const struct file_operations proc_pid_smaps_operations; | ||
66 | extern const struct file_operations proc_tid_smaps_operations; | ||
67 | extern const struct file_operations proc_clear_refs_operations; | 59 | extern const struct file_operations proc_clear_refs_operations; |
68 | extern const struct file_operations proc_pagemap_operations; | 60 | extern const struct file_operations proc_pagemap_operations; |
69 | extern const struct file_operations proc_net_operations; | 61 | extern const struct file_operations proc_net_operations; |
70 | extern const struct inode_operations proc_net_inode_operations; | 62 | extern const struct inode_operations proc_net_inode_operations; |
71 | extern const struct inode_operations proc_pid_link_inode_operations; | ||
72 | 63 | ||
73 | struct proc_maps_private { | 64 | struct proc_maps_private { |
74 | struct pid *pid; | 65 | struct pid *pid; |
@@ -76,9 +67,6 @@ struct proc_maps_private { | |||
76 | #ifdef CONFIG_MMU | 67 | #ifdef CONFIG_MMU |
77 | struct vm_area_struct *tail_vma; | 68 | struct vm_area_struct *tail_vma; |
78 | #endif | 69 | #endif |
79 | #ifdef CONFIG_NUMA | ||
80 | struct mempolicy *task_mempolicy; | ||
81 | #endif | ||
82 | }; | 70 | }; |
83 | 71 | ||
84 | void proc_init_inodecache(void); | 72 | void proc_init_inodecache(void); |
@@ -98,52 +86,6 @@ static inline int proc_fd(struct inode *inode) | |||
98 | return PROC_I(inode)->fd; | 86 | return PROC_I(inode)->fd; |
99 | } | 87 | } |
100 | 88 | ||
101 | static inline int task_dumpable(struct task_struct *task) | ||
102 | { | ||
103 | int dumpable = 0; | ||
104 | struct mm_struct *mm; | ||
105 | |||
106 | task_lock(task); | ||
107 | mm = task->mm; | ||
108 | if (mm) | ||
109 | dumpable = get_dumpable(mm); | ||
110 | task_unlock(task); | ||
111 | if (dumpable == SUID_DUMPABLE_ENABLED) | ||
112 | return 1; | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | static inline int pid_delete_dentry(const struct dentry * dentry) | ||
117 | { | ||
118 | /* Is the task we represent dead? | ||
119 | * If so, then don't put the dentry on the lru list, | ||
120 | * kill it immediately. | ||
121 | */ | ||
122 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
123 | } | ||
124 | |||
125 | static inline unsigned name_to_int(struct dentry *dentry) | ||
126 | { | ||
127 | const char *name = dentry->d_name.name; | ||
128 | int len = dentry->d_name.len; | ||
129 | unsigned n = 0; | ||
130 | |||
131 | if (len > 1 && *name == '0') | ||
132 | goto out; | ||
133 | while (len-- > 0) { | ||
134 | unsigned c = *name++ - '0'; | ||
135 | if (c > 9) | ||
136 | goto out; | ||
137 | if (n >= (~0U-9)/10) | ||
138 | goto out; | ||
139 | n *= 10; | ||
140 | n += c; | ||
141 | } | ||
142 | return n; | ||
143 | out: | ||
144 | return ~0U; | ||
145 | } | ||
146 | |||
147 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, | 89 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, |
148 | struct dentry *dentry); | 90 | struct dentry *dentry); |
149 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 91 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, |
@@ -159,7 +101,7 @@ void pde_users_dec(struct proc_dir_entry *pde); | |||
159 | 101 | ||
160 | extern spinlock_t proc_subdir_lock; | 102 | extern spinlock_t proc_subdir_lock; |
161 | 103 | ||
162 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); | 104 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); |
163 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | 105 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); |
164 | unsigned long task_vsize(struct mm_struct *); | 106 | unsigned long task_vsize(struct mm_struct *); |
165 | unsigned long task_statm(struct mm_struct *, | 107 | unsigned long task_statm(struct mm_struct *, |
@@ -175,7 +117,6 @@ void pde_put(struct proc_dir_entry *pde); | |||
175 | 117 | ||
176 | int proc_fill_super(struct super_block *); | 118 | int proc_fill_super(struct super_block *); |
177 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 119 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
178 | int proc_remount(struct super_block *sb, int *flags, char *data); | ||
179 | 120 | ||
180 | /* | 121 | /* |
181 | * These are generic /proc routines that use the internal | 122 | * These are generic /proc routines that use the internal |
@@ -185,7 +126,7 @@ int proc_remount(struct super_block *sb, int *flags, char *data); | |||
185 | * of the /proc/<pid> subdirectories. | 126 | * of the /proc/<pid> subdirectories. |
186 | */ | 127 | */ |
187 | int proc_readdir(struct file *, void *, filldir_t); | 128 | int proc_readdir(struct file *, void *, filldir_t); |
188 | struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); | 129 | struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); |
189 | 130 | ||
190 | 131 | ||
191 | 132 | ||
@@ -195,7 +136,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | |||
195 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 136 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
196 | const char *name, int len, | 137 | const char *name, int len, |
197 | instantiate_t instantiate, struct task_struct *task, const void *ptr); | 138 | instantiate_t instantiate, struct task_struct *task, const void *ptr); |
198 | int pid_revalidate(struct dentry *dentry, unsigned int flags); | 139 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd); |
199 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); | 140 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); |
200 | extern const struct dentry_operations pid_dentry_operations; | 141 | extern const struct dentry_operations pid_dentry_operations; |
201 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | 142 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index e96d4f18ca3..d245cb23dd7 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -157,8 +157,7 @@ static int kcore_update_ram(void) | |||
157 | 157 | ||
158 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 158 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
159 | /* calculate vmemmap's address from given system ram pfn and register it */ | 159 | /* calculate vmemmap's address from given system ram pfn and register it */ |
160 | static int | 160 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) |
161 | get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
162 | { | 161 | { |
163 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; | 162 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; |
164 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; | 163 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; |
@@ -190,8 +189,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | |||
190 | 189 | ||
191 | } | 190 | } |
192 | #else | 191 | #else |
193 | static int | 192 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) |
194 | get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
195 | { | 193 | { |
196 | return 1; | 194 | return 1; |
197 | } | 195 | } |
@@ -249,7 +247,7 @@ static int kcore_update_ram(void) | |||
249 | /* Not inialized....update now */ | 247 | /* Not inialized....update now */ |
250 | /* find out "max pfn" */ | 248 | /* find out "max pfn" */ |
251 | end_pfn = 0; | 249 | end_pfn = 0; |
252 | for_each_node_state(nid, N_MEMORY) { | 250 | for_each_node_state(nid, N_HIGH_MEMORY) { |
253 | unsigned long node_end; | 251 | unsigned long node_end; |
254 | node_end = NODE_DATA(nid)->node_start_pfn + | 252 | node_end = NODE_DATA(nid)->node_start_pfn + |
255 | NODE_DATA(nid)->node_spanned_pages; | 253 | NODE_DATA(nid)->node_spanned_pages; |
@@ -515,7 +513,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
515 | 513 | ||
516 | n = copy_to_user(buffer, (char *)start, tsz); | 514 | n = copy_to_user(buffer, (char *)start, tsz); |
517 | /* | 515 | /* |
518 | * We cannot distinguish between fault on source | 516 | * We cannot distingush between fault on source |
519 | * and fault on destination. When this happens | 517 | * and fault on destination. When this happens |
520 | * we clear too and hope it will trigger the | 518 | * we clear too and hope it will trigger the |
521 | * EFAULT again. | 519 | * EFAULT again. |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b7a47196c8c..be177f702ac 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
@@ -9,9 +9,9 @@ | |||
9 | #include <linux/file.h> | 9 | #include <linux/file.h> |
10 | #include <linux/utsname.h> | 10 | #include <linux/utsname.h> |
11 | #include <net/net_namespace.h> | 11 | #include <net/net_namespace.h> |
12 | #include <linux/mnt_namespace.h> | ||
12 | #include <linux/ipc_namespace.h> | 13 | #include <linux/ipc_namespace.h> |
13 | #include <linux/pid_namespace.h> | 14 | #include <linux/pid_namespace.h> |
14 | #include <linux/user_namespace.h> | ||
15 | #include "internal.h" | 15 | #include "internal.h" |
16 | 16 | ||
17 | 17 | ||
@@ -25,168 +25,12 @@ static const struct proc_ns_operations *ns_entries[] = { | |||
25 | #ifdef CONFIG_IPC_NS | 25 | #ifdef CONFIG_IPC_NS |
26 | &ipcns_operations, | 26 | &ipcns_operations, |
27 | #endif | 27 | #endif |
28 | #ifdef CONFIG_PID_NS | ||
29 | &pidns_operations, | ||
30 | #endif | ||
31 | #ifdef CONFIG_USER_NS | ||
32 | &userns_operations, | ||
33 | #endif | ||
34 | &mntns_operations, | ||
35 | }; | 28 | }; |
36 | 29 | ||
37 | static const struct file_operations ns_file_operations = { | 30 | static const struct file_operations ns_file_operations = { |
38 | .llseek = no_llseek, | 31 | .llseek = no_llseek, |
39 | }; | 32 | }; |
40 | 33 | ||
41 | static const struct inode_operations ns_inode_operations = { | ||
42 | .setattr = proc_setattr, | ||
43 | }; | ||
44 | |||
45 | static int ns_delete_dentry(const struct dentry *dentry) | ||
46 | { | ||
47 | /* Don't cache namespace inodes when not in use */ | ||
48 | return 1; | ||
49 | } | ||
50 | |||
51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | ||
52 | { | ||
53 | struct inode *inode = dentry->d_inode; | ||
54 | const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; | ||
55 | |||
56 | return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", | ||
57 | ns_ops->name, inode->i_ino); | ||
58 | } | ||
59 | |||
60 | const struct dentry_operations ns_dentry_operations = | ||
61 | { | ||
62 | .d_delete = ns_delete_dentry, | ||
63 | .d_dname = ns_dname, | ||
64 | }; | ||
65 | |||
66 | static struct dentry *proc_ns_get_dentry(struct super_block *sb, | ||
67 | struct task_struct *task, const struct proc_ns_operations *ns_ops) | ||
68 | { | ||
69 | struct dentry *dentry, *result; | ||
70 | struct inode *inode; | ||
71 | struct proc_inode *ei; | ||
72 | struct qstr qname = { .name = "", }; | ||
73 | void *ns; | ||
74 | |||
75 | ns = ns_ops->get(task); | ||
76 | if (!ns) | ||
77 | return ERR_PTR(-ENOENT); | ||
78 | |||
79 | dentry = d_alloc_pseudo(sb, &qname); | ||
80 | if (!dentry) { | ||
81 | ns_ops->put(ns); | ||
82 | return ERR_PTR(-ENOMEM); | ||
83 | } | ||
84 | |||
85 | inode = iget_locked(sb, ns_ops->inum(ns)); | ||
86 | if (!inode) { | ||
87 | dput(dentry); | ||
88 | ns_ops->put(ns); | ||
89 | return ERR_PTR(-ENOMEM); | ||
90 | } | ||
91 | |||
92 | ei = PROC_I(inode); | ||
93 | if (inode->i_state & I_NEW) { | ||
94 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
95 | inode->i_op = &ns_inode_operations; | ||
96 | inode->i_mode = S_IFREG | S_IRUGO; | ||
97 | inode->i_fop = &ns_file_operations; | ||
98 | ei->ns_ops = ns_ops; | ||
99 | ei->ns = ns; | ||
100 | unlock_new_inode(inode); | ||
101 | } else { | ||
102 | ns_ops->put(ns); | ||
103 | } | ||
104 | |||
105 | d_set_d_op(dentry, &ns_dentry_operations); | ||
106 | result = d_instantiate_unique(dentry, inode); | ||
107 | if (result) { | ||
108 | dput(dentry); | ||
109 | dentry = result; | ||
110 | } | ||
111 | |||
112 | return dentry; | ||
113 | } | ||
114 | |||
115 | static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
116 | { | ||
117 | struct inode *inode = dentry->d_inode; | ||
118 | struct super_block *sb = inode->i_sb; | ||
119 | struct proc_inode *ei = PROC_I(inode); | ||
120 | struct task_struct *task; | ||
121 | struct dentry *ns_dentry; | ||
122 | void *error = ERR_PTR(-EACCES); | ||
123 | |||
124 | task = get_proc_task(inode); | ||
125 | if (!task) | ||
126 | goto out; | ||
127 | |||
128 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
129 | goto out_put_task; | ||
130 | |||
131 | ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); | ||
132 | if (IS_ERR(ns_dentry)) { | ||
133 | error = ERR_CAST(ns_dentry); | ||
134 | goto out_put_task; | ||
135 | } | ||
136 | |||
137 | dput(nd->path.dentry); | ||
138 | nd->path.dentry = ns_dentry; | ||
139 | error = NULL; | ||
140 | |||
141 | out_put_task: | ||
142 | put_task_struct(task); | ||
143 | out: | ||
144 | return error; | ||
145 | } | ||
146 | |||
147 | static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) | ||
148 | { | ||
149 | struct inode *inode = dentry->d_inode; | ||
150 | struct proc_inode *ei = PROC_I(inode); | ||
151 | const struct proc_ns_operations *ns_ops = ei->ns_ops; | ||
152 | struct task_struct *task; | ||
153 | void *ns; | ||
154 | char name[50]; | ||
155 | int len = -EACCES; | ||
156 | |||
157 | task = get_proc_task(inode); | ||
158 | if (!task) | ||
159 | goto out; | ||
160 | |||
161 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
162 | goto out_put_task; | ||
163 | |||
164 | len = -ENOENT; | ||
165 | ns = ns_ops->get(task); | ||
166 | if (!ns) | ||
167 | goto out_put_task; | ||
168 | |||
169 | snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); | ||
170 | len = strlen(name); | ||
171 | |||
172 | if (len > buflen) | ||
173 | len = buflen; | ||
174 | if (copy_to_user(buffer, name, len)) | ||
175 | len = -EFAULT; | ||
176 | |||
177 | ns_ops->put(ns); | ||
178 | out_put_task: | ||
179 | put_task_struct(task); | ||
180 | out: | ||
181 | return len; | ||
182 | } | ||
183 | |||
184 | static const struct inode_operations proc_ns_link_inode_operations = { | ||
185 | .readlink = proc_ns_readlink, | ||
186 | .follow_link = proc_ns_follow_link, | ||
187 | .setattr = proc_setattr, | ||
188 | }; | ||
189 | |||
190 | static struct dentry *proc_ns_instantiate(struct inode *dir, | 34 | static struct dentry *proc_ns_instantiate(struct inode *dir, |
191 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 35 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
192 | { | 36 | { |
@@ -194,23 +38,32 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
194 | struct inode *inode; | 38 | struct inode *inode; |
195 | struct proc_inode *ei; | 39 | struct proc_inode *ei; |
196 | struct dentry *error = ERR_PTR(-ENOENT); | 40 | struct dentry *error = ERR_PTR(-ENOENT); |
41 | void *ns; | ||
197 | 42 | ||
198 | inode = proc_pid_make_inode(dir->i_sb, task); | 43 | inode = proc_pid_make_inode(dir->i_sb, task); |
199 | if (!inode) | 44 | if (!inode) |
200 | goto out; | 45 | goto out; |
201 | 46 | ||
47 | ns = ns_ops->get(task); | ||
48 | if (!ns) | ||
49 | goto out_iput; | ||
50 | |||
202 | ei = PROC_I(inode); | 51 | ei = PROC_I(inode); |
203 | inode->i_mode = S_IFLNK|S_IRWXUGO; | 52 | inode->i_mode = S_IFREG|S_IRUSR; |
204 | inode->i_op = &proc_ns_link_inode_operations; | 53 | inode->i_fop = &ns_file_operations; |
205 | ei->ns_ops = ns_ops; | 54 | ei->ns_ops = ns_ops; |
55 | ei->ns = ns; | ||
206 | 56 | ||
207 | d_set_d_op(dentry, &pid_dentry_operations); | 57 | dentry->d_op = &pid_dentry_operations; |
208 | d_add(dentry, inode); | 58 | d_add(dentry, inode); |
209 | /* Close the race of the process dying before we return the dentry */ | 59 | /* Close the race of the process dying before we return the dentry */ |
210 | if (pid_revalidate(dentry, 0)) | 60 | if (pid_revalidate(dentry, NULL)) |
211 | error = NULL; | 61 | error = NULL; |
212 | out: | 62 | out: |
213 | return error; | 63 | return error; |
64 | out_iput: | ||
65 | iput(inode); | ||
66 | goto out; | ||
214 | } | 67 | } |
215 | 68 | ||
216 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | 69 | static int proc_ns_fill_cache(struct file *filp, void *dirent, |
@@ -237,6 +90,10 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent, | |||
237 | if (!task) | 90 | if (!task) |
238 | goto out_no_task; | 91 | goto out_no_task; |
239 | 92 | ||
93 | ret = -EPERM; | ||
94 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
95 | goto out; | ||
96 | |||
240 | ret = 0; | 97 | ret = 0; |
241 | i = filp->f_pos; | 98 | i = filp->f_pos; |
242 | switch (i) { | 99 | switch (i) { |
@@ -284,7 +141,7 @@ const struct file_operations proc_ns_dir_operations = { | |||
284 | }; | 141 | }; |
285 | 142 | ||
286 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | 143 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, |
287 | struct dentry *dentry, unsigned int flags) | 144 | struct dentry *dentry, struct nameidata *nd) |
288 | { | 145 | { |
289 | struct dentry *error; | 146 | struct dentry *error; |
290 | struct task_struct *task = get_proc_task(dir); | 147 | struct task_struct *task = get_proc_task(dir); |
@@ -296,14 +153,19 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, | |||
296 | if (!task) | 153 | if (!task) |
297 | goto out_no_task; | 154 | goto out_no_task; |
298 | 155 | ||
299 | last = &ns_entries[ARRAY_SIZE(ns_entries)]; | 156 | error = ERR_PTR(-EPERM); |
300 | for (entry = ns_entries; entry < last; entry++) { | 157 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
158 | goto out; | ||
159 | |||
160 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
161 | for (entry = ns_entries; entry <= last; entry++) { | ||
301 | if (strlen((*entry)->name) != len) | 162 | if (strlen((*entry)->name) != len) |
302 | continue; | 163 | continue; |
303 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) | 164 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) |
304 | break; | 165 | break; |
305 | } | 166 | } |
306 | if (entry == last) | 167 | error = ERR_PTR(-ENOENT); |
168 | if (entry > last) | ||
307 | goto out; | 169 | goto out; |
308 | 170 | ||
309 | error = proc_ns_instantiate(dir, dentry, task, *entry); | 171 | error = proc_ns_instantiate(dir, dentry, task, *entry); |
@@ -337,7 +199,3 @@ out_invalid: | |||
337 | return ERR_PTR(-EINVAL); | 199 | return ERR_PTR(-EINVAL); |
338 | } | 200 | } |
339 | 201 | ||
340 | bool proc_ns_inode(struct inode *inode) | ||
341 | { | ||
342 | return inode->i_fop == &ns_file_operations; | ||
343 | } | ||
diff --git a/fs/proc/page.c b/fs/proc/page.c index b8730d9ebae..6d8e6a9e93a 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -115,14 +115,6 @@ u64 stable_page_flags(struct page *page) | |||
115 | u |= 1 << KPF_COMPOUND_TAIL; | 115 | u |= 1 << KPF_COMPOUND_TAIL; |
116 | if (PageHuge(page)) | 116 | if (PageHuge(page)) |
117 | u |= 1 << KPF_HUGE; | 117 | u |= 1 << KPF_HUGE; |
118 | /* | ||
119 | * PageTransCompound can be true for non-huge compound pages (slab | ||
120 | * pages or pages allocated by drivers with __GFP_COMP) because it | ||
121 | * just checks PG_head/PG_tail, so we need to check PageLRU to make | ||
122 | * sure a given page is a thp, not a non-huge compound page. | ||
123 | */ | ||
124 | else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) | ||
125 | u |= 1 << KPF_THP; | ||
126 | 118 | ||
127 | /* | 119 | /* |
128 | * Caveats on high order pages: page->_count will only be set | 120 | * Caveats on high order pages: page->_count will only be set |
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index de20ec480fa..927cbd115e5 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c | |||
@@ -101,11 +101,6 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde, | |||
101 | { | 101 | { |
102 | struct proc_dir_entry *ent; | 102 | struct proc_dir_entry *ent; |
103 | 103 | ||
104 | if (!oldprop) { | ||
105 | proc_device_tree_add_prop(pde, newprop); | ||
106 | return; | ||
107 | } | ||
108 | |||
109 | for (ent = pde->subdir; ent != NULL; ent = ent->next) | 104 | for (ent = pde->subdir; ent != NULL; ent = ent->next) |
110 | if (ent->data == oldprop) | 105 | if (ent->data == oldprop) |
111 | break; | 106 | break; |
@@ -195,7 +190,11 @@ void proc_device_tree_add_node(struct device_node *np, | |||
195 | set_node_proc_entry(np, de); | 190 | set_node_proc_entry(np, de); |
196 | for (child = NULL; (child = of_get_next_child(np, child));) { | 191 | for (child = NULL; (child = of_get_next_child(np, child));) { |
197 | /* Use everything after the last slash, or the full name */ | 192 | /* Use everything after the last slash, or the full name */ |
198 | p = kbasename(child->full_name); | 193 | p = strrchr(child->full_name, '/'); |
194 | if (!p) | ||
195 | p = child->full_name; | ||
196 | else | ||
197 | ++p; | ||
199 | 198 | ||
200 | if (duplicate_name(de, p)) | 199 | if (duplicate_name(de, p)) |
201 | p = fixup_name(np, de, p); | 200 | p = fixup_name(np, de, p); |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index fe72cd073de..f738024ccc8 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -119,7 +119,7 @@ static struct net *get_proc_task_net(struct inode *dir) | |||
119 | } | 119 | } |
120 | 120 | ||
121 | static struct dentry *proc_tgid_net_lookup(struct inode *dir, | 121 | static struct dentry *proc_tgid_net_lookup(struct inode *dir, |
122 | struct dentry *dentry, unsigned int flags) | 122 | struct dentry *dentry, struct nameidata *nd) |
123 | { | 123 | { |
124 | struct dentry *de; | 124 | struct dentry *de; |
125 | struct net *net; | 125 | struct net *net; |
@@ -179,7 +179,7 @@ const struct file_operations proc_net_operations = { | |||
179 | 179 | ||
180 | 180 | ||
181 | struct proc_dir_entry *proc_net_fops_create(struct net *net, | 181 | struct proc_dir_entry *proc_net_fops_create(struct net *net, |
182 | const char *name, umode_t mode, const struct file_operations *fops) | 182 | const char *name, mode_t mode, const struct file_operations *fops) |
183 | { | 183 | { |
184 | return proc_create(name, mode, net->proc_net, fops); | 184 | return proc_create(name, mode, net->proc_net, fops); |
185 | } | 185 | } |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1827d88ad58..1a77dbef226 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -3,13 +3,9 @@ | |||
3 | */ | 3 | */ |
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/sysctl.h> | 5 | #include <linux/sysctl.h> |
6 | #include <linux/poll.h> | ||
7 | #include <linux/proc_fs.h> | 6 | #include <linux/proc_fs.h> |
8 | #include <linux/security.h> | 7 | #include <linux/security.h> |
9 | #include <linux/sched.h> | ||
10 | #include <linux/namei.h> | 8 | #include <linux/namei.h> |
11 | #include <linux/mm.h> | ||
12 | #include <linux/module.h> | ||
13 | #include "internal.h" | 9 | #include "internal.h" |
14 | 10 | ||
15 | static const struct dentry_operations proc_sys_dentry_operations; | 11 | static const struct dentry_operations proc_sys_dentry_operations; |
@@ -18,379 +14,6 @@ static const struct inode_operations proc_sys_inode_operations; | |||
18 | static const struct file_operations proc_sys_dir_file_operations; | 14 | static const struct file_operations proc_sys_dir_file_operations; |
19 | static const struct inode_operations proc_sys_dir_operations; | 15 | static const struct inode_operations proc_sys_dir_operations; |
20 | 16 | ||
21 | void proc_sys_poll_notify(struct ctl_table_poll *poll) | ||
22 | { | ||
23 | if (!poll) | ||
24 | return; | ||
25 | |||
26 | atomic_inc(&poll->event); | ||
27 | wake_up_interruptible(&poll->wait); | ||
28 | } | ||
29 | |||
30 | static struct ctl_table root_table[] = { | ||
31 | { | ||
32 | .procname = "", | ||
33 | .mode = S_IFDIR|S_IRUGO|S_IXUGO, | ||
34 | }, | ||
35 | { } | ||
36 | }; | ||
37 | static struct ctl_table_root sysctl_table_root = { | ||
38 | .default_set.dir.header = { | ||
39 | {{.count = 1, | ||
40 | .nreg = 1, | ||
41 | .ctl_table = root_table }}, | ||
42 | .ctl_table_arg = root_table, | ||
43 | .root = &sysctl_table_root, | ||
44 | .set = &sysctl_table_root.default_set, | ||
45 | }, | ||
46 | }; | ||
47 | |||
48 | static DEFINE_SPINLOCK(sysctl_lock); | ||
49 | |||
50 | static void drop_sysctl_table(struct ctl_table_header *header); | ||
51 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
52 | struct ctl_table **pentry, struct nsproxy *namespaces); | ||
53 | static int insert_links(struct ctl_table_header *head); | ||
54 | static void put_links(struct ctl_table_header *header); | ||
55 | |||
56 | static void sysctl_print_dir(struct ctl_dir *dir) | ||
57 | { | ||
58 | if (dir->header.parent) | ||
59 | sysctl_print_dir(dir->header.parent); | ||
60 | printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); | ||
61 | } | ||
62 | |||
63 | static int namecmp(const char *name1, int len1, const char *name2, int len2) | ||
64 | { | ||
65 | int minlen; | ||
66 | int cmp; | ||
67 | |||
68 | minlen = len1; | ||
69 | if (minlen > len2) | ||
70 | minlen = len2; | ||
71 | |||
72 | cmp = memcmp(name1, name2, minlen); | ||
73 | if (cmp == 0) | ||
74 | cmp = len1 - len2; | ||
75 | return cmp; | ||
76 | } | ||
77 | |||
78 | /* Called under sysctl_lock */ | ||
79 | static struct ctl_table *find_entry(struct ctl_table_header **phead, | ||
80 | struct ctl_dir *dir, const char *name, int namelen) | ||
81 | { | ||
82 | struct ctl_table_header *head; | ||
83 | struct ctl_table *entry; | ||
84 | struct rb_node *node = dir->root.rb_node; | ||
85 | |||
86 | while (node) | ||
87 | { | ||
88 | struct ctl_node *ctl_node; | ||
89 | const char *procname; | ||
90 | int cmp; | ||
91 | |||
92 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
93 | head = ctl_node->header; | ||
94 | entry = &head->ctl_table[ctl_node - head->node]; | ||
95 | procname = entry->procname; | ||
96 | |||
97 | cmp = namecmp(name, namelen, procname, strlen(procname)); | ||
98 | if (cmp < 0) | ||
99 | node = node->rb_left; | ||
100 | else if (cmp > 0) | ||
101 | node = node->rb_right; | ||
102 | else { | ||
103 | *phead = head; | ||
104 | return entry; | ||
105 | } | ||
106 | } | ||
107 | return NULL; | ||
108 | } | ||
109 | |||
110 | static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
111 | { | ||
112 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
113 | struct rb_node **p = &head->parent->root.rb_node; | ||
114 | struct rb_node *parent = NULL; | ||
115 | const char *name = entry->procname; | ||
116 | int namelen = strlen(name); | ||
117 | |||
118 | while (*p) { | ||
119 | struct ctl_table_header *parent_head; | ||
120 | struct ctl_table *parent_entry; | ||
121 | struct ctl_node *parent_node; | ||
122 | const char *parent_name; | ||
123 | int cmp; | ||
124 | |||
125 | parent = *p; | ||
126 | parent_node = rb_entry(parent, struct ctl_node, node); | ||
127 | parent_head = parent_node->header; | ||
128 | parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; | ||
129 | parent_name = parent_entry->procname; | ||
130 | |||
131 | cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); | ||
132 | if (cmp < 0) | ||
133 | p = &(*p)->rb_left; | ||
134 | else if (cmp > 0) | ||
135 | p = &(*p)->rb_right; | ||
136 | else { | ||
137 | printk(KERN_ERR "sysctl duplicate entry: "); | ||
138 | sysctl_print_dir(head->parent); | ||
139 | printk(KERN_CONT "/%s\n", entry->procname); | ||
140 | return -EEXIST; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | rb_link_node(node, parent, p); | ||
145 | rb_insert_color(node, &head->parent->root); | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
150 | { | ||
151 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
152 | |||
153 | rb_erase(node, &head->parent->root); | ||
154 | } | ||
155 | |||
156 | static void init_header(struct ctl_table_header *head, | ||
157 | struct ctl_table_root *root, struct ctl_table_set *set, | ||
158 | struct ctl_node *node, struct ctl_table *table) | ||
159 | { | ||
160 | head->ctl_table = table; | ||
161 | head->ctl_table_arg = table; | ||
162 | head->used = 0; | ||
163 | head->count = 1; | ||
164 | head->nreg = 1; | ||
165 | head->unregistering = NULL; | ||
166 | head->root = root; | ||
167 | head->set = set; | ||
168 | head->parent = NULL; | ||
169 | head->node = node; | ||
170 | if (node) { | ||
171 | struct ctl_table *entry; | ||
172 | for (entry = table; entry->procname; entry++, node++) | ||
173 | node->header = head; | ||
174 | } | ||
175 | } | ||
176 | |||
177 | static void erase_header(struct ctl_table_header *head) | ||
178 | { | ||
179 | struct ctl_table *entry; | ||
180 | for (entry = head->ctl_table; entry->procname; entry++) | ||
181 | erase_entry(head, entry); | ||
182 | } | ||
183 | |||
184 | static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) | ||
185 | { | ||
186 | struct ctl_table *entry; | ||
187 | int err; | ||
188 | |||
189 | dir->header.nreg++; | ||
190 | header->parent = dir; | ||
191 | err = insert_links(header); | ||
192 | if (err) | ||
193 | goto fail_links; | ||
194 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
195 | err = insert_entry(header, entry); | ||
196 | if (err) | ||
197 | goto fail; | ||
198 | } | ||
199 | return 0; | ||
200 | fail: | ||
201 | erase_header(header); | ||
202 | put_links(header); | ||
203 | fail_links: | ||
204 | header->parent = NULL; | ||
205 | drop_sysctl_table(&dir->header); | ||
206 | return err; | ||
207 | } | ||
208 | |||
209 | /* called under sysctl_lock */ | ||
210 | static int use_table(struct ctl_table_header *p) | ||
211 | { | ||
212 | if (unlikely(p->unregistering)) | ||
213 | return 0; | ||
214 | p->used++; | ||
215 | return 1; | ||
216 | } | ||
217 | |||
218 | /* called under sysctl_lock */ | ||
219 | static void unuse_table(struct ctl_table_header *p) | ||
220 | { | ||
221 | if (!--p->used) | ||
222 | if (unlikely(p->unregistering)) | ||
223 | complete(p->unregistering); | ||
224 | } | ||
225 | |||
226 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
227 | static void start_unregistering(struct ctl_table_header *p) | ||
228 | { | ||
229 | /* | ||
230 | * if p->used is 0, nobody will ever touch that entry again; | ||
231 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
232 | */ | ||
233 | if (unlikely(p->used)) { | ||
234 | struct completion wait; | ||
235 | init_completion(&wait); | ||
236 | p->unregistering = &wait; | ||
237 | spin_unlock(&sysctl_lock); | ||
238 | wait_for_completion(&wait); | ||
239 | spin_lock(&sysctl_lock); | ||
240 | } else { | ||
241 | /* anything non-NULL; we'll never dereference it */ | ||
242 | p->unregistering = ERR_PTR(-EINVAL); | ||
243 | } | ||
244 | /* | ||
245 | * do not remove from the list until nobody holds it; walking the | ||
246 | * list in do_sysctl() relies on that. | ||
247 | */ | ||
248 | erase_header(p); | ||
249 | } | ||
250 | |||
251 | static void sysctl_head_get(struct ctl_table_header *head) | ||
252 | { | ||
253 | spin_lock(&sysctl_lock); | ||
254 | head->count++; | ||
255 | spin_unlock(&sysctl_lock); | ||
256 | } | ||
257 | |||
258 | void sysctl_head_put(struct ctl_table_header *head) | ||
259 | { | ||
260 | spin_lock(&sysctl_lock); | ||
261 | if (!--head->count) | ||
262 | kfree_rcu(head, rcu); | ||
263 | spin_unlock(&sysctl_lock); | ||
264 | } | ||
265 | |||
266 | static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) | ||
267 | { | ||
268 | BUG_ON(!head); | ||
269 | spin_lock(&sysctl_lock); | ||
270 | if (!use_table(head)) | ||
271 | head = ERR_PTR(-ENOENT); | ||
272 | spin_unlock(&sysctl_lock); | ||
273 | return head; | ||
274 | } | ||
275 | |||
276 | static void sysctl_head_finish(struct ctl_table_header *head) | ||
277 | { | ||
278 | if (!head) | ||
279 | return; | ||
280 | spin_lock(&sysctl_lock); | ||
281 | unuse_table(head); | ||
282 | spin_unlock(&sysctl_lock); | ||
283 | } | ||
284 | |||
285 | static struct ctl_table_set * | ||
286 | lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
287 | { | ||
288 | struct ctl_table_set *set = &root->default_set; | ||
289 | if (root->lookup) | ||
290 | set = root->lookup(root, namespaces); | ||
291 | return set; | ||
292 | } | ||
293 | |||
294 | static struct ctl_table *lookup_entry(struct ctl_table_header **phead, | ||
295 | struct ctl_dir *dir, | ||
296 | const char *name, int namelen) | ||
297 | { | ||
298 | struct ctl_table_header *head; | ||
299 | struct ctl_table *entry; | ||
300 | |||
301 | spin_lock(&sysctl_lock); | ||
302 | entry = find_entry(&head, dir, name, namelen); | ||
303 | if (entry && use_table(head)) | ||
304 | *phead = head; | ||
305 | else | ||
306 | entry = NULL; | ||
307 | spin_unlock(&sysctl_lock); | ||
308 | return entry; | ||
309 | } | ||
310 | |||
311 | static struct ctl_node *first_usable_entry(struct rb_node *node) | ||
312 | { | ||
313 | struct ctl_node *ctl_node; | ||
314 | |||
315 | for (;node; node = rb_next(node)) { | ||
316 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
317 | if (use_table(ctl_node->header)) | ||
318 | return ctl_node; | ||
319 | } | ||
320 | return NULL; | ||
321 | } | ||
322 | |||
323 | static void first_entry(struct ctl_dir *dir, | ||
324 | struct ctl_table_header **phead, struct ctl_table **pentry) | ||
325 | { | ||
326 | struct ctl_table_header *head = NULL; | ||
327 | struct ctl_table *entry = NULL; | ||
328 | struct ctl_node *ctl_node; | ||
329 | |||
330 | spin_lock(&sysctl_lock); | ||
331 | ctl_node = first_usable_entry(rb_first(&dir->root)); | ||
332 | spin_unlock(&sysctl_lock); | ||
333 | if (ctl_node) { | ||
334 | head = ctl_node->header; | ||
335 | entry = &head->ctl_table[ctl_node - head->node]; | ||
336 | } | ||
337 | *phead = head; | ||
338 | *pentry = entry; | ||
339 | } | ||
340 | |||
341 | static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) | ||
342 | { | ||
343 | struct ctl_table_header *head = *phead; | ||
344 | struct ctl_table *entry = *pentry; | ||
345 | struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; | ||
346 | |||
347 | spin_lock(&sysctl_lock); | ||
348 | unuse_table(head); | ||
349 | |||
350 | ctl_node = first_usable_entry(rb_next(&ctl_node->node)); | ||
351 | spin_unlock(&sysctl_lock); | ||
352 | head = NULL; | ||
353 | if (ctl_node) { | ||
354 | head = ctl_node->header; | ||
355 | entry = &head->ctl_table[ctl_node - head->node]; | ||
356 | } | ||
357 | *phead = head; | ||
358 | *pentry = entry; | ||
359 | } | ||
360 | |||
361 | void register_sysctl_root(struct ctl_table_root *root) | ||
362 | { | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * sysctl_perm does NOT grant the superuser all rights automatically, because | ||
367 | * some sysctl variables are readonly even to root. | ||
368 | */ | ||
369 | |||
370 | static int test_perm(int mode, int op) | ||
371 | { | ||
372 | if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) | ||
373 | mode >>= 6; | ||
374 | else if (in_egroup_p(GLOBAL_ROOT_GID)) | ||
375 | mode >>= 3; | ||
376 | if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) | ||
377 | return 0; | ||
378 | return -EACCES; | ||
379 | } | ||
380 | |||
381 | static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op) | ||
382 | { | ||
383 | struct ctl_table_root *root = head->root; | ||
384 | int mode; | ||
385 | |||
386 | if (root->permissions) | ||
387 | mode = root->permissions(head, table); | ||
388 | else | ||
389 | mode = table->mode; | ||
390 | |||
391 | return test_perm(mode, op); | ||
392 | } | ||
393 | |||
394 | static struct inode *proc_sys_make_inode(struct super_block *sb, | 17 | static struct inode *proc_sys_make_inode(struct super_block *sb, |
395 | struct ctl_table_header *head, struct ctl_table *table) | 18 | struct ctl_table_header *head, struct ctl_table *table) |
396 | { | 19 | { |
@@ -410,12 +33,13 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, | |||
410 | 33 | ||
411 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 34 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
412 | inode->i_mode = table->mode; | 35 | inode->i_mode = table->mode; |
413 | if (!S_ISDIR(table->mode)) { | 36 | if (!table->child) { |
414 | inode->i_mode |= S_IFREG; | 37 | inode->i_mode |= S_IFREG; |
415 | inode->i_op = &proc_sys_inode_operations; | 38 | inode->i_op = &proc_sys_inode_operations; |
416 | inode->i_fop = &proc_sys_file_operations; | 39 | inode->i_fop = &proc_sys_file_operations; |
417 | } else { | 40 | } else { |
418 | inode->i_mode |= S_IFDIR; | 41 | inode->i_mode |= S_IFDIR; |
42 | inode->i_nlink = 0; | ||
419 | inode->i_op = &proc_sys_dir_operations; | 43 | inode->i_op = &proc_sys_dir_operations; |
420 | inode->i_fop = &proc_sys_dir_file_operations; | 44 | inode->i_fop = &proc_sys_dir_file_operations; |
421 | } | 45 | } |
@@ -423,44 +47,75 @@ out: | |||
423 | return inode; | 47 | return inode; |
424 | } | 48 | } |
425 | 49 | ||
50 | static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) | ||
51 | { | ||
52 | int len; | ||
53 | for ( ; p->procname; p++) { | ||
54 | |||
55 | if (!p->procname) | ||
56 | continue; | ||
57 | |||
58 | len = strlen(p->procname); | ||
59 | if (len != name->len) | ||
60 | continue; | ||
61 | |||
62 | if (memcmp(p->procname, name->name, len) != 0) | ||
63 | continue; | ||
64 | |||
65 | /* I have a match */ | ||
66 | return p; | ||
67 | } | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
426 | static struct ctl_table_header *grab_header(struct inode *inode) | 71 | static struct ctl_table_header *grab_header(struct inode *inode) |
427 | { | 72 | { |
428 | struct ctl_table_header *head = PROC_I(inode)->sysctl; | 73 | if (PROC_I(inode)->sysctl) |
429 | if (!head) | 74 | return sysctl_head_grab(PROC_I(inode)->sysctl); |
430 | head = &sysctl_table_root.default_set.dir.header; | 75 | else |
431 | return sysctl_head_grab(head); | 76 | return sysctl_head_next(NULL); |
432 | } | 77 | } |
433 | 78 | ||
434 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | 79 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, |
435 | unsigned int flags) | 80 | struct nameidata *nd) |
436 | { | 81 | { |
437 | struct ctl_table_header *head = grab_header(dir); | 82 | struct ctl_table_header *head = grab_header(dir); |
83 | struct ctl_table *table = PROC_I(dir)->sysctl_entry; | ||
438 | struct ctl_table_header *h = NULL; | 84 | struct ctl_table_header *h = NULL; |
439 | struct qstr *name = &dentry->d_name; | 85 | struct qstr *name = &dentry->d_name; |
440 | struct ctl_table *p; | 86 | struct ctl_table *p; |
441 | struct inode *inode; | 87 | struct inode *inode; |
442 | struct dentry *err = ERR_PTR(-ENOENT); | 88 | struct dentry *err = ERR_PTR(-ENOENT); |
443 | struct ctl_dir *ctl_dir; | ||
444 | int ret; | ||
445 | 89 | ||
446 | if (IS_ERR(head)) | 90 | if (IS_ERR(head)) |
447 | return ERR_CAST(head); | 91 | return ERR_CAST(head); |
448 | 92 | ||
449 | ctl_dir = container_of(head, struct ctl_dir, header); | 93 | if (table && !table->child) { |
450 | 94 | WARN_ON(1); | |
451 | p = lookup_entry(&h, ctl_dir, name->name, name->len); | ||
452 | if (!p) | ||
453 | goto out; | 95 | goto out; |
96 | } | ||
454 | 97 | ||
455 | if (S_ISLNK(p->mode)) { | 98 | table = table ? table->child : head->ctl_table; |
456 | ret = sysctl_follow_link(&h, &p, current->nsproxy); | 99 | |
457 | err = ERR_PTR(ret); | 100 | p = find_in_table(table, name); |
458 | if (ret) | 101 | if (!p) { |
459 | goto out; | 102 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { |
103 | if (h->attached_to != table) | ||
104 | continue; | ||
105 | p = find_in_table(h->attached_by, name); | ||
106 | if (p) | ||
107 | break; | ||
108 | } | ||
460 | } | 109 | } |
461 | 110 | ||
111 | if (!p) | ||
112 | goto out; | ||
113 | |||
462 | err = ERR_PTR(-ENOMEM); | 114 | err = ERR_PTR(-ENOMEM); |
463 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); | 115 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); |
116 | if (h) | ||
117 | sysctl_head_finish(h); | ||
118 | |||
464 | if (!inode) | 119 | if (!inode) |
465 | goto out; | 120 | goto out; |
466 | 121 | ||
@@ -469,8 +124,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | |||
469 | d_add(dentry, inode); | 124 | d_add(dentry, inode); |
470 | 125 | ||
471 | out: | 126 | out: |
472 | if (h) | ||
473 | sysctl_head_finish(h); | ||
474 | sysctl_head_finish(head); | 127 | sysctl_head_finish(head); |
475 | return err; | 128 | return err; |
476 | } | 129 | } |
@@ -492,7 +145,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, | |||
492 | * and won't be until we finish. | 145 | * and won't be until we finish. |
493 | */ | 146 | */ |
494 | error = -EPERM; | 147 | error = -EPERM; |
495 | if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ)) | 148 | if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) |
496 | goto out; | 149 | goto out; |
497 | 150 | ||
498 | /* if that can happen at all, it should be -EINVAL, not -EISDIR */ | 151 | /* if that can happen at all, it should be -EINVAL, not -EISDIR */ |
@@ -523,54 +176,6 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, | |||
523 | return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); | 176 | return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); |
524 | } | 177 | } |
525 | 178 | ||
526 | static int proc_sys_open(struct inode *inode, struct file *filp) | ||
527 | { | ||
528 | struct ctl_table_header *head = grab_header(inode); | ||
529 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | ||
530 | |||
531 | /* sysctl was unregistered */ | ||
532 | if (IS_ERR(head)) | ||
533 | return PTR_ERR(head); | ||
534 | |||
535 | if (table->poll) | ||
536 | filp->private_data = proc_sys_poll_event(table->poll); | ||
537 | |||
538 | sysctl_head_finish(head); | ||
539 | |||
540 | return 0; | ||
541 | } | ||
542 | |||
543 | static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | ||
544 | { | ||
545 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
546 | struct ctl_table_header *head = grab_header(inode); | ||
547 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | ||
548 | unsigned int ret = DEFAULT_POLLMASK; | ||
549 | unsigned long event; | ||
550 | |||
551 | /* sysctl was unregistered */ | ||
552 | if (IS_ERR(head)) | ||
553 | return POLLERR | POLLHUP; | ||
554 | |||
555 | if (!table->proc_handler) | ||
556 | goto out; | ||
557 | |||
558 | if (!table->poll) | ||
559 | goto out; | ||
560 | |||
561 | event = (unsigned long)filp->private_data; | ||
562 | poll_wait(filp, &table->poll->wait, wait); | ||
563 | |||
564 | if (event != atomic_read(&table->poll->event)) { | ||
565 | filp->private_data = proc_sys_poll_event(table->poll); | ||
566 | ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI; | ||
567 | } | ||
568 | |||
569 | out: | ||
570 | sysctl_head_finish(head); | ||
571 | |||
572 | return ret; | ||
573 | } | ||
574 | 179 | ||
575 | static int proc_sys_fill_cache(struct file *filp, void *dirent, | 180 | static int proc_sys_fill_cache(struct file *filp, void *dirent, |
576 | filldir_t filldir, | 181 | filldir_t filldir, |
@@ -610,45 +215,28 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, | |||
610 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | 215 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); |
611 | } | 216 | } |
612 | 217 | ||
613 | static int proc_sys_link_fill_cache(struct file *filp, void *dirent, | ||
614 | filldir_t filldir, | ||
615 | struct ctl_table_header *head, | ||
616 | struct ctl_table *table) | ||
617 | { | ||
618 | int err, ret = 0; | ||
619 | head = sysctl_head_grab(head); | ||
620 | |||
621 | if (S_ISLNK(table->mode)) { | ||
622 | /* It is not an error if we can not follow the link ignore it */ | ||
623 | err = sysctl_follow_link(&head, &table, current->nsproxy); | ||
624 | if (err) | ||
625 | goto out; | ||
626 | } | ||
627 | |||
628 | ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); | ||
629 | out: | ||
630 | sysctl_head_finish(head); | ||
631 | return ret; | ||
632 | } | ||
633 | |||
634 | static int scan(struct ctl_table_header *head, ctl_table *table, | 218 | static int scan(struct ctl_table_header *head, ctl_table *table, |
635 | unsigned long *pos, struct file *file, | 219 | unsigned long *pos, struct file *file, |
636 | void *dirent, filldir_t filldir) | 220 | void *dirent, filldir_t filldir) |
637 | { | 221 | { |
638 | int res; | ||
639 | 222 | ||
640 | if ((*pos)++ < file->f_pos) | 223 | for (; table->procname; table++, (*pos)++) { |
641 | return 0; | 224 | int res; |
642 | 225 | ||
643 | if (unlikely(S_ISLNK(table->mode))) | 226 | /* Can't do anything without a proc name */ |
644 | res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); | 227 | if (!table->procname) |
645 | else | 228 | continue; |
646 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | ||
647 | 229 | ||
648 | if (res == 0) | 230 | if (*pos < file->f_pos) |
649 | file->f_pos = *pos; | 231 | continue; |
232 | |||
233 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | ||
234 | if (res) | ||
235 | return res; | ||
650 | 236 | ||
651 | return res; | 237 | file->f_pos = *pos + 1; |
238 | } | ||
239 | return 0; | ||
652 | } | 240 | } |
653 | 241 | ||
654 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | 242 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) |
@@ -656,16 +244,20 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
656 | struct dentry *dentry = filp->f_path.dentry; | 244 | struct dentry *dentry = filp->f_path.dentry; |
657 | struct inode *inode = dentry->d_inode; | 245 | struct inode *inode = dentry->d_inode; |
658 | struct ctl_table_header *head = grab_header(inode); | 246 | struct ctl_table_header *head = grab_header(inode); |
247 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | ||
659 | struct ctl_table_header *h = NULL; | 248 | struct ctl_table_header *h = NULL; |
660 | struct ctl_table *entry; | ||
661 | struct ctl_dir *ctl_dir; | ||
662 | unsigned long pos; | 249 | unsigned long pos; |
663 | int ret = -EINVAL; | 250 | int ret = -EINVAL; |
664 | 251 | ||
665 | if (IS_ERR(head)) | 252 | if (IS_ERR(head)) |
666 | return PTR_ERR(head); | 253 | return PTR_ERR(head); |
667 | 254 | ||
668 | ctl_dir = container_of(head, struct ctl_dir, header); | 255 | if (table && !table->child) { |
256 | WARN_ON(1); | ||
257 | goto out; | ||
258 | } | ||
259 | |||
260 | table = table ? table->child : head->ctl_table; | ||
669 | 261 | ||
670 | ret = 0; | 262 | ret = 0; |
671 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | 263 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ |
@@ -683,8 +275,14 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
683 | } | 275 | } |
684 | pos = 2; | 276 | pos = 2; |
685 | 277 | ||
686 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { | 278 | ret = scan(head, table, &pos, filp, dirent, filldir); |
687 | ret = scan(h, entry, &pos, filp, dirent, filldir); | 279 | if (ret) |
280 | goto out; | ||
281 | |||
282 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { | ||
283 | if (h->attached_to != table) | ||
284 | continue; | ||
285 | ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); | ||
688 | if (ret) { | 286 | if (ret) { |
689 | sysctl_head_finish(h); | 287 | sysctl_head_finish(h); |
690 | break; | 288 | break; |
@@ -718,7 +316,7 @@ static int proc_sys_permission(struct inode *inode, int mask) | |||
718 | if (!table) /* global root - r-xr-xr-x */ | 316 | if (!table) /* global root - r-xr-xr-x */ |
719 | error = mask & MAY_WRITE ? -EACCES : 0; | 317 | error = mask & MAY_WRITE ? -EACCES : 0; |
720 | else /* Use the permissions on the sysctl table entry */ | 318 | else /* Use the permissions on the sysctl table entry */ |
721 | error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK); | 319 | error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK); |
722 | 320 | ||
723 | sysctl_head_finish(head); | 321 | sysctl_head_finish(head); |
724 | return error; | 322 | return error; |
@@ -736,6 +334,13 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) | |||
736 | if (error) | 334 | if (error) |
737 | return error; | 335 | return error; |
738 | 336 | ||
337 | if ((attr->ia_valid & ATTR_SIZE) && | ||
338 | attr->ia_size != i_size_read(inode)) { | ||
339 | error = vmtruncate(inode, attr->ia_size); | ||
340 | if (error) | ||
341 | return error; | ||
342 | } | ||
343 | |||
739 | setattr_copy(inode, attr); | 344 | setattr_copy(inode, attr); |
740 | mark_inode_dirty(inode); | 345 | mark_inode_dirty(inode); |
741 | return 0; | 346 | return 0; |
@@ -759,15 +364,12 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct | |||
759 | } | 364 | } |
760 | 365 | ||
761 | static const struct file_operations proc_sys_file_operations = { | 366 | static const struct file_operations proc_sys_file_operations = { |
762 | .open = proc_sys_open, | ||
763 | .poll = proc_sys_poll, | ||
764 | .read = proc_sys_read, | 367 | .read = proc_sys_read, |
765 | .write = proc_sys_write, | 368 | .write = proc_sys_write, |
766 | .llseek = default_llseek, | 369 | .llseek = default_llseek, |
767 | }; | 370 | }; |
768 | 371 | ||
769 | static const struct file_operations proc_sys_dir_file_operations = { | 372 | static const struct file_operations proc_sys_dir_file_operations = { |
770 | .read = generic_read_dir, | ||
771 | .readdir = proc_sys_readdir, | 373 | .readdir = proc_sys_readdir, |
772 | .llseek = generic_file_llseek, | 374 | .llseek = generic_file_llseek, |
773 | }; | 375 | }; |
@@ -785,9 +387,9 @@ static const struct inode_operations proc_sys_dir_operations = { | |||
785 | .getattr = proc_sys_getattr, | 387 | .getattr = proc_sys_getattr, |
786 | }; | 388 | }; |
787 | 389 | ||
788 | static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) | 390 | static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) |
789 | { | 391 | { |
790 | if (flags & LOOKUP_RCU) | 392 | if (nd->flags & LOOKUP_RCU) |
791 | return -ECHILD; | 393 | return -ECHILD; |
792 | return !PROC_I(dentry->d_inode)->sysctl->unregistering; | 394 | return !PROC_I(dentry->d_inode)->sysctl->unregistering; |
793 | } | 395 | } |
@@ -797,21 +399,6 @@ static int proc_sys_delete(const struct dentry *dentry) | |||
797 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; | 399 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; |
798 | } | 400 | } |
799 | 401 | ||
800 | static int sysctl_is_seen(struct ctl_table_header *p) | ||
801 | { | ||
802 | struct ctl_table_set *set = p->set; | ||
803 | int res; | ||
804 | spin_lock(&sysctl_lock); | ||
805 | if (p->unregistering) | ||
806 | res = 0; | ||
807 | else if (!set->is_seen) | ||
808 | res = 1; | ||
809 | else | ||
810 | res = set->is_seen(set); | ||
811 | spin_unlock(&sysctl_lock); | ||
812 | return res; | ||
813 | } | ||
814 | |||
815 | static int proc_sys_compare(const struct dentry *parent, | 402 | static int proc_sys_compare(const struct dentry *parent, |
816 | const struct inode *pinode, | 403 | const struct inode *pinode, |
817 | const struct dentry *dentry, const struct inode *inode, | 404 | const struct dentry *dentry, const struct inode *inode, |
@@ -837,753 +424,6 @@ static const struct dentry_operations proc_sys_dentry_operations = { | |||
837 | .d_compare = proc_sys_compare, | 424 | .d_compare = proc_sys_compare, |
838 | }; | 425 | }; |
839 | 426 | ||
840 | static struct ctl_dir *find_subdir(struct ctl_dir *dir, | ||
841 | const char *name, int namelen) | ||
842 | { | ||
843 | struct ctl_table_header *head; | ||
844 | struct ctl_table *entry; | ||
845 | |||
846 | entry = find_entry(&head, dir, name, namelen); | ||
847 | if (!entry) | ||
848 | return ERR_PTR(-ENOENT); | ||
849 | if (!S_ISDIR(entry->mode)) | ||
850 | return ERR_PTR(-ENOTDIR); | ||
851 | return container_of(head, struct ctl_dir, header); | ||
852 | } | ||
853 | |||
854 | static struct ctl_dir *new_dir(struct ctl_table_set *set, | ||
855 | const char *name, int namelen) | ||
856 | { | ||
857 | struct ctl_table *table; | ||
858 | struct ctl_dir *new; | ||
859 | struct ctl_node *node; | ||
860 | char *new_name; | ||
861 | |||
862 | new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + | ||
863 | sizeof(struct ctl_table)*2 + namelen + 1, | ||
864 | GFP_KERNEL); | ||
865 | if (!new) | ||
866 | return NULL; | ||
867 | |||
868 | node = (struct ctl_node *)(new + 1); | ||
869 | table = (struct ctl_table *)(node + 1); | ||
870 | new_name = (char *)(table + 2); | ||
871 | memcpy(new_name, name, namelen); | ||
872 | new_name[namelen] = '\0'; | ||
873 | table[0].procname = new_name; | ||
874 | table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
875 | init_header(&new->header, set->dir.header.root, set, node, table); | ||
876 | |||
877 | return new; | ||
878 | } | ||
879 | |||
880 | /** | ||
881 | * get_subdir - find or create a subdir with the specified name. | ||
882 | * @dir: Directory to create the subdirectory in | ||
883 | * @name: The name of the subdirectory to find or create | ||
884 | * @namelen: The length of name | ||
885 | * | ||
886 | * Takes a directory with an elevated reference count so we know that | ||
887 | * if we drop the lock the directory will not go away. Upon success | ||
888 | * the reference is moved from @dir to the returned subdirectory. | ||
889 | * Upon error an error code is returned and the reference on @dir is | ||
890 | * simply dropped. | ||
891 | */ | ||
892 | static struct ctl_dir *get_subdir(struct ctl_dir *dir, | ||
893 | const char *name, int namelen) | ||
894 | { | ||
895 | struct ctl_table_set *set = dir->header.set; | ||
896 | struct ctl_dir *subdir, *new = NULL; | ||
897 | int err; | ||
898 | |||
899 | spin_lock(&sysctl_lock); | ||
900 | subdir = find_subdir(dir, name, namelen); | ||
901 | if (!IS_ERR(subdir)) | ||
902 | goto found; | ||
903 | if (PTR_ERR(subdir) != -ENOENT) | ||
904 | goto failed; | ||
905 | |||
906 | spin_unlock(&sysctl_lock); | ||
907 | new = new_dir(set, name, namelen); | ||
908 | spin_lock(&sysctl_lock); | ||
909 | subdir = ERR_PTR(-ENOMEM); | ||
910 | if (!new) | ||
911 | goto failed; | ||
912 | |||
913 | /* Was the subdir added while we dropped the lock? */ | ||
914 | subdir = find_subdir(dir, name, namelen); | ||
915 | if (!IS_ERR(subdir)) | ||
916 | goto found; | ||
917 | if (PTR_ERR(subdir) != -ENOENT) | ||
918 | goto failed; | ||
919 | |||
920 | /* Nope. Use the our freshly made directory entry. */ | ||
921 | err = insert_header(dir, &new->header); | ||
922 | subdir = ERR_PTR(err); | ||
923 | if (err) | ||
924 | goto failed; | ||
925 | subdir = new; | ||
926 | found: | ||
927 | subdir->header.nreg++; | ||
928 | failed: | ||
929 | if (unlikely(IS_ERR(subdir))) { | ||
930 | printk(KERN_ERR "sysctl could not get directory: "); | ||
931 | sysctl_print_dir(dir); | ||
932 | printk(KERN_CONT "/%*.*s %ld\n", | ||
933 | namelen, namelen, name, PTR_ERR(subdir)); | ||
934 | } | ||
935 | drop_sysctl_table(&dir->header); | ||
936 | if (new) | ||
937 | drop_sysctl_table(&new->header); | ||
938 | spin_unlock(&sysctl_lock); | ||
939 | return subdir; | ||
940 | } | ||
941 | |||
942 | static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) | ||
943 | { | ||
944 | struct ctl_dir *parent; | ||
945 | const char *procname; | ||
946 | if (!dir->header.parent) | ||
947 | return &set->dir; | ||
948 | parent = xlate_dir(set, dir->header.parent); | ||
949 | if (IS_ERR(parent)) | ||
950 | return parent; | ||
951 | procname = dir->header.ctl_table[0].procname; | ||
952 | return find_subdir(parent, procname, strlen(procname)); | ||
953 | } | ||
954 | |||
955 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
956 | struct ctl_table **pentry, struct nsproxy *namespaces) | ||
957 | { | ||
958 | struct ctl_table_header *head; | ||
959 | struct ctl_table_root *root; | ||
960 | struct ctl_table_set *set; | ||
961 | struct ctl_table *entry; | ||
962 | struct ctl_dir *dir; | ||
963 | int ret; | ||
964 | |||
965 | ret = 0; | ||
966 | spin_lock(&sysctl_lock); | ||
967 | root = (*pentry)->data; | ||
968 | set = lookup_header_set(root, namespaces); | ||
969 | dir = xlate_dir(set, (*phead)->parent); | ||
970 | if (IS_ERR(dir)) | ||
971 | ret = PTR_ERR(dir); | ||
972 | else { | ||
973 | const char *procname = (*pentry)->procname; | ||
974 | head = NULL; | ||
975 | entry = find_entry(&head, dir, procname, strlen(procname)); | ||
976 | ret = -ENOENT; | ||
977 | if (entry && use_table(head)) { | ||
978 | unuse_table(*phead); | ||
979 | *phead = head; | ||
980 | *pentry = entry; | ||
981 | ret = 0; | ||
982 | } | ||
983 | } | ||
984 | |||
985 | spin_unlock(&sysctl_lock); | ||
986 | return ret; | ||
987 | } | ||
988 | |||
989 | static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) | ||
990 | { | ||
991 | struct va_format vaf; | ||
992 | va_list args; | ||
993 | |||
994 | va_start(args, fmt); | ||
995 | vaf.fmt = fmt; | ||
996 | vaf.va = &args; | ||
997 | |||
998 | printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", | ||
999 | path, table->procname, &vaf); | ||
1000 | |||
1001 | va_end(args); | ||
1002 | return -EINVAL; | ||
1003 | } | ||
1004 | |||
1005 | static int sysctl_check_table(const char *path, struct ctl_table *table) | ||
1006 | { | ||
1007 | int err = 0; | ||
1008 | for (; table->procname; table++) { | ||
1009 | if (table->child) | ||
1010 | err = sysctl_err(path, table, "Not a file"); | ||
1011 | |||
1012 | if ((table->proc_handler == proc_dostring) || | ||
1013 | (table->proc_handler == proc_dointvec) || | ||
1014 | (table->proc_handler == proc_dointvec_minmax) || | ||
1015 | (table->proc_handler == proc_dointvec_jiffies) || | ||
1016 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
1017 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
1018 | (table->proc_handler == proc_doulongvec_minmax) || | ||
1019 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
1020 | if (!table->data) | ||
1021 | err = sysctl_err(path, table, "No data"); | ||
1022 | if (!table->maxlen) | ||
1023 | err = sysctl_err(path, table, "No maxlen"); | ||
1024 | } | ||
1025 | if (!table->proc_handler) | ||
1026 | err = sysctl_err(path, table, "No proc_handler"); | ||
1027 | |||
1028 | if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) | ||
1029 | err = sysctl_err(path, table, "bogus .mode 0%o", | ||
1030 | table->mode); | ||
1031 | } | ||
1032 | return err; | ||
1033 | } | ||
1034 | |||
1035 | static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, | ||
1036 | struct ctl_table_root *link_root) | ||
1037 | { | ||
1038 | struct ctl_table *link_table, *entry, *link; | ||
1039 | struct ctl_table_header *links; | ||
1040 | struct ctl_node *node; | ||
1041 | char *link_name; | ||
1042 | int nr_entries, name_bytes; | ||
1043 | |||
1044 | name_bytes = 0; | ||
1045 | nr_entries = 0; | ||
1046 | for (entry = table; entry->procname; entry++) { | ||
1047 | nr_entries++; | ||
1048 | name_bytes += strlen(entry->procname) + 1; | ||
1049 | } | ||
1050 | |||
1051 | links = kzalloc(sizeof(struct ctl_table_header) + | ||
1052 | sizeof(struct ctl_node)*nr_entries + | ||
1053 | sizeof(struct ctl_table)*(nr_entries + 1) + | ||
1054 | name_bytes, | ||
1055 | GFP_KERNEL); | ||
1056 | |||
1057 | if (!links) | ||
1058 | return NULL; | ||
1059 | |||
1060 | node = (struct ctl_node *)(links + 1); | ||
1061 | link_table = (struct ctl_table *)(node + nr_entries); | ||
1062 | link_name = (char *)&link_table[nr_entries + 1]; | ||
1063 | |||
1064 | for (link = link_table, entry = table; entry->procname; link++, entry++) { | ||
1065 | int len = strlen(entry->procname) + 1; | ||
1066 | memcpy(link_name, entry->procname, len); | ||
1067 | link->procname = link_name; | ||
1068 | link->mode = S_IFLNK|S_IRWXUGO; | ||
1069 | link->data = link_root; | ||
1070 | link_name += len; | ||
1071 | } | ||
1072 | init_header(links, dir->header.root, dir->header.set, node, link_table); | ||
1073 | links->nreg = nr_entries; | ||
1074 | |||
1075 | return links; | ||
1076 | } | ||
1077 | |||
1078 | static bool get_links(struct ctl_dir *dir, | ||
1079 | struct ctl_table *table, struct ctl_table_root *link_root) | ||
1080 | { | ||
1081 | struct ctl_table_header *head; | ||
1082 | struct ctl_table *entry, *link; | ||
1083 | |||
1084 | /* Are there links available for every entry in table? */ | ||
1085 | for (entry = table; entry->procname; entry++) { | ||
1086 | const char *procname = entry->procname; | ||
1087 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
1088 | if (!link) | ||
1089 | return false; | ||
1090 | if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) | ||
1091 | continue; | ||
1092 | if (S_ISLNK(link->mode) && (link->data == link_root)) | ||
1093 | continue; | ||
1094 | return false; | ||
1095 | } | ||
1096 | |||
1097 | /* The checks passed. Increase the registration count on the links */ | ||
1098 | for (entry = table; entry->procname; entry++) { | ||
1099 | const char *procname = entry->procname; | ||
1100 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
1101 | head->nreg++; | ||
1102 | } | ||
1103 | return true; | ||
1104 | } | ||
1105 | |||
1106 | static int insert_links(struct ctl_table_header *head) | ||
1107 | { | ||
1108 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
1109 | struct ctl_dir *core_parent = NULL; | ||
1110 | struct ctl_table_header *links; | ||
1111 | int err; | ||
1112 | |||
1113 | if (head->set == root_set) | ||
1114 | return 0; | ||
1115 | |||
1116 | core_parent = xlate_dir(root_set, head->parent); | ||
1117 | if (IS_ERR(core_parent)) | ||
1118 | return 0; | ||
1119 | |||
1120 | if (get_links(core_parent, head->ctl_table, head->root)) | ||
1121 | return 0; | ||
1122 | |||
1123 | core_parent->header.nreg++; | ||
1124 | spin_unlock(&sysctl_lock); | ||
1125 | |||
1126 | links = new_links(core_parent, head->ctl_table, head->root); | ||
1127 | |||
1128 | spin_lock(&sysctl_lock); | ||
1129 | err = -ENOMEM; | ||
1130 | if (!links) | ||
1131 | goto out; | ||
1132 | |||
1133 | err = 0; | ||
1134 | if (get_links(core_parent, head->ctl_table, head->root)) { | ||
1135 | kfree(links); | ||
1136 | goto out; | ||
1137 | } | ||
1138 | |||
1139 | err = insert_header(core_parent, links); | ||
1140 | if (err) | ||
1141 | kfree(links); | ||
1142 | out: | ||
1143 | drop_sysctl_table(&core_parent->header); | ||
1144 | return err; | ||
1145 | } | ||
1146 | |||
1147 | /** | ||
1148 | * __register_sysctl_table - register a leaf sysctl table | ||
1149 | * @set: Sysctl tree to register on | ||
1150 | * @path: The path to the directory the sysctl table is in. | ||
1151 | * @table: the top-level table structure | ||
1152 | * | ||
1153 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1154 | * array. A completely 0 filled entry terminates the table. | ||
1155 | * | ||
1156 | * The members of the &struct ctl_table structure are used as follows: | ||
1157 | * | ||
1158 | * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not | ||
1159 | * enter a sysctl file | ||
1160 | * | ||
1161 | * data - a pointer to data for use by proc_handler | ||
1162 | * | ||
1163 | * maxlen - the maximum size in bytes of the data | ||
1164 | * | ||
1165 | * mode - the file permissions for the /proc/sys file | ||
1166 | * | ||
1167 | * child - must be %NULL. | ||
1168 | * | ||
1169 | * proc_handler - the text handler routine (described below) | ||
1170 | * | ||
1171 | * extra1, extra2 - extra pointers usable by the proc handler routines | ||
1172 | * | ||
1173 | * Leaf nodes in the sysctl tree will be represented by a single file | ||
1174 | * under /proc; non-leaf nodes will be represented by directories. | ||
1175 | * | ||
1176 | * There must be a proc_handler routine for any terminal nodes. | ||
1177 | * Several default handlers are available to cover common cases - | ||
1178 | * | ||
1179 | * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), | ||
1180 | * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), | ||
1181 | * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() | ||
1182 | * | ||
1183 | * It is the handler's job to read the input buffer from user memory | ||
1184 | * and process it. The handler should return 0 on success. | ||
1185 | * | ||
1186 | * This routine returns %NULL on a failure to register, and a pointer | ||
1187 | * to the table header on success. | ||
1188 | */ | ||
1189 | struct ctl_table_header *__register_sysctl_table( | ||
1190 | struct ctl_table_set *set, | ||
1191 | const char *path, struct ctl_table *table) | ||
1192 | { | ||
1193 | struct ctl_table_root *root = set->dir.header.root; | ||
1194 | struct ctl_table_header *header; | ||
1195 | const char *name, *nextname; | ||
1196 | struct ctl_dir *dir; | ||
1197 | struct ctl_table *entry; | ||
1198 | struct ctl_node *node; | ||
1199 | int nr_entries = 0; | ||
1200 | |||
1201 | for (entry = table; entry->procname; entry++) | ||
1202 | nr_entries++; | ||
1203 | |||
1204 | header = kzalloc(sizeof(struct ctl_table_header) + | ||
1205 | sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); | ||
1206 | if (!header) | ||
1207 | return NULL; | ||
1208 | |||
1209 | node = (struct ctl_node *)(header + 1); | ||
1210 | init_header(header, root, set, node, table); | ||
1211 | if (sysctl_check_table(path, table)) | ||
1212 | goto fail; | ||
1213 | |||
1214 | spin_lock(&sysctl_lock); | ||
1215 | dir = &set->dir; | ||
1216 | /* Reference moved down the diretory tree get_subdir */ | ||
1217 | dir->header.nreg++; | ||
1218 | spin_unlock(&sysctl_lock); | ||
1219 | |||
1220 | /* Find the directory for the ctl_table */ | ||
1221 | for (name = path; name; name = nextname) { | ||
1222 | int namelen; | ||
1223 | nextname = strchr(name, '/'); | ||
1224 | if (nextname) { | ||
1225 | namelen = nextname - name; | ||
1226 | nextname++; | ||
1227 | } else { | ||
1228 | namelen = strlen(name); | ||
1229 | } | ||
1230 | if (namelen == 0) | ||
1231 | continue; | ||
1232 | |||
1233 | dir = get_subdir(dir, name, namelen); | ||
1234 | if (IS_ERR(dir)) | ||
1235 | goto fail; | ||
1236 | } | ||
1237 | |||
1238 | spin_lock(&sysctl_lock); | ||
1239 | if (insert_header(dir, header)) | ||
1240 | goto fail_put_dir_locked; | ||
1241 | |||
1242 | drop_sysctl_table(&dir->header); | ||
1243 | spin_unlock(&sysctl_lock); | ||
1244 | |||
1245 | return header; | ||
1246 | |||
1247 | fail_put_dir_locked: | ||
1248 | drop_sysctl_table(&dir->header); | ||
1249 | spin_unlock(&sysctl_lock); | ||
1250 | fail: | ||
1251 | kfree(header); | ||
1252 | dump_stack(); | ||
1253 | return NULL; | ||
1254 | } | ||
1255 | |||
1256 | /** | ||
1257 | * register_sysctl - register a sysctl table | ||
1258 | * @path: The path to the directory the sysctl table is in. | ||
1259 | * @table: the table structure | ||
1260 | * | ||
1261 | * Register a sysctl table. @table should be a filled in ctl_table | ||
1262 | * array. A completely 0 filled entry terminates the table. | ||
1263 | * | ||
1264 | * See __register_sysctl_table for more details. | ||
1265 | */ | ||
1266 | struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) | ||
1267 | { | ||
1268 | return __register_sysctl_table(&sysctl_table_root.default_set, | ||
1269 | path, table); | ||
1270 | } | ||
1271 | EXPORT_SYMBOL(register_sysctl); | ||
1272 | |||
1273 | static char *append_path(const char *path, char *pos, const char *name) | ||
1274 | { | ||
1275 | int namelen; | ||
1276 | namelen = strlen(name); | ||
1277 | if (((pos - path) + namelen + 2) >= PATH_MAX) | ||
1278 | return NULL; | ||
1279 | memcpy(pos, name, namelen); | ||
1280 | pos[namelen] = '/'; | ||
1281 | pos[namelen + 1] = '\0'; | ||
1282 | pos += namelen + 1; | ||
1283 | return pos; | ||
1284 | } | ||
1285 | |||
1286 | static int count_subheaders(struct ctl_table *table) | ||
1287 | { | ||
1288 | int has_files = 0; | ||
1289 | int nr_subheaders = 0; | ||
1290 | struct ctl_table *entry; | ||
1291 | |||
1292 | /* special case: no directory and empty directory */ | ||
1293 | if (!table || !table->procname) | ||
1294 | return 1; | ||
1295 | |||
1296 | for (entry = table; entry->procname; entry++) { | ||
1297 | if (entry->child) | ||
1298 | nr_subheaders += count_subheaders(entry->child); | ||
1299 | else | ||
1300 | has_files = 1; | ||
1301 | } | ||
1302 | return nr_subheaders + has_files; | ||
1303 | } | ||
1304 | |||
1305 | static int register_leaf_sysctl_tables(const char *path, char *pos, | ||
1306 | struct ctl_table_header ***subheader, struct ctl_table_set *set, | ||
1307 | struct ctl_table *table) | ||
1308 | { | ||
1309 | struct ctl_table *ctl_table_arg = NULL; | ||
1310 | struct ctl_table *entry, *files; | ||
1311 | int nr_files = 0; | ||
1312 | int nr_dirs = 0; | ||
1313 | int err = -ENOMEM; | ||
1314 | |||
1315 | for (entry = table; entry->procname; entry++) { | ||
1316 | if (entry->child) | ||
1317 | nr_dirs++; | ||
1318 | else | ||
1319 | nr_files++; | ||
1320 | } | ||
1321 | |||
1322 | files = table; | ||
1323 | /* If there are mixed files and directories we need a new table */ | ||
1324 | if (nr_dirs && nr_files) { | ||
1325 | struct ctl_table *new; | ||
1326 | files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), | ||
1327 | GFP_KERNEL); | ||
1328 | if (!files) | ||
1329 | goto out; | ||
1330 | |||
1331 | ctl_table_arg = files; | ||
1332 | for (new = files, entry = table; entry->procname; entry++) { | ||
1333 | if (entry->child) | ||
1334 | continue; | ||
1335 | *new = *entry; | ||
1336 | new++; | ||
1337 | } | ||
1338 | } | ||
1339 | |||
1340 | /* Register everything except a directory full of subdirectories */ | ||
1341 | if (nr_files || !nr_dirs) { | ||
1342 | struct ctl_table_header *header; | ||
1343 | header = __register_sysctl_table(set, path, files); | ||
1344 | if (!header) { | ||
1345 | kfree(ctl_table_arg); | ||
1346 | goto out; | ||
1347 | } | ||
1348 | |||
1349 | /* Remember if we need to free the file table */ | ||
1350 | header->ctl_table_arg = ctl_table_arg; | ||
1351 | **subheader = header; | ||
1352 | (*subheader)++; | ||
1353 | } | ||
1354 | |||
1355 | /* Recurse into the subdirectories. */ | ||
1356 | for (entry = table; entry->procname; entry++) { | ||
1357 | char *child_pos; | ||
1358 | |||
1359 | if (!entry->child) | ||
1360 | continue; | ||
1361 | |||
1362 | err = -ENAMETOOLONG; | ||
1363 | child_pos = append_path(path, pos, entry->procname); | ||
1364 | if (!child_pos) | ||
1365 | goto out; | ||
1366 | |||
1367 | err = register_leaf_sysctl_tables(path, child_pos, subheader, | ||
1368 | set, entry->child); | ||
1369 | pos[0] = '\0'; | ||
1370 | if (err) | ||
1371 | goto out; | ||
1372 | } | ||
1373 | err = 0; | ||
1374 | out: | ||
1375 | /* On failure our caller will unregister all registered subheaders */ | ||
1376 | return err; | ||
1377 | } | ||
1378 | |||
1379 | /** | ||
1380 | * __register_sysctl_paths - register a sysctl table hierarchy | ||
1381 | * @set: Sysctl tree to register on | ||
1382 | * @path: The path to the directory the sysctl table is in. | ||
1383 | * @table: the top-level table structure | ||
1384 | * | ||
1385 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1386 | * array. A completely 0 filled entry terminates the table. | ||
1387 | * | ||
1388 | * See __register_sysctl_table for more details. | ||
1389 | */ | ||
1390 | struct ctl_table_header *__register_sysctl_paths( | ||
1391 | struct ctl_table_set *set, | ||
1392 | const struct ctl_path *path, struct ctl_table *table) | ||
1393 | { | ||
1394 | struct ctl_table *ctl_table_arg = table; | ||
1395 | int nr_subheaders = count_subheaders(table); | ||
1396 | struct ctl_table_header *header = NULL, **subheaders, **subheader; | ||
1397 | const struct ctl_path *component; | ||
1398 | char *new_path, *pos; | ||
1399 | |||
1400 | pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); | ||
1401 | if (!new_path) | ||
1402 | return NULL; | ||
1403 | |||
1404 | pos[0] = '\0'; | ||
1405 | for (component = path; component->procname; component++) { | ||
1406 | pos = append_path(new_path, pos, component->procname); | ||
1407 | if (!pos) | ||
1408 | goto out; | ||
1409 | } | ||
1410 | while (table->procname && table->child && !table[1].procname) { | ||
1411 | pos = append_path(new_path, pos, table->procname); | ||
1412 | if (!pos) | ||
1413 | goto out; | ||
1414 | table = table->child; | ||
1415 | } | ||
1416 | if (nr_subheaders == 1) { | ||
1417 | header = __register_sysctl_table(set, new_path, table); | ||
1418 | if (header) | ||
1419 | header->ctl_table_arg = ctl_table_arg; | ||
1420 | } else { | ||
1421 | header = kzalloc(sizeof(*header) + | ||
1422 | sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); | ||
1423 | if (!header) | ||
1424 | goto out; | ||
1425 | |||
1426 | subheaders = (struct ctl_table_header **) (header + 1); | ||
1427 | subheader = subheaders; | ||
1428 | header->ctl_table_arg = ctl_table_arg; | ||
1429 | |||
1430 | if (register_leaf_sysctl_tables(new_path, pos, &subheader, | ||
1431 | set, table)) | ||
1432 | goto err_register_leaves; | ||
1433 | } | ||
1434 | |||
1435 | out: | ||
1436 | kfree(new_path); | ||
1437 | return header; | ||
1438 | |||
1439 | err_register_leaves: | ||
1440 | while (subheader > subheaders) { | ||
1441 | struct ctl_table_header *subh = *(--subheader); | ||
1442 | struct ctl_table *table = subh->ctl_table_arg; | ||
1443 | unregister_sysctl_table(subh); | ||
1444 | kfree(table); | ||
1445 | } | ||
1446 | kfree(header); | ||
1447 | header = NULL; | ||
1448 | goto out; | ||
1449 | } | ||
1450 | |||
1451 | /** | ||
1452 | * register_sysctl_table_path - register a sysctl table hierarchy | ||
1453 | * @path: The path to the directory the sysctl table is in. | ||
1454 | * @table: the top-level table structure | ||
1455 | * | ||
1456 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1457 | * array. A completely 0 filled entry terminates the table. | ||
1458 | * | ||
1459 | * See __register_sysctl_paths for more details. | ||
1460 | */ | ||
1461 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
1462 | struct ctl_table *table) | ||
1463 | { | ||
1464 | return __register_sysctl_paths(&sysctl_table_root.default_set, | ||
1465 | path, table); | ||
1466 | } | ||
1467 | EXPORT_SYMBOL(register_sysctl_paths); | ||
1468 | |||
1469 | /** | ||
1470 | * register_sysctl_table - register a sysctl table hierarchy | ||
1471 | * @table: the top-level table structure | ||
1472 | * | ||
1473 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
1474 | * array. A completely 0 filled entry terminates the table. | ||
1475 | * | ||
1476 | * See register_sysctl_paths for more details. | ||
1477 | */ | ||
1478 | struct ctl_table_header *register_sysctl_table(struct ctl_table *table) | ||
1479 | { | ||
1480 | static const struct ctl_path null_path[] = { {} }; | ||
1481 | |||
1482 | return register_sysctl_paths(null_path, table); | ||
1483 | } | ||
1484 | EXPORT_SYMBOL(register_sysctl_table); | ||
1485 | |||
1486 | static void put_links(struct ctl_table_header *header) | ||
1487 | { | ||
1488 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
1489 | struct ctl_table_root *root = header->root; | ||
1490 | struct ctl_dir *parent = header->parent; | ||
1491 | struct ctl_dir *core_parent; | ||
1492 | struct ctl_table *entry; | ||
1493 | |||
1494 | if (header->set == root_set) | ||
1495 | return; | ||
1496 | |||
1497 | core_parent = xlate_dir(root_set, parent); | ||
1498 | if (IS_ERR(core_parent)) | ||
1499 | return; | ||
1500 | |||
1501 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
1502 | struct ctl_table_header *link_head; | ||
1503 | struct ctl_table *link; | ||
1504 | const char *name = entry->procname; | ||
1505 | |||
1506 | link = find_entry(&link_head, core_parent, name, strlen(name)); | ||
1507 | if (link && | ||
1508 | ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || | ||
1509 | (S_ISLNK(link->mode) && (link->data == root)))) { | ||
1510 | drop_sysctl_table(link_head); | ||
1511 | } | ||
1512 | else { | ||
1513 | printk(KERN_ERR "sysctl link missing during unregister: "); | ||
1514 | sysctl_print_dir(parent); | ||
1515 | printk(KERN_CONT "/%s\n", name); | ||
1516 | } | ||
1517 | } | ||
1518 | } | ||
1519 | |||
1520 | static void drop_sysctl_table(struct ctl_table_header *header) | ||
1521 | { | ||
1522 | struct ctl_dir *parent = header->parent; | ||
1523 | |||
1524 | if (--header->nreg) | ||
1525 | return; | ||
1526 | |||
1527 | put_links(header); | ||
1528 | start_unregistering(header); | ||
1529 | if (!--header->count) | ||
1530 | kfree_rcu(header, rcu); | ||
1531 | |||
1532 | if (parent) | ||
1533 | drop_sysctl_table(&parent->header); | ||
1534 | } | ||
1535 | |||
1536 | /** | ||
1537 | * unregister_sysctl_table - unregister a sysctl table hierarchy | ||
1538 | * @header: the header returned from register_sysctl_table | ||
1539 | * | ||
1540 | * Unregisters the sysctl table and all children. proc entries may not | ||
1541 | * actually be removed until they are no longer used by anyone. | ||
1542 | */ | ||
1543 | void unregister_sysctl_table(struct ctl_table_header * header) | ||
1544 | { | ||
1545 | int nr_subheaders; | ||
1546 | might_sleep(); | ||
1547 | |||
1548 | if (header == NULL) | ||
1549 | return; | ||
1550 | |||
1551 | nr_subheaders = count_subheaders(header->ctl_table_arg); | ||
1552 | if (unlikely(nr_subheaders > 1)) { | ||
1553 | struct ctl_table_header **subheaders; | ||
1554 | int i; | ||
1555 | |||
1556 | subheaders = (struct ctl_table_header **)(header + 1); | ||
1557 | for (i = nr_subheaders -1; i >= 0; i--) { | ||
1558 | struct ctl_table_header *subh = subheaders[i]; | ||
1559 | struct ctl_table *table = subh->ctl_table_arg; | ||
1560 | unregister_sysctl_table(subh); | ||
1561 | kfree(table); | ||
1562 | } | ||
1563 | kfree(header); | ||
1564 | return; | ||
1565 | } | ||
1566 | |||
1567 | spin_lock(&sysctl_lock); | ||
1568 | drop_sysctl_table(header); | ||
1569 | spin_unlock(&sysctl_lock); | ||
1570 | } | ||
1571 | EXPORT_SYMBOL(unregister_sysctl_table); | ||
1572 | |||
1573 | void setup_sysctl_set(struct ctl_table_set *set, | ||
1574 | struct ctl_table_root *root, | ||
1575 | int (*is_seen)(struct ctl_table_set *)) | ||
1576 | { | ||
1577 | memset(set, 0, sizeof(*set)); | ||
1578 | set->is_seen = is_seen; | ||
1579 | init_header(&set->dir.header, root, set, NULL, root_table); | ||
1580 | } | ||
1581 | |||
1582 | void retire_sysctl_set(struct ctl_table_set *set) | ||
1583 | { | ||
1584 | WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); | ||
1585 | } | ||
1586 | |||
1587 | int __init proc_sys_init(void) | 427 | int __init proc_sys_init(void) |
1588 | { | 428 | { |
1589 | struct proc_dir_entry *proc_sys_root; | 429 | struct proc_dir_entry *proc_sys_root; |
@@ -1592,6 +432,5 @@ int __init proc_sys_init(void) | |||
1592 | proc_sys_root->proc_iops = &proc_sys_dir_operations; | 432 | proc_sys_root->proc_iops = &proc_sys_dir_operations; |
1593 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; | 433 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; |
1594 | proc_sys_root->nlink = 0; | 434 | proc_sys_root->nlink = 0; |
1595 | 435 | return 0; | |
1596 | return sysctl_init(); | ||
1597 | } | 436 | } |
diff --git a/fs/proc/root.c b/fs/proc/root.c index c6e9fac26ba..9a8a2b77b87 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <linux/bitops.h> | 18 | #include <linux/bitops.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/pid_namespace.h> | 20 | #include <linux/pid_namespace.h> |
21 | #include <linux/parser.h> | ||
22 | 21 | ||
23 | #include "internal.h" | 22 | #include "internal.h" |
24 | 23 | ||
@@ -37,89 +36,25 @@ static int proc_set_super(struct super_block *sb, void *data) | |||
37 | return err; | 36 | return err; |
38 | } | 37 | } |
39 | 38 | ||
40 | enum { | ||
41 | Opt_gid, Opt_hidepid, Opt_err, | ||
42 | }; | ||
43 | |||
44 | static const match_table_t tokens = { | ||
45 | {Opt_hidepid, "hidepid=%u"}, | ||
46 | {Opt_gid, "gid=%u"}, | ||
47 | {Opt_err, NULL}, | ||
48 | }; | ||
49 | |||
50 | static int proc_parse_options(char *options, struct pid_namespace *pid) | ||
51 | { | ||
52 | char *p; | ||
53 | substring_t args[MAX_OPT_ARGS]; | ||
54 | int option; | ||
55 | |||
56 | if (!options) | ||
57 | return 1; | ||
58 | |||
59 | while ((p = strsep(&options, ",")) != NULL) { | ||
60 | int token; | ||
61 | if (!*p) | ||
62 | continue; | ||
63 | |||
64 | args[0].to = args[0].from = NULL; | ||
65 | token = match_token(p, tokens, args); | ||
66 | switch (token) { | ||
67 | case Opt_gid: | ||
68 | if (match_int(&args[0], &option)) | ||
69 | return 0; | ||
70 | pid->pid_gid = make_kgid(current_user_ns(), option); | ||
71 | break; | ||
72 | case Opt_hidepid: | ||
73 | if (match_int(&args[0], &option)) | ||
74 | return 0; | ||
75 | if (option < 0 || option > 2) { | ||
76 | pr_err("proc: hidepid value must be between 0 and 2.\n"); | ||
77 | return 0; | ||
78 | } | ||
79 | pid->hide_pid = option; | ||
80 | break; | ||
81 | default: | ||
82 | pr_err("proc: unrecognized mount option \"%s\" " | ||
83 | "or missing value\n", p); | ||
84 | return 0; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | return 1; | ||
89 | } | ||
90 | |||
91 | int proc_remount(struct super_block *sb, int *flags, char *data) | ||
92 | { | ||
93 | struct pid_namespace *pid = sb->s_fs_info; | ||
94 | return !proc_parse_options(data, pid); | ||
95 | } | ||
96 | |||
97 | static struct dentry *proc_mount(struct file_system_type *fs_type, | 39 | static struct dentry *proc_mount(struct file_system_type *fs_type, |
98 | int flags, const char *dev_name, void *data) | 40 | int flags, const char *dev_name, void *data) |
99 | { | 41 | { |
100 | int err; | 42 | int err; |
101 | struct super_block *sb; | 43 | struct super_block *sb; |
102 | struct pid_namespace *ns; | 44 | struct pid_namespace *ns; |
103 | char *options; | 45 | struct proc_inode *ei; |
104 | 46 | ||
105 | if (flags & MS_KERNMOUNT) { | 47 | if (flags & MS_KERNMOUNT) |
106 | ns = (struct pid_namespace *)data; | 48 | ns = (struct pid_namespace *)data; |
107 | options = NULL; | 49 | else |
108 | } else { | 50 | ns = current->nsproxy->pid_ns; |
109 | ns = task_active_pid_ns(current); | ||
110 | options = data; | ||
111 | } | ||
112 | 51 | ||
113 | sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); | 52 | sb = sget(fs_type, proc_test_super, proc_set_super, ns); |
114 | if (IS_ERR(sb)) | 53 | if (IS_ERR(sb)) |
115 | return ERR_CAST(sb); | 54 | return ERR_CAST(sb); |
116 | 55 | ||
117 | if (!proc_parse_options(options, ns)) { | ||
118 | deactivate_locked_super(sb); | ||
119 | return ERR_PTR(-EINVAL); | ||
120 | } | ||
121 | |||
122 | if (!sb->s_root) { | 56 | if (!sb->s_root) { |
57 | sb->s_flags = flags; | ||
123 | err = proc_fill_super(sb); | 58 | err = proc_fill_super(sb); |
124 | if (err) { | 59 | if (err) { |
125 | deactivate_locked_super(sb); | 60 | deactivate_locked_super(sb); |
@@ -129,6 +64,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
129 | sb->s_flags |= MS_ACTIVE; | 64 | sb->s_flags |= MS_ACTIVE; |
130 | } | 65 | } |
131 | 66 | ||
67 | ei = PROC_I(sb->s_root->d_inode); | ||
68 | if (!ei->pid) { | ||
69 | rcu_read_lock(); | ||
70 | ei->pid = get_pid(find_pid_ns(1, ns)); | ||
71 | rcu_read_unlock(); | ||
72 | } | ||
73 | |||
132 | return dget(sb->s_root); | 74 | return dget(sb->s_root); |
133 | } | 75 | } |
134 | 76 | ||
@@ -145,19 +87,24 @@ static struct file_system_type proc_fs_type = { | |||
145 | .name = "proc", | 87 | .name = "proc", |
146 | .mount = proc_mount, | 88 | .mount = proc_mount, |
147 | .kill_sb = proc_kill_sb, | 89 | .kill_sb = proc_kill_sb, |
148 | .fs_flags = FS_USERNS_MOUNT, | ||
149 | }; | 90 | }; |
150 | 91 | ||
151 | void __init proc_root_init(void) | 92 | void __init proc_root_init(void) |
152 | { | 93 | { |
94 | struct vfsmount *mnt; | ||
153 | int err; | 95 | int err; |
154 | 96 | ||
155 | proc_init_inodecache(); | 97 | proc_init_inodecache(); |
156 | err = register_filesystem(&proc_fs_type); | 98 | err = register_filesystem(&proc_fs_type); |
157 | if (err) | 99 | if (err) |
158 | return; | 100 | return; |
101 | mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); | ||
102 | if (IS_ERR(mnt)) { | ||
103 | unregister_filesystem(&proc_fs_type); | ||
104 | return; | ||
105 | } | ||
159 | 106 | ||
160 | proc_self_init(); | 107 | init_pid_ns.proc_mnt = mnt; |
161 | proc_symlink("mounts", NULL, "self/mounts"); | 108 | proc_symlink("mounts", NULL, "self/mounts"); |
162 | 109 | ||
163 | proc_net_init(); | 110 | proc_net_init(); |
@@ -188,12 +135,13 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct | |||
188 | return 0; | 135 | return 0; |
189 | } | 136 | } |
190 | 137 | ||
191 | static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) | 138 | static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) |
192 | { | 139 | { |
193 | if (!proc_lookup(dir, dentry, flags)) | 140 | if (!proc_lookup(dir, dentry, nd)) { |
194 | return NULL; | 141 | return NULL; |
142 | } | ||
195 | 143 | ||
196 | return proc_pid_lookup(dir, dentry, flags); | 144 | return proc_pid_lookup(dir, dentry, nd); |
197 | } | 145 | } |
198 | 146 | ||
199 | static int proc_root_readdir(struct file * filp, | 147 | static int proc_root_readdir(struct file * filp, |
@@ -261,5 +209,5 @@ int pid_ns_prepare_proc(struct pid_namespace *ns) | |||
261 | 209 | ||
262 | void pid_ns_release_proc(struct pid_namespace *ns) | 210 | void pid_ns_release_proc(struct pid_namespace *ns) |
263 | { | 211 | { |
264 | kern_unmount(ns->proc_mnt); | 212 | mntput(ns->proc_mnt); |
265 | } | 213 | } |
diff --git a/fs/proc/self.c b/fs/proc/self.c deleted file mode 100644 index aa5cc3bff14..00000000000 --- a/fs/proc/self.c +++ /dev/null | |||
@@ -1,59 +0,0 @@ | |||
1 | #include <linux/proc_fs.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/namei.h> | ||
4 | |||
5 | /* | ||
6 | * /proc/self: | ||
7 | */ | ||
8 | static int proc_self_readlink(struct dentry *dentry, char __user *buffer, | ||
9 | int buflen) | ||
10 | { | ||
11 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
12 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
13 | char tmp[PROC_NUMBUF]; | ||
14 | if (!tgid) | ||
15 | return -ENOENT; | ||
16 | sprintf(tmp, "%d", tgid); | ||
17 | return vfs_readlink(dentry,buffer,buflen,tmp); | ||
18 | } | ||
19 | |||
20 | static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
21 | { | ||
22 | struct pid_namespace *ns = dentry->d_sb->s_fs_info; | ||
23 | pid_t tgid = task_tgid_nr_ns(current, ns); | ||
24 | char *name = ERR_PTR(-ENOENT); | ||
25 | if (tgid) { | ||
26 | /* 11 for max length of signed int in decimal + NULL term */ | ||
27 | name = kmalloc(12, GFP_KERNEL); | ||
28 | if (!name) | ||
29 | name = ERR_PTR(-ENOMEM); | ||
30 | else | ||
31 | sprintf(name, "%d", tgid); | ||
32 | } | ||
33 | nd_set_link(nd, name); | ||
34 | return NULL; | ||
35 | } | ||
36 | |||
37 | static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, | ||
38 | void *cookie) | ||
39 | { | ||
40 | char *s = nd_get_link(nd); | ||
41 | if (!IS_ERR(s)) | ||
42 | kfree(s); | ||
43 | } | ||
44 | |||
45 | static const struct inode_operations proc_self_inode_operations = { | ||
46 | .readlink = proc_self_readlink, | ||
47 | .follow_link = proc_self_follow_link, | ||
48 | .put_link = proc_self_put_link, | ||
49 | }; | ||
50 | |||
51 | void __init proc_self_init(void) | ||
52 | { | ||
53 | struct proc_dir_entry *proc_self_symlink; | ||
54 | mode_t mode; | ||
55 | |||
56 | mode = S_IFLNK | S_IRWXUGO; | ||
57 | proc_self_symlink = proc_create("self", mode, NULL, NULL ); | ||
58 | proc_self_symlink->proc_iops = &proc_self_inode_operations; | ||
59 | } | ||
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index e296572c73e..4b758ad5c83 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <linux/irqnr.h> | 11 | #include <linux/irqnr.h> |
12 | #include <asm/cputime.h> | 12 | #include <asm/cputime.h> |
13 | #include <linux/tick.h> | ||
14 | 13 | ||
15 | #ifndef arch_irq_stat_cpu | 14 | #ifndef arch_irq_stat_cpu |
16 | #define arch_irq_stat_cpu(cpu) 0 | 15 | #define arch_irq_stat_cpu(cpu) 0 |
@@ -18,93 +17,40 @@ | |||
18 | #ifndef arch_irq_stat | 17 | #ifndef arch_irq_stat |
19 | #define arch_irq_stat() 0 | 18 | #define arch_irq_stat() 0 |
20 | #endif | 19 | #endif |
21 | 20 | #ifndef arch_idle_time | |
22 | #ifdef arch_idle_time | 21 | #define arch_idle_time(cpu) 0 |
23 | |||
24 | static cputime64_t get_idle_time(int cpu) | ||
25 | { | ||
26 | cputime64_t idle; | ||
27 | |||
28 | idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; | ||
29 | if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) | ||
30 | idle += arch_idle_time(cpu); | ||
31 | return idle; | ||
32 | } | ||
33 | |||
34 | static cputime64_t get_iowait_time(int cpu) | ||
35 | { | ||
36 | cputime64_t iowait; | ||
37 | |||
38 | iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; | ||
39 | if (cpu_online(cpu) && nr_iowait_cpu(cpu)) | ||
40 | iowait += arch_idle_time(cpu); | ||
41 | return iowait; | ||
42 | } | ||
43 | |||
44 | #else | ||
45 | |||
46 | static u64 get_idle_time(int cpu) | ||
47 | { | ||
48 | u64 idle, idle_time = -1ULL; | ||
49 | |||
50 | if (cpu_online(cpu)) | ||
51 | idle_time = get_cpu_idle_time_us(cpu, NULL); | ||
52 | |||
53 | if (idle_time == -1ULL) | ||
54 | /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ | ||
55 | idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; | ||
56 | else | ||
57 | idle = usecs_to_cputime64(idle_time); | ||
58 | |||
59 | return idle; | ||
60 | } | ||
61 | |||
62 | static u64 get_iowait_time(int cpu) | ||
63 | { | ||
64 | u64 iowait, iowait_time = -1ULL; | ||
65 | |||
66 | if (cpu_online(cpu)) | ||
67 | iowait_time = get_cpu_iowait_time_us(cpu, NULL); | ||
68 | |||
69 | if (iowait_time == -1ULL) | ||
70 | /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ | ||
71 | iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; | ||
72 | else | ||
73 | iowait = usecs_to_cputime64(iowait_time); | ||
74 | |||
75 | return iowait; | ||
76 | } | ||
77 | |||
78 | #endif | 22 | #endif |
79 | 23 | ||
80 | static int show_stat(struct seq_file *p, void *v) | 24 | static int show_stat(struct seq_file *p, void *v) |
81 | { | 25 | { |
82 | int i, j; | 26 | int i, j; |
83 | unsigned long jif; | 27 | unsigned long jif; |
84 | u64 user, nice, system, idle, iowait, irq, softirq, steal; | 28 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; |
85 | u64 guest, guest_nice; | 29 | cputime64_t guest, guest_nice; |
86 | u64 sum = 0; | 30 | u64 sum = 0; |
87 | u64 sum_softirq = 0; | 31 | u64 sum_softirq = 0; |
88 | unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; | 32 | unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; |
89 | struct timespec boottime; | 33 | struct timespec boottime; |
90 | 34 | ||
91 | user = nice = system = idle = iowait = | 35 | user = nice = system = idle = iowait = |
92 | irq = softirq = steal = 0; | 36 | irq = softirq = steal = cputime64_zero; |
93 | guest = guest_nice = 0; | 37 | guest = guest_nice = cputime64_zero; |
94 | getboottime(&boottime); | 38 | getboottime(&boottime); |
95 | jif = boottime.tv_sec; | 39 | jif = boottime.tv_sec; |
96 | 40 | ||
97 | for_each_possible_cpu(i) { | 41 | for_each_possible_cpu(i) { |
98 | user += kcpustat_cpu(i).cpustat[CPUTIME_USER]; | 42 | user = cputime64_add(user, kstat_cpu(i).cpustat.user); |
99 | nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE]; | 43 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); |
100 | system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; | 44 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); |
101 | idle += get_idle_time(i); | 45 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); |
102 | iowait += get_iowait_time(i); | 46 | idle = cputime64_add(idle, arch_idle_time(i)); |
103 | irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; | 47 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); |
104 | softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; | 48 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); |
105 | steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; | 49 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); |
106 | guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; | 50 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); |
107 | guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; | 51 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); |
52 | guest_nice = cputime64_add(guest_nice, | ||
53 | kstat_cpu(i).cpustat.guest_nice); | ||
108 | sum += kstat_cpu_irqs_sum(i); | 54 | sum += kstat_cpu_irqs_sum(i); |
109 | sum += arch_irq_stat_cpu(i); | 55 | sum += arch_irq_stat_cpu(i); |
110 | 56 | ||
@@ -117,49 +63,56 @@ static int show_stat(struct seq_file *p, void *v) | |||
117 | } | 63 | } |
118 | sum += arch_irq_stat(); | 64 | sum += arch_irq_stat(); |
119 | 65 | ||
120 | seq_puts(p, "cpu "); | 66 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " |
121 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); | 67 | "%llu\n", |
122 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); | 68 | (unsigned long long)cputime64_to_clock_t(user), |
123 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); | 69 | (unsigned long long)cputime64_to_clock_t(nice), |
124 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); | 70 | (unsigned long long)cputime64_to_clock_t(system), |
125 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); | 71 | (unsigned long long)cputime64_to_clock_t(idle), |
126 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); | 72 | (unsigned long long)cputime64_to_clock_t(iowait), |
127 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); | 73 | (unsigned long long)cputime64_to_clock_t(irq), |
128 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); | 74 | (unsigned long long)cputime64_to_clock_t(softirq), |
129 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); | 75 | (unsigned long long)cputime64_to_clock_t(steal), |
130 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); | 76 | (unsigned long long)cputime64_to_clock_t(guest), |
131 | seq_putc(p, '\n'); | 77 | (unsigned long long)cputime64_to_clock_t(guest_nice)); |
132 | 78 | #if defined(CONFIG_REPORT_PRESENT_CPUS) | |
79 | for_each_present_cpu(i) { | ||
80 | #else | ||
133 | for_each_online_cpu(i) { | 81 | for_each_online_cpu(i) { |
82 | #endif | ||
83 | |||
134 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ | 84 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ |
135 | user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; | 85 | user = kstat_cpu(i).cpustat.user; |
136 | nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE]; | 86 | nice = kstat_cpu(i).cpustat.nice; |
137 | system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; | 87 | system = kstat_cpu(i).cpustat.system; |
138 | idle = get_idle_time(i); | 88 | idle = kstat_cpu(i).cpustat.idle; |
139 | iowait = get_iowait_time(i); | 89 | idle = cputime64_add(idle, arch_idle_time(i)); |
140 | irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; | 90 | iowait = kstat_cpu(i).cpustat.iowait; |
141 | softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; | 91 | irq = kstat_cpu(i).cpustat.irq; |
142 | steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; | 92 | softirq = kstat_cpu(i).cpustat.softirq; |
143 | guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; | 93 | steal = kstat_cpu(i).cpustat.steal; |
144 | guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; | 94 | guest = kstat_cpu(i).cpustat.guest; |
145 | seq_printf(p, "cpu%d", i); | 95 | guest_nice = kstat_cpu(i).cpustat.guest_nice; |
146 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); | 96 | seq_printf(p, |
147 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); | 97 | "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " |
148 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); | 98 | "%llu\n", |
149 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); | 99 | i, |
150 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); | 100 | (unsigned long long)cputime64_to_clock_t(user), |
151 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); | 101 | (unsigned long long)cputime64_to_clock_t(nice), |
152 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); | 102 | (unsigned long long)cputime64_to_clock_t(system), |
153 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); | 103 | (unsigned long long)cputime64_to_clock_t(idle), |
154 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); | 104 | (unsigned long long)cputime64_to_clock_t(iowait), |
155 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); | 105 | (unsigned long long)cputime64_to_clock_t(irq), |
156 | seq_putc(p, '\n'); | 106 | (unsigned long long)cputime64_to_clock_t(softirq), |
107 | (unsigned long long)cputime64_to_clock_t(steal), | ||
108 | (unsigned long long)cputime64_to_clock_t(guest), | ||
109 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | ||
157 | } | 110 | } |
158 | seq_printf(p, "intr %llu", (unsigned long long)sum); | 111 | seq_printf(p, "intr %llu", (unsigned long long)sum); |
159 | 112 | ||
160 | /* sum again ? it could be updated? */ | 113 | /* sum again ? it could be updated? */ |
161 | for_each_irq_nr(j) | 114 | for_each_irq_nr(j) |
162 | seq_put_decimal_ull(p, ' ', kstat_irqs(j)); | 115 | seq_printf(p, " %u", kstat_irqs(j)); |
163 | 116 | ||
164 | seq_printf(p, | 117 | seq_printf(p, |
165 | "\nctxt %llu\n" | 118 | "\nctxt %llu\n" |
@@ -176,7 +129,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
176 | seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); | 129 | seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); |
177 | 130 | ||
178 | for (i = 0; i < NR_SOFTIRQS; i++) | 131 | for (i = 0; i < NR_SOFTIRQS; i++) |
179 | seq_put_decimal_ull(p, ' ', per_softirq_sums[i]); | 132 | seq_printf(p, " %u", per_softirq_sums[i]); |
180 | seq_putc(p, '\n'); | 133 | seq_putc(p, '\n'); |
181 | 134 | ||
182 | return 0; | 135 | return 0; |
@@ -184,14 +137,11 @@ static int show_stat(struct seq_file *p, void *v) | |||
184 | 137 | ||
185 | static int stat_open(struct inode *inode, struct file *file) | 138 | static int stat_open(struct inode *inode, struct file *file) |
186 | { | 139 | { |
187 | unsigned size = 1024 + 128 * num_possible_cpus(); | 140 | unsigned size = 4096 * (1 + num_possible_cpus() / 32); |
188 | char *buf; | 141 | char *buf; |
189 | struct seq_file *m; | 142 | struct seq_file *m; |
190 | int res; | 143 | int res; |
191 | 144 | ||
192 | /* minimum size to display an interrupt count : 2 bytes */ | ||
193 | size += 2 * nr_irqs; | ||
194 | |||
195 | /* don't ask for more than the kmalloc() max size */ | 145 | /* don't ask for more than the kmalloc() max size */ |
196 | if (size > KMALLOC_MAX_SIZE) | 146 | if (size > KMALLOC_MAX_SIZE) |
197 | size = KMALLOC_MAX_SIZE; | 147 | size = KMALLOC_MAX_SIZE; |
@@ -203,7 +153,7 @@ static int stat_open(struct inode *inode, struct file *file) | |||
203 | if (!res) { | 153 | if (!res) { |
204 | m = file->private_data; | 154 | m = file->private_data; |
205 | m->buf = buf; | 155 | m->buf = buf; |
206 | m->size = ksize(buf); | 156 | m->size = size; |
207 | } else | 157 | } else |
208 | kfree(buf); | 158 | kfree(buf); |
209 | return res; | 159 | return res; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ca5ce7f9f80..c7d4ee663f1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -44,7 +44,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
44 | "VmPeak:\t%8lu kB\n" | 44 | "VmPeak:\t%8lu kB\n" |
45 | "VmSize:\t%8lu kB\n" | 45 | "VmSize:\t%8lu kB\n" |
46 | "VmLck:\t%8lu kB\n" | 46 | "VmLck:\t%8lu kB\n" |
47 | "VmPin:\t%8lu kB\n" | ||
48 | "VmHWM:\t%8lu kB\n" | 47 | "VmHWM:\t%8lu kB\n" |
49 | "VmRSS:\t%8lu kB\n" | 48 | "VmRSS:\t%8lu kB\n" |
50 | "VmData:\t%8lu kB\n" | 49 | "VmData:\t%8lu kB\n" |
@@ -54,9 +53,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
54 | "VmPTE:\t%8lu kB\n" | 53 | "VmPTE:\t%8lu kB\n" |
55 | "VmSwap:\t%8lu kB\n", | 54 | "VmSwap:\t%8lu kB\n", |
56 | hiwater_vm << (PAGE_SHIFT-10), | 55 | hiwater_vm << (PAGE_SHIFT-10), |
57 | total_vm << (PAGE_SHIFT-10), | 56 | (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), |
58 | mm->locked_vm << (PAGE_SHIFT-10), | 57 | mm->locked_vm << (PAGE_SHIFT-10), |
59 | mm->pinned_vm << (PAGE_SHIFT-10), | ||
60 | hiwater_rss << (PAGE_SHIFT-10), | 58 | hiwater_rss << (PAGE_SHIFT-10), |
61 | total_rss << (PAGE_SHIFT-10), | 59 | total_rss << (PAGE_SHIFT-10), |
62 | data << (PAGE_SHIFT-10), | 60 | data << (PAGE_SHIFT-10), |
@@ -90,55 +88,10 @@ static void pad_len_spaces(struct seq_file *m, int len) | |||
90 | seq_printf(m, "%*c", len, ' '); | 88 | seq_printf(m, "%*c", len, ' '); |
91 | } | 89 | } |
92 | 90 | ||
93 | #ifdef CONFIG_NUMA | ||
94 | /* | ||
95 | * These functions are for numa_maps but called in generic **maps seq_file | ||
96 | * ->start(), ->stop() ops. | ||
97 | * | ||
98 | * numa_maps scans all vmas under mmap_sem and checks their mempolicy. | ||
99 | * Each mempolicy object is controlled by reference counting. The problem here | ||
100 | * is how to avoid accessing dead mempolicy object. | ||
101 | * | ||
102 | * Because we're holding mmap_sem while reading seq_file, it's safe to access | ||
103 | * each vma's mempolicy, no vma objects will never drop refs to mempolicy. | ||
104 | * | ||
105 | * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy | ||
106 | * is set and replaced under mmap_sem but unrefed and cleared under task_lock(). | ||
107 | * So, without task_lock(), we cannot trust get_vma_policy() because we cannot | ||
108 | * gurantee the task never exits under us. But taking task_lock() around | ||
109 | * get_vma_plicy() causes lock order problem. | ||
110 | * | ||
111 | * To access task->mempolicy without lock, we hold a reference count of an | ||
112 | * object pointed by task->mempolicy and remember it. This will guarantee | ||
113 | * that task->mempolicy points to an alive object or NULL in numa_maps accesses. | ||
114 | */ | ||
115 | static void hold_task_mempolicy(struct proc_maps_private *priv) | ||
116 | { | ||
117 | struct task_struct *task = priv->task; | ||
118 | |||
119 | task_lock(task); | ||
120 | priv->task_mempolicy = task->mempolicy; | ||
121 | mpol_get(priv->task_mempolicy); | ||
122 | task_unlock(task); | ||
123 | } | ||
124 | static void release_task_mempolicy(struct proc_maps_private *priv) | ||
125 | { | ||
126 | mpol_put(priv->task_mempolicy); | ||
127 | } | ||
128 | #else | ||
129 | static void hold_task_mempolicy(struct proc_maps_private *priv) | ||
130 | { | ||
131 | } | ||
132 | static void release_task_mempolicy(struct proc_maps_private *priv) | ||
133 | { | ||
134 | } | ||
135 | #endif | ||
136 | |||
137 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) | 91 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) |
138 | { | 92 | { |
139 | if (vma && vma != priv->tail_vma) { | 93 | if (vma && vma != priv->tail_vma) { |
140 | struct mm_struct *mm = vma->vm_mm; | 94 | struct mm_struct *mm = vma->vm_mm; |
141 | release_task_mempolicy(priv); | ||
142 | up_read(&mm->mmap_sem); | 95 | up_read(&mm->mmap_sem); |
143 | mmput(mm); | 96 | mmput(mm); |
144 | } | 97 | } |
@@ -170,14 +123,14 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
170 | if (!priv->task) | 123 | if (!priv->task) |
171 | return ERR_PTR(-ESRCH); | 124 | return ERR_PTR(-ESRCH); |
172 | 125 | ||
173 | mm = mm_access(priv->task, PTRACE_MODE_READ); | 126 | mm = mm_for_maps(priv->task); |
174 | if (!mm || IS_ERR(mm)) | 127 | if (!mm || IS_ERR(mm)) |
175 | return mm; | 128 | return mm; |
176 | down_read(&mm->mmap_sem); | 129 | down_read(&mm->mmap_sem); |
177 | 130 | ||
178 | tail_vma = get_gate_vma(priv->task->mm); | 131 | tail_vma = get_gate_vma(priv->task->mm); |
179 | priv->tail_vma = tail_vma; | 132 | priv->tail_vma = tail_vma; |
180 | hold_task_mempolicy(priv); | 133 | |
181 | /* Start with last addr hint */ | 134 | /* Start with last addr hint */ |
182 | vma = find_vma(mm, last_addr); | 135 | vma = find_vma(mm, last_addr); |
183 | if (last_addr && vma) { | 136 | if (last_addr && vma) { |
@@ -204,7 +157,6 @@ out: | |||
204 | if (vma) | 157 | if (vma) |
205 | return vma; | 158 | return vma; |
206 | 159 | ||
207 | release_task_mempolicy(priv); | ||
208 | /* End of vmas has been reached */ | 160 | /* End of vmas has been reached */ |
209 | m->version = (tail_vma != NULL)? 0: -1UL; | 161 | m->version = (tail_vma != NULL)? 0: -1UL; |
210 | up_read(&mm->mmap_sem); | 162 | up_read(&mm->mmap_sem); |
@@ -255,20 +207,16 @@ static int do_maps_open(struct inode *inode, struct file *file, | |||
255 | return ret; | 207 | return ret; |
256 | } | 208 | } |
257 | 209 | ||
258 | static void | 210 | static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) |
259 | show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | ||
260 | { | 211 | { |
261 | struct mm_struct *mm = vma->vm_mm; | 212 | struct mm_struct *mm = vma->vm_mm; |
262 | struct file *file = vma->vm_file; | 213 | struct file *file = vma->vm_file; |
263 | struct proc_maps_private *priv = m->private; | ||
264 | struct task_struct *task = priv->task; | ||
265 | vm_flags_t flags = vma->vm_flags; | 214 | vm_flags_t flags = vma->vm_flags; |
266 | unsigned long ino = 0; | 215 | unsigned long ino = 0; |
267 | unsigned long long pgoff = 0; | 216 | unsigned long long pgoff = 0; |
268 | unsigned long start, end; | 217 | unsigned long start, end; |
269 | dev_t dev = 0; | 218 | dev_t dev = 0; |
270 | int len; | 219 | int len; |
271 | const char *name = NULL; | ||
272 | 220 | ||
273 | if (file) { | 221 | if (file) { |
274 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 222 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
@@ -302,57 +250,36 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | |||
302 | if (file) { | 250 | if (file) { |
303 | pad_len_spaces(m, len); | 251 | pad_len_spaces(m, len); |
304 | seq_path(m, &file->f_path, "\n"); | 252 | seq_path(m, &file->f_path, "\n"); |
305 | goto done; | 253 | } else { |
306 | } | 254 | const char *name = arch_vma_name(vma); |
307 | 255 | if (!name) { | |
308 | name = arch_vma_name(vma); | 256 | if (mm) { |
309 | if (!name) { | 257 | if (vma->vm_start <= mm->brk && |
310 | pid_t tid; | 258 | vma->vm_end >= mm->start_brk) { |
311 | 259 | name = "[heap]"; | |
312 | if (!mm) { | 260 | } else if (vma->vm_start <= mm->start_stack && |
313 | name = "[vdso]"; | 261 | vma->vm_end >= mm->start_stack) { |
314 | goto done; | 262 | name = "[stack]"; |
315 | } | 263 | } |
316 | |||
317 | if (vma->vm_start <= mm->brk && | ||
318 | vma->vm_end >= mm->start_brk) { | ||
319 | name = "[heap]"; | ||
320 | goto done; | ||
321 | } | ||
322 | |||
323 | tid = vm_is_stack(task, vma, is_pid); | ||
324 | |||
325 | if (tid != 0) { | ||
326 | /* | ||
327 | * Thread stack in /proc/PID/task/TID/maps or | ||
328 | * the main process stack. | ||
329 | */ | ||
330 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
331 | vma->vm_end >= mm->start_stack)) { | ||
332 | name = "[stack]"; | ||
333 | } else { | 264 | } else { |
334 | /* Thread stack in /proc/PID/maps */ | 265 | name = "[vdso]"; |
335 | pad_len_spaces(m, len); | ||
336 | seq_printf(m, "[stack:%d]", tid); | ||
337 | } | 266 | } |
338 | } | 267 | } |
339 | } | 268 | if (name) { |
340 | 269 | pad_len_spaces(m, len); | |
341 | done: | 270 | seq_puts(m, name); |
342 | if (name) { | 271 | } |
343 | pad_len_spaces(m, len); | ||
344 | seq_puts(m, name); | ||
345 | } | 272 | } |
346 | seq_putc(m, '\n'); | 273 | seq_putc(m, '\n'); |
347 | } | 274 | } |
348 | 275 | ||
349 | static int show_map(struct seq_file *m, void *v, int is_pid) | 276 | static int show_map(struct seq_file *m, void *v) |
350 | { | 277 | { |
351 | struct vm_area_struct *vma = v; | 278 | struct vm_area_struct *vma = v; |
352 | struct proc_maps_private *priv = m->private; | 279 | struct proc_maps_private *priv = m->private; |
353 | struct task_struct *task = priv->task; | 280 | struct task_struct *task = priv->task; |
354 | 281 | ||
355 | show_map_vma(m, vma, is_pid); | 282 | show_map_vma(m, vma); |
356 | 283 | ||
357 | if (m->count < m->size) /* vma is copied successfully */ | 284 | if (m->count < m->size) /* vma is copied successfully */ |
358 | m->version = (vma != get_gate_vma(task->mm)) | 285 | m->version = (vma != get_gate_vma(task->mm)) |
@@ -360,49 +287,20 @@ static int show_map(struct seq_file *m, void *v, int is_pid) | |||
360 | return 0; | 287 | return 0; |
361 | } | 288 | } |
362 | 289 | ||
363 | static int show_pid_map(struct seq_file *m, void *v) | ||
364 | { | ||
365 | return show_map(m, v, 1); | ||
366 | } | ||
367 | |||
368 | static int show_tid_map(struct seq_file *m, void *v) | ||
369 | { | ||
370 | return show_map(m, v, 0); | ||
371 | } | ||
372 | |||
373 | static const struct seq_operations proc_pid_maps_op = { | 290 | static const struct seq_operations proc_pid_maps_op = { |
374 | .start = m_start, | 291 | .start = m_start, |
375 | .next = m_next, | 292 | .next = m_next, |
376 | .stop = m_stop, | 293 | .stop = m_stop, |
377 | .show = show_pid_map | 294 | .show = show_map |
378 | }; | ||
379 | |||
380 | static const struct seq_operations proc_tid_maps_op = { | ||
381 | .start = m_start, | ||
382 | .next = m_next, | ||
383 | .stop = m_stop, | ||
384 | .show = show_tid_map | ||
385 | }; | 295 | }; |
386 | 296 | ||
387 | static int pid_maps_open(struct inode *inode, struct file *file) | 297 | static int maps_open(struct inode *inode, struct file *file) |
388 | { | 298 | { |
389 | return do_maps_open(inode, file, &proc_pid_maps_op); | 299 | return do_maps_open(inode, file, &proc_pid_maps_op); |
390 | } | 300 | } |
391 | 301 | ||
392 | static int tid_maps_open(struct inode *inode, struct file *file) | 302 | const struct file_operations proc_maps_operations = { |
393 | { | 303 | .open = maps_open, |
394 | return do_maps_open(inode, file, &proc_tid_maps_op); | ||
395 | } | ||
396 | |||
397 | const struct file_operations proc_pid_maps_operations = { | ||
398 | .open = pid_maps_open, | ||
399 | .read = seq_read, | ||
400 | .llseek = seq_lseek, | ||
401 | .release = seq_release_private, | ||
402 | }; | ||
403 | |||
404 | const struct file_operations proc_tid_maps_operations = { | ||
405 | .open = tid_maps_open, | ||
406 | .read = seq_read, | 304 | .read = seq_read, |
407 | .llseek = seq_lseek, | 305 | .llseek = seq_lseek, |
408 | .release = seq_release_private, | 306 | .release = seq_release_private, |
@@ -439,7 +337,6 @@ struct mem_size_stats { | |||
439 | unsigned long anonymous; | 337 | unsigned long anonymous; |
440 | unsigned long anonymous_thp; | 338 | unsigned long anonymous_thp; |
441 | unsigned long swap; | 339 | unsigned long swap; |
442 | unsigned long nonlinear; | ||
443 | u64 pss; | 340 | u64 pss; |
444 | }; | 341 | }; |
445 | 342 | ||
@@ -449,33 +346,24 @@ static void smaps_pte_entry(pte_t ptent, unsigned long addr, | |||
449 | { | 346 | { |
450 | struct mem_size_stats *mss = walk->private; | 347 | struct mem_size_stats *mss = walk->private; |
451 | struct vm_area_struct *vma = mss->vma; | 348 | struct vm_area_struct *vma = mss->vma; |
452 | pgoff_t pgoff = linear_page_index(vma, addr); | 349 | struct page *page; |
453 | struct page *page = NULL; | ||
454 | int mapcount; | 350 | int mapcount; |
455 | 351 | ||
456 | if (pte_present(ptent)) { | 352 | if (is_swap_pte(ptent)) { |
457 | page = vm_normal_page(vma, addr, ptent); | 353 | mss->swap += ptent_size; |
458 | } else if (is_swap_pte(ptent)) { | 354 | return; |
459 | swp_entry_t swpent = pte_to_swp_entry(ptent); | ||
460 | |||
461 | if (!non_swap_entry(swpent)) | ||
462 | mss->swap += ptent_size; | ||
463 | else if (is_migration_entry(swpent)) | ||
464 | page = migration_entry_to_page(swpent); | ||
465 | } else if (pte_file(ptent)) { | ||
466 | if (pte_to_pgoff(ptent) != pgoff) | ||
467 | mss->nonlinear += ptent_size; | ||
468 | } | 355 | } |
469 | 356 | ||
357 | if (!pte_present(ptent)) | ||
358 | return; | ||
359 | |||
360 | page = vm_normal_page(vma, addr, ptent); | ||
470 | if (!page) | 361 | if (!page) |
471 | return; | 362 | return; |
472 | 363 | ||
473 | if (PageAnon(page)) | 364 | if (PageAnon(page)) |
474 | mss->anonymous += ptent_size; | 365 | mss->anonymous += ptent_size; |
475 | 366 | ||
476 | if (page->index != pgoff) | ||
477 | mss->nonlinear += ptent_size; | ||
478 | |||
479 | mss->resident += ptent_size; | 367 | mss->resident += ptent_size; |
480 | /* Accumulate the size in pages that have been accessed. */ | 368 | /* Accumulate the size in pages that have been accessed. */ |
481 | if (pte_young(ptent) || PageReferenced(page)) | 369 | if (pte_young(ptent) || PageReferenced(page)) |
@@ -504,15 +392,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
504 | pte_t *pte; | 392 | pte_t *pte; |
505 | spinlock_t *ptl; | 393 | spinlock_t *ptl; |
506 | 394 | ||
507 | if (pmd_trans_huge_lock(pmd, vma) == 1) { | 395 | spin_lock(&walk->mm->page_table_lock); |
508 | smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); | 396 | if (pmd_trans_huge(*pmd)) { |
397 | if (pmd_trans_splitting(*pmd)) { | ||
398 | spin_unlock(&walk->mm->page_table_lock); | ||
399 | wait_split_huge_page(vma->anon_vma, pmd); | ||
400 | } else { | ||
401 | smaps_pte_entry(*(pte_t *)pmd, addr, | ||
402 | HPAGE_PMD_SIZE, walk); | ||
403 | spin_unlock(&walk->mm->page_table_lock); | ||
404 | mss->anonymous_thp += HPAGE_PMD_SIZE; | ||
405 | return 0; | ||
406 | } | ||
407 | } else { | ||
509 | spin_unlock(&walk->mm->page_table_lock); | 408 | spin_unlock(&walk->mm->page_table_lock); |
510 | mss->anonymous_thp += HPAGE_PMD_SIZE; | ||
511 | return 0; | ||
512 | } | 409 | } |
513 | |||
514 | if (pmd_trans_unstable(pmd)) | ||
515 | return 0; | ||
516 | /* | 410 | /* |
517 | * The mmap_sem held all the way back in m_start() is what | 411 | * The mmap_sem held all the way back in m_start() is what |
518 | * keeps khugepaged out of here and from collapsing things | 412 | * keeps khugepaged out of here and from collapsing things |
@@ -526,58 +420,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
526 | return 0; | 420 | return 0; |
527 | } | 421 | } |
528 | 422 | ||
529 | static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) | 423 | static int show_smap(struct seq_file *m, void *v) |
530 | { | ||
531 | /* | ||
532 | * Don't forget to update Documentation/ on changes. | ||
533 | */ | ||
534 | static const char mnemonics[BITS_PER_LONG][2] = { | ||
535 | /* | ||
536 | * In case if we meet a flag we don't know about. | ||
537 | */ | ||
538 | [0 ... (BITS_PER_LONG-1)] = "??", | ||
539 | |||
540 | [ilog2(VM_READ)] = "rd", | ||
541 | [ilog2(VM_WRITE)] = "wr", | ||
542 | [ilog2(VM_EXEC)] = "ex", | ||
543 | [ilog2(VM_SHARED)] = "sh", | ||
544 | [ilog2(VM_MAYREAD)] = "mr", | ||
545 | [ilog2(VM_MAYWRITE)] = "mw", | ||
546 | [ilog2(VM_MAYEXEC)] = "me", | ||
547 | [ilog2(VM_MAYSHARE)] = "ms", | ||
548 | [ilog2(VM_GROWSDOWN)] = "gd", | ||
549 | [ilog2(VM_PFNMAP)] = "pf", | ||
550 | [ilog2(VM_DENYWRITE)] = "dw", | ||
551 | [ilog2(VM_LOCKED)] = "lo", | ||
552 | [ilog2(VM_IO)] = "io", | ||
553 | [ilog2(VM_SEQ_READ)] = "sr", | ||
554 | [ilog2(VM_RAND_READ)] = "rr", | ||
555 | [ilog2(VM_DONTCOPY)] = "dc", | ||
556 | [ilog2(VM_DONTEXPAND)] = "de", | ||
557 | [ilog2(VM_ACCOUNT)] = "ac", | ||
558 | [ilog2(VM_NORESERVE)] = "nr", | ||
559 | [ilog2(VM_HUGETLB)] = "ht", | ||
560 | [ilog2(VM_NONLINEAR)] = "nl", | ||
561 | [ilog2(VM_ARCH_1)] = "ar", | ||
562 | [ilog2(VM_DONTDUMP)] = "dd", | ||
563 | [ilog2(VM_MIXEDMAP)] = "mm", | ||
564 | [ilog2(VM_HUGEPAGE)] = "hg", | ||
565 | [ilog2(VM_NOHUGEPAGE)] = "nh", | ||
566 | [ilog2(VM_MERGEABLE)] = "mg", | ||
567 | }; | ||
568 | size_t i; | ||
569 | |||
570 | seq_puts(m, "VmFlags: "); | ||
571 | for (i = 0; i < BITS_PER_LONG; i++) { | ||
572 | if (vma->vm_flags & (1UL << i)) { | ||
573 | seq_printf(m, "%c%c ", | ||
574 | mnemonics[i][0], mnemonics[i][1]); | ||
575 | } | ||
576 | } | ||
577 | seq_putc(m, '\n'); | ||
578 | } | ||
579 | |||
580 | static int show_smap(struct seq_file *m, void *v, int is_pid) | ||
581 | { | 424 | { |
582 | struct proc_maps_private *priv = m->private; | 425 | struct proc_maps_private *priv = m->private; |
583 | struct task_struct *task = priv->task; | 426 | struct task_struct *task = priv->task; |
@@ -595,7 +438,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
595 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 438 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
596 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); | 439 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
597 | 440 | ||
598 | show_map_vma(m, vma, is_pid); | 441 | show_map_vma(m, vma); |
599 | 442 | ||
600 | seq_printf(m, | 443 | seq_printf(m, |
601 | "Size: %8lu kB\n" | 444 | "Size: %8lu kB\n" |
@@ -628,61 +471,26 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
628 | (vma->vm_flags & VM_LOCKED) ? | 471 | (vma->vm_flags & VM_LOCKED) ? |
629 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); | 472 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); |
630 | 473 | ||
631 | if (vma->vm_flags & VM_NONLINEAR) | ||
632 | seq_printf(m, "Nonlinear: %8lu kB\n", | ||
633 | mss.nonlinear >> 10); | ||
634 | |||
635 | show_smap_vma_flags(m, vma); | ||
636 | |||
637 | if (m->count < m->size) /* vma is copied successfully */ | 474 | if (m->count < m->size) /* vma is copied successfully */ |
638 | m->version = (vma != get_gate_vma(task->mm)) | 475 | m->version = (vma != get_gate_vma(task->mm)) |
639 | ? vma->vm_start : 0; | 476 | ? vma->vm_start : 0; |
640 | return 0; | 477 | return 0; |
641 | } | 478 | } |
642 | 479 | ||
643 | static int show_pid_smap(struct seq_file *m, void *v) | ||
644 | { | ||
645 | return show_smap(m, v, 1); | ||
646 | } | ||
647 | |||
648 | static int show_tid_smap(struct seq_file *m, void *v) | ||
649 | { | ||
650 | return show_smap(m, v, 0); | ||
651 | } | ||
652 | |||
653 | static const struct seq_operations proc_pid_smaps_op = { | 480 | static const struct seq_operations proc_pid_smaps_op = { |
654 | .start = m_start, | 481 | .start = m_start, |
655 | .next = m_next, | 482 | .next = m_next, |
656 | .stop = m_stop, | 483 | .stop = m_stop, |
657 | .show = show_pid_smap | 484 | .show = show_smap |
658 | }; | ||
659 | |||
660 | static const struct seq_operations proc_tid_smaps_op = { | ||
661 | .start = m_start, | ||
662 | .next = m_next, | ||
663 | .stop = m_stop, | ||
664 | .show = show_tid_smap | ||
665 | }; | 485 | }; |
666 | 486 | ||
667 | static int pid_smaps_open(struct inode *inode, struct file *file) | 487 | static int smaps_open(struct inode *inode, struct file *file) |
668 | { | 488 | { |
669 | return do_maps_open(inode, file, &proc_pid_smaps_op); | 489 | return do_maps_open(inode, file, &proc_pid_smaps_op); |
670 | } | 490 | } |
671 | 491 | ||
672 | static int tid_smaps_open(struct inode *inode, struct file *file) | 492 | const struct file_operations proc_smaps_operations = { |
673 | { | 493 | .open = smaps_open, |
674 | return do_maps_open(inode, file, &proc_tid_smaps_op); | ||
675 | } | ||
676 | |||
677 | const struct file_operations proc_pid_smaps_operations = { | ||
678 | .open = pid_smaps_open, | ||
679 | .read = seq_read, | ||
680 | .llseek = seq_lseek, | ||
681 | .release = seq_release_private, | ||
682 | }; | ||
683 | |||
684 | const struct file_operations proc_tid_smaps_operations = { | ||
685 | .open = tid_smaps_open, | ||
686 | .read = seq_read, | 494 | .read = seq_read, |
687 | .llseek = seq_lseek, | 495 | .llseek = seq_lseek, |
688 | .release = seq_release_private, | 496 | .release = seq_release_private, |
@@ -696,9 +504,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
696 | spinlock_t *ptl; | 504 | spinlock_t *ptl; |
697 | struct page *page; | 505 | struct page *page; |
698 | 506 | ||
699 | split_huge_page_pmd(vma, addr, pmd); | 507 | split_huge_page_pmd(walk->mm, pmd); |
700 | if (pmd_trans_unstable(pmd)) | ||
701 | return 0; | ||
702 | 508 | ||
703 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 509 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
704 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 510 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
@@ -787,18 +593,11 @@ const struct file_operations proc_clear_refs_operations = { | |||
787 | .llseek = noop_llseek, | 593 | .llseek = noop_llseek, |
788 | }; | 594 | }; |
789 | 595 | ||
790 | typedef struct { | ||
791 | u64 pme; | ||
792 | } pagemap_entry_t; | ||
793 | |||
794 | struct pagemapread { | 596 | struct pagemapread { |
795 | int pos, len; | 597 | int pos, len; |
796 | pagemap_entry_t *buffer; | 598 | u64 *buffer; |
797 | }; | 599 | }; |
798 | 600 | ||
799 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | ||
800 | #define PAGEMAP_WALK_MASK (PMD_MASK) | ||
801 | |||
802 | #define PM_ENTRY_BYTES sizeof(u64) | 601 | #define PM_ENTRY_BYTES sizeof(u64) |
803 | #define PM_STATUS_BITS 3 | 602 | #define PM_STATUS_BITS 3 |
804 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) | 603 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) |
@@ -813,19 +612,13 @@ struct pagemapread { | |||
813 | 612 | ||
814 | #define PM_PRESENT PM_STATUS(4LL) | 613 | #define PM_PRESENT PM_STATUS(4LL) |
815 | #define PM_SWAP PM_STATUS(2LL) | 614 | #define PM_SWAP PM_STATUS(2LL) |
816 | #define PM_FILE PM_STATUS(1LL) | ||
817 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) | 615 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) |
818 | #define PM_END_OF_BUFFER 1 | 616 | #define PM_END_OF_BUFFER 1 |
819 | 617 | ||
820 | static inline pagemap_entry_t make_pme(u64 val) | 618 | static int add_to_pagemap(unsigned long addr, u64 pfn, |
821 | { | ||
822 | return (pagemap_entry_t) { .pme = val }; | ||
823 | } | ||
824 | |||
825 | static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme, | ||
826 | struct pagemapread *pm) | 619 | struct pagemapread *pm) |
827 | { | 620 | { |
828 | pm->buffer[pm->pos++] = *pme; | 621 | pm->buffer[pm->pos++] = pfn; |
829 | if (pm->pos >= pm->len) | 622 | if (pm->pos >= pm->len) |
830 | return PM_END_OF_BUFFER; | 623 | return PM_END_OF_BUFFER; |
831 | return 0; | 624 | return 0; |
@@ -837,66 +630,31 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
837 | struct pagemapread *pm = walk->private; | 630 | struct pagemapread *pm = walk->private; |
838 | unsigned long addr; | 631 | unsigned long addr; |
839 | int err = 0; | 632 | int err = 0; |
840 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | ||
841 | |||
842 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 633 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
843 | err = add_to_pagemap(addr, &pme, pm); | 634 | err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); |
844 | if (err) | 635 | if (err) |
845 | break; | 636 | break; |
846 | } | 637 | } |
847 | return err; | 638 | return err; |
848 | } | 639 | } |
849 | 640 | ||
850 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, | 641 | static u64 swap_pte_to_pagemap_entry(pte_t pte) |
851 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) | ||
852 | { | 642 | { |
853 | u64 frame, flags; | 643 | swp_entry_t e = pte_to_swp_entry(pte); |
854 | struct page *page = NULL; | 644 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); |
855 | |||
856 | if (pte_present(pte)) { | ||
857 | frame = pte_pfn(pte); | ||
858 | flags = PM_PRESENT; | ||
859 | page = vm_normal_page(vma, addr, pte); | ||
860 | } else if (is_swap_pte(pte)) { | ||
861 | swp_entry_t entry = pte_to_swp_entry(pte); | ||
862 | |||
863 | frame = swp_type(entry) | | ||
864 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); | ||
865 | flags = PM_SWAP; | ||
866 | if (is_migration_entry(entry)) | ||
867 | page = migration_entry_to_page(entry); | ||
868 | } else { | ||
869 | *pme = make_pme(PM_NOT_PRESENT); | ||
870 | return; | ||
871 | } | ||
872 | |||
873 | if (page && !PageAnon(page)) | ||
874 | flags |= PM_FILE; | ||
875 | |||
876 | *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); | ||
877 | } | 645 | } |
878 | 646 | ||
879 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 647 | static u64 pte_to_pagemap_entry(pte_t pte) |
880 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | ||
881 | pmd_t pmd, int offset) | ||
882 | { | ||
883 | /* | ||
884 | * Currently pmd for thp is always present because thp can not be | ||
885 | * swapped-out, migrated, or HWPOISONed (split in such cases instead.) | ||
886 | * This if-check is just to prepare for future implementation. | ||
887 | */ | ||
888 | if (pmd_present(pmd)) | ||
889 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | ||
890 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | ||
891 | else | ||
892 | *pme = make_pme(PM_NOT_PRESENT); | ||
893 | } | ||
894 | #else | ||
895 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | ||
896 | pmd_t pmd, int offset) | ||
897 | { | 648 | { |
649 | u64 pme = 0; | ||
650 | if (is_swap_pte(pte)) | ||
651 | pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | ||
652 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; | ||
653 | else if (pte_present(pte)) | ||
654 | pme = PM_PFRAME(pte_pfn(pte)) | ||
655 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | ||
656 | return pme; | ||
898 | } | 657 | } |
899 | #endif | ||
900 | 658 | ||
901 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 659 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
902 | struct mm_walk *walk) | 660 | struct mm_walk *walk) |
@@ -905,46 +663,29 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
905 | struct pagemapread *pm = walk->private; | 663 | struct pagemapread *pm = walk->private; |
906 | pte_t *pte; | 664 | pte_t *pte; |
907 | int err = 0; | 665 | int err = 0; |
908 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 666 | |
667 | split_huge_page_pmd(walk->mm, pmd); | ||
909 | 668 | ||
910 | /* find the first VMA at or above 'addr' */ | 669 | /* find the first VMA at or above 'addr' */ |
911 | vma = find_vma(walk->mm, addr); | 670 | vma = find_vma(walk->mm, addr); |
912 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { | ||
913 | for (; addr != end; addr += PAGE_SIZE) { | ||
914 | unsigned long offset; | ||
915 | |||
916 | offset = (addr & ~PAGEMAP_WALK_MASK) >> | ||
917 | PAGE_SHIFT; | ||
918 | thp_pmd_to_pagemap_entry(&pme, *pmd, offset); | ||
919 | err = add_to_pagemap(addr, &pme, pm); | ||
920 | if (err) | ||
921 | break; | ||
922 | } | ||
923 | spin_unlock(&walk->mm->page_table_lock); | ||
924 | return err; | ||
925 | } | ||
926 | |||
927 | if (pmd_trans_unstable(pmd)) | ||
928 | return 0; | ||
929 | for (; addr != end; addr += PAGE_SIZE) { | 671 | for (; addr != end; addr += PAGE_SIZE) { |
672 | u64 pfn = PM_NOT_PRESENT; | ||
930 | 673 | ||
931 | /* check to see if we've left 'vma' behind | 674 | /* check to see if we've left 'vma' behind |
932 | * and need a new, higher one */ | 675 | * and need a new, higher one */ |
933 | if (vma && (addr >= vma->vm_end)) { | 676 | if (vma && (addr >= vma->vm_end)) |
934 | vma = find_vma(walk->mm, addr); | 677 | vma = find_vma(walk->mm, addr); |
935 | pme = make_pme(PM_NOT_PRESENT); | ||
936 | } | ||
937 | 678 | ||
938 | /* check that 'vma' actually covers this address, | 679 | /* check that 'vma' actually covers this address, |
939 | * and that it isn't a huge page vma */ | 680 | * and that it isn't a huge page vma */ |
940 | if (vma && (vma->vm_start <= addr) && | 681 | if (vma && (vma->vm_start <= addr) && |
941 | !is_vm_hugetlb_page(vma)) { | 682 | !is_vm_hugetlb_page(vma)) { |
942 | pte = pte_offset_map(pmd, addr); | 683 | pte = pte_offset_map(pmd, addr); |
943 | pte_to_pagemap_entry(&pme, vma, addr, *pte); | 684 | pfn = pte_to_pagemap_entry(*pte); |
944 | /* unmap before userspace copy */ | 685 | /* unmap before userspace copy */ |
945 | pte_unmap(pte); | 686 | pte_unmap(pte); |
946 | } | 687 | } |
947 | err = add_to_pagemap(addr, &pme, pm); | 688 | err = add_to_pagemap(addr, pfn, pm); |
948 | if (err) | 689 | if (err) |
949 | return err; | 690 | return err; |
950 | } | 691 | } |
@@ -955,14 +696,13 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
955 | } | 696 | } |
956 | 697 | ||
957 | #ifdef CONFIG_HUGETLB_PAGE | 698 | #ifdef CONFIG_HUGETLB_PAGE |
958 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, | 699 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) |
959 | pte_t pte, int offset) | ||
960 | { | 700 | { |
701 | u64 pme = 0; | ||
961 | if (pte_present(pte)) | 702 | if (pte_present(pte)) |
962 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | 703 | pme = PM_PFRAME(pte_pfn(pte) + offset) |
963 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 704 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; |
964 | else | 705 | return pme; |
965 | *pme = make_pme(PM_NOT_PRESENT); | ||
966 | } | 706 | } |
967 | 707 | ||
968 | /* This function walks within one hugetlb entry in the single call */ | 708 | /* This function walks within one hugetlb entry in the single call */ |
@@ -972,12 +712,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
972 | { | 712 | { |
973 | struct pagemapread *pm = walk->private; | 713 | struct pagemapread *pm = walk->private; |
974 | int err = 0; | 714 | int err = 0; |
975 | pagemap_entry_t pme; | 715 | u64 pfn; |
976 | 716 | ||
977 | for (; addr != end; addr += PAGE_SIZE) { | 717 | for (; addr != end; addr += PAGE_SIZE) { |
978 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 718 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
979 | huge_pte_to_pagemap_entry(&pme, *pte, offset); | 719 | pfn = huge_pte_to_pagemap_entry(*pte, offset); |
980 | err = add_to_pagemap(addr, &pme, pm); | 720 | err = add_to_pagemap(addr, pfn, pm); |
981 | if (err) | 721 | if (err) |
982 | return err; | 722 | return err; |
983 | } | 723 | } |
@@ -994,11 +734,11 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
994 | * For each page in the address space, this file contains one 64-bit entry | 734 | * For each page in the address space, this file contains one 64-bit entry |
995 | * consisting of the following: | 735 | * consisting of the following: |
996 | * | 736 | * |
997 | * Bits 0-54 page frame number (PFN) if present | 737 | * Bits 0-55 page frame number (PFN) if present |
998 | * Bits 0-4 swap type if swapped | 738 | * Bits 0-4 swap type if swapped |
999 | * Bits 5-54 swap offset if swapped | 739 | * Bits 5-55 swap offset if swapped |
1000 | * Bits 55-60 page shift (page size = 1<<page shift) | 740 | * Bits 55-60 page shift (page size = 1<<page shift) |
1001 | * Bit 61 page is file-page or shared-anon | 741 | * Bit 61 reserved for future use |
1002 | * Bit 62 page swapped | 742 | * Bit 62 page swapped |
1003 | * Bit 63 page present | 743 | * Bit 63 page present |
1004 | * | 744 | * |
@@ -1012,6 +752,8 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
1012 | * determine which areas of memory are actually mapped and llseek to | 752 | * determine which areas of memory are actually mapped and llseek to |
1013 | * skip over unmapped regions. | 753 | * skip over unmapped regions. |
1014 | */ | 754 | */ |
755 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | ||
756 | #define PAGEMAP_WALK_MASK (PMD_MASK) | ||
1015 | static ssize_t pagemap_read(struct file *file, char __user *buf, | 757 | static ssize_t pagemap_read(struct file *file, char __user *buf, |
1016 | size_t count, loff_t *ppos) | 758 | size_t count, loff_t *ppos) |
1017 | { | 759 | { |
@@ -1044,7 +786,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1044 | if (!pm.buffer) | 786 | if (!pm.buffer) |
1045 | goto out_task; | 787 | goto out_task; |
1046 | 788 | ||
1047 | mm = mm_access(task, PTRACE_MODE_READ); | 789 | mm = mm_for_maps(task); |
1048 | ret = PTR_ERR(mm); | 790 | ret = PTR_ERR(mm); |
1049 | if (!mm || IS_ERR(mm)) | 791 | if (!mm || IS_ERR(mm)) |
1050 | goto out_free; | 792 | goto out_free; |
@@ -1179,7 +921,7 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, | |||
1179 | return NULL; | 921 | return NULL; |
1180 | 922 | ||
1181 | nid = page_to_nid(page); | 923 | nid = page_to_nid(page); |
1182 | if (!node_isset(nid, node_states[N_MEMORY])) | 924 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) |
1183 | return NULL; | 925 | return NULL; |
1184 | 926 | ||
1185 | return page; | 927 | return page; |
@@ -1194,21 +936,26 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
1194 | pte_t *pte; | 936 | pte_t *pte; |
1195 | 937 | ||
1196 | md = walk->private; | 938 | md = walk->private; |
1197 | 939 | spin_lock(&walk->mm->page_table_lock); | |
1198 | if (pmd_trans_huge_lock(pmd, md->vma) == 1) { | 940 | if (pmd_trans_huge(*pmd)) { |
1199 | pte_t huge_pte = *(pte_t *)pmd; | 941 | if (pmd_trans_splitting(*pmd)) { |
1200 | struct page *page; | 942 | spin_unlock(&walk->mm->page_table_lock); |
1201 | 943 | wait_split_huge_page(md->vma->anon_vma, pmd); | |
1202 | page = can_gather_numa_stats(huge_pte, md->vma, addr); | 944 | } else { |
1203 | if (page) | 945 | pte_t huge_pte = *(pte_t *)pmd; |
1204 | gather_stats(page, md, pte_dirty(huge_pte), | 946 | struct page *page; |
1205 | HPAGE_PMD_SIZE/PAGE_SIZE); | 947 | |
948 | page = can_gather_numa_stats(huge_pte, md->vma, addr); | ||
949 | if (page) | ||
950 | gather_stats(page, md, pte_dirty(huge_pte), | ||
951 | HPAGE_PMD_SIZE/PAGE_SIZE); | ||
952 | spin_unlock(&walk->mm->page_table_lock); | ||
953 | return 0; | ||
954 | } | ||
955 | } else { | ||
1206 | spin_unlock(&walk->mm->page_table_lock); | 956 | spin_unlock(&walk->mm->page_table_lock); |
1207 | return 0; | ||
1208 | } | 957 | } |
1209 | 958 | ||
1210 | if (pmd_trans_unstable(pmd)) | ||
1211 | return 0; | ||
1212 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | 959 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
1213 | do { | 960 | do { |
1214 | struct page *page = can_gather_numa_stats(*pte, md->vma, addr); | 961 | struct page *page = can_gather_numa_stats(*pte, md->vma, addr); |
@@ -1250,14 +997,13 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | |||
1250 | /* | 997 | /* |
1251 | * Display pages allocated per node and memory policy via /proc. | 998 | * Display pages allocated per node and memory policy via /proc. |
1252 | */ | 999 | */ |
1253 | static int show_numa_map(struct seq_file *m, void *v, int is_pid) | 1000 | static int show_numa_map(struct seq_file *m, void *v) |
1254 | { | 1001 | { |
1255 | struct numa_maps_private *numa_priv = m->private; | 1002 | struct numa_maps_private *numa_priv = m->private; |
1256 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; | 1003 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; |
1257 | struct vm_area_struct *vma = v; | 1004 | struct vm_area_struct *vma = v; |
1258 | struct numa_maps *md = &numa_priv->md; | 1005 | struct numa_maps *md = &numa_priv->md; |
1259 | struct file *file = vma->vm_file; | 1006 | struct file *file = vma->vm_file; |
1260 | struct task_struct *task = proc_priv->task; | ||
1261 | struct mm_struct *mm = vma->vm_mm; | 1007 | struct mm_struct *mm = vma->vm_mm; |
1262 | struct mm_walk walk = {}; | 1008 | struct mm_walk walk = {}; |
1263 | struct mempolicy *pol; | 1009 | struct mempolicy *pol; |
@@ -1277,8 +1023,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1277 | walk.private = md; | 1023 | walk.private = md; |
1278 | walk.mm = mm; | 1024 | walk.mm = mm; |
1279 | 1025 | ||
1280 | pol = get_vma_policy(task, vma, vma->vm_start); | 1026 | pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); |
1281 | mpol_to_str(buffer, sizeof(buffer), pol); | 1027 | mpol_to_str(buffer, sizeof(buffer), pol, 0); |
1282 | mpol_cond_put(pol); | 1028 | mpol_cond_put(pol); |
1283 | 1029 | ||
1284 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1030 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
@@ -1288,19 +1034,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1288 | seq_path(m, &file->f_path, "\n\t= "); | 1034 | seq_path(m, &file->f_path, "\n\t= "); |
1289 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1035 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
1290 | seq_printf(m, " heap"); | 1036 | seq_printf(m, " heap"); |
1291 | } else { | 1037 | } else if (vma->vm_start <= mm->start_stack && |
1292 | pid_t tid = vm_is_stack(task, vma, is_pid); | 1038 | vma->vm_end >= mm->start_stack) { |
1293 | if (tid != 0) { | 1039 | seq_printf(m, " stack"); |
1294 | /* | ||
1295 | * Thread stack in /proc/PID/task/TID/maps or | ||
1296 | * the main process stack. | ||
1297 | */ | ||
1298 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
1299 | vma->vm_end >= mm->start_stack)) | ||
1300 | seq_printf(m, " stack"); | ||
1301 | else | ||
1302 | seq_printf(m, " stack:%d", tid); | ||
1303 | } | ||
1304 | } | 1040 | } |
1305 | 1041 | ||
1306 | if (is_vm_hugetlb_page(vma)) | 1042 | if (is_vm_hugetlb_page(vma)) |
@@ -1332,7 +1068,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1332 | if (md->writeback) | 1068 | if (md->writeback) |
1333 | seq_printf(m, " writeback=%lu", md->writeback); | 1069 | seq_printf(m, " writeback=%lu", md->writeback); |
1334 | 1070 | ||
1335 | for_each_node_state(n, N_MEMORY) | 1071 | for_each_node_state(n, N_HIGH_MEMORY) |
1336 | if (md->node[n]) | 1072 | if (md->node[n]) |
1337 | seq_printf(m, " N%d=%lu", n, md->node[n]); | 1073 | seq_printf(m, " N%d=%lu", n, md->node[n]); |
1338 | out: | 1074 | out: |
@@ -1343,39 +1079,21 @@ out: | |||
1343 | return 0; | 1079 | return 0; |
1344 | } | 1080 | } |
1345 | 1081 | ||
1346 | static int show_pid_numa_map(struct seq_file *m, void *v) | ||
1347 | { | ||
1348 | return show_numa_map(m, v, 1); | ||
1349 | } | ||
1350 | |||
1351 | static int show_tid_numa_map(struct seq_file *m, void *v) | ||
1352 | { | ||
1353 | return show_numa_map(m, v, 0); | ||
1354 | } | ||
1355 | |||
1356 | static const struct seq_operations proc_pid_numa_maps_op = { | 1082 | static const struct seq_operations proc_pid_numa_maps_op = { |
1357 | .start = m_start, | 1083 | .start = m_start, |
1358 | .next = m_next, | 1084 | .next = m_next, |
1359 | .stop = m_stop, | 1085 | .stop = m_stop, |
1360 | .show = show_pid_numa_map, | 1086 | .show = show_numa_map, |
1361 | }; | ||
1362 | |||
1363 | static const struct seq_operations proc_tid_numa_maps_op = { | ||
1364 | .start = m_start, | ||
1365 | .next = m_next, | ||
1366 | .stop = m_stop, | ||
1367 | .show = show_tid_numa_map, | ||
1368 | }; | 1087 | }; |
1369 | 1088 | ||
1370 | static int numa_maps_open(struct inode *inode, struct file *file, | 1089 | static int numa_maps_open(struct inode *inode, struct file *file) |
1371 | const struct seq_operations *ops) | ||
1372 | { | 1090 | { |
1373 | struct numa_maps_private *priv; | 1091 | struct numa_maps_private *priv; |
1374 | int ret = -ENOMEM; | 1092 | int ret = -ENOMEM; |
1375 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 1093 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
1376 | if (priv) { | 1094 | if (priv) { |
1377 | priv->proc_maps.pid = proc_pid(inode); | 1095 | priv->proc_maps.pid = proc_pid(inode); |
1378 | ret = seq_open(file, ops); | 1096 | ret = seq_open(file, &proc_pid_numa_maps_op); |
1379 | if (!ret) { | 1097 | if (!ret) { |
1380 | struct seq_file *m = file->private_data; | 1098 | struct seq_file *m = file->private_data; |
1381 | m->private = priv; | 1099 | m->private = priv; |
@@ -1386,25 +1104,8 @@ static int numa_maps_open(struct inode *inode, struct file *file, | |||
1386 | return ret; | 1104 | return ret; |
1387 | } | 1105 | } |
1388 | 1106 | ||
1389 | static int pid_numa_maps_open(struct inode *inode, struct file *file) | 1107 | const struct file_operations proc_numa_maps_operations = { |
1390 | { | 1108 | .open = numa_maps_open, |
1391 | return numa_maps_open(inode, file, &proc_pid_numa_maps_op); | ||
1392 | } | ||
1393 | |||
1394 | static int tid_numa_maps_open(struct inode *inode, struct file *file) | ||
1395 | { | ||
1396 | return numa_maps_open(inode, file, &proc_tid_numa_maps_op); | ||
1397 | } | ||
1398 | |||
1399 | const struct file_operations proc_pid_numa_maps_operations = { | ||
1400 | .open = pid_numa_maps_open, | ||
1401 | .read = seq_read, | ||
1402 | .llseek = seq_lseek, | ||
1403 | .release = seq_release_private, | ||
1404 | }; | ||
1405 | |||
1406 | const struct file_operations proc_tid_numa_maps_operations = { | ||
1407 | .open = tid_numa_maps_open, | ||
1408 | .read = seq_read, | 1109 | .read = seq_read, |
1409 | .llseek = seq_lseek, | 1110 | .llseek = seq_lseek, |
1410 | .release = seq_release_private, | 1111 | .release = seq_release_private, |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 1ccfa537f5f..980de547c07 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -134,11 +134,9 @@ static void pad_len_spaces(struct seq_file *m, int len) | |||
134 | /* | 134 | /* |
135 | * display a single VMA to a sequenced file | 135 | * display a single VMA to a sequenced file |
136 | */ | 136 | */ |
137 | static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, | 137 | static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) |
138 | int is_pid) | ||
139 | { | 138 | { |
140 | struct mm_struct *mm = vma->vm_mm; | 139 | struct mm_struct *mm = vma->vm_mm; |
141 | struct proc_maps_private *priv = m->private; | ||
142 | unsigned long ino = 0; | 140 | unsigned long ino = 0; |
143 | struct file *file; | 141 | struct file *file; |
144 | dev_t dev = 0; | 142 | dev_t dev = 0; |
@@ -170,19 +168,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, | |||
170 | pad_len_spaces(m, len); | 168 | pad_len_spaces(m, len); |
171 | seq_path(m, &file->f_path, ""); | 169 | seq_path(m, &file->f_path, ""); |
172 | } else if (mm) { | 170 | } else if (mm) { |
173 | pid_t tid = vm_is_stack(priv->task, vma, is_pid); | 171 | if (vma->vm_start <= mm->start_stack && |
174 | 172 | vma->vm_end >= mm->start_stack) { | |
175 | if (tid != 0) { | ||
176 | pad_len_spaces(m, len); | 173 | pad_len_spaces(m, len); |
177 | /* | 174 | seq_puts(m, "[stack]"); |
178 | * Thread stack in /proc/PID/task/TID/maps or | ||
179 | * the main process stack. | ||
180 | */ | ||
181 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
182 | vma->vm_end >= mm->start_stack)) | ||
183 | seq_printf(m, "[stack]"); | ||
184 | else | ||
185 | seq_printf(m, "[stack:%d]", tid); | ||
186 | } | 175 | } |
187 | } | 176 | } |
188 | 177 | ||
@@ -193,22 +182,11 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, | |||
193 | /* | 182 | /* |
194 | * display mapping lines for a particular process's /proc/pid/maps | 183 | * display mapping lines for a particular process's /proc/pid/maps |
195 | */ | 184 | */ |
196 | static int show_map(struct seq_file *m, void *_p, int is_pid) | 185 | static int show_map(struct seq_file *m, void *_p) |
197 | { | 186 | { |
198 | struct rb_node *p = _p; | 187 | struct rb_node *p = _p; |
199 | 188 | ||
200 | return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb), | 189 | return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); |
201 | is_pid); | ||
202 | } | ||
203 | |||
204 | static int show_pid_map(struct seq_file *m, void *_p) | ||
205 | { | ||
206 | return show_map(m, _p, 1); | ||
207 | } | ||
208 | |||
209 | static int show_tid_map(struct seq_file *m, void *_p) | ||
210 | { | ||
211 | return show_map(m, _p, 0); | ||
212 | } | 190 | } |
213 | 191 | ||
214 | static void *m_start(struct seq_file *m, loff_t *pos) | 192 | static void *m_start(struct seq_file *m, loff_t *pos) |
@@ -223,7 +201,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
223 | if (!priv->task) | 201 | if (!priv->task) |
224 | return ERR_PTR(-ESRCH); | 202 | return ERR_PTR(-ESRCH); |
225 | 203 | ||
226 | mm = mm_access(priv->task, PTRACE_MODE_READ); | 204 | mm = mm_for_maps(priv->task); |
227 | if (!mm || IS_ERR(mm)) { | 205 | if (!mm || IS_ERR(mm)) { |
228 | put_task_struct(priv->task); | 206 | put_task_struct(priv->task); |
229 | priv->task = NULL; | 207 | priv->task = NULL; |
@@ -262,18 +240,10 @@ static const struct seq_operations proc_pid_maps_ops = { | |||
262 | .start = m_start, | 240 | .start = m_start, |
263 | .next = m_next, | 241 | .next = m_next, |
264 | .stop = m_stop, | 242 | .stop = m_stop, |
265 | .show = show_pid_map | 243 | .show = show_map |
266 | }; | ||
267 | |||
268 | static const struct seq_operations proc_tid_maps_ops = { | ||
269 | .start = m_start, | ||
270 | .next = m_next, | ||
271 | .stop = m_stop, | ||
272 | .show = show_tid_map | ||
273 | }; | 244 | }; |
274 | 245 | ||
275 | static int maps_open(struct inode *inode, struct file *file, | 246 | static int maps_open(struct inode *inode, struct file *file) |
276 | const struct seq_operations *ops) | ||
277 | { | 247 | { |
278 | struct proc_maps_private *priv; | 248 | struct proc_maps_private *priv; |
279 | int ret = -ENOMEM; | 249 | int ret = -ENOMEM; |
@@ -281,7 +251,7 @@ static int maps_open(struct inode *inode, struct file *file, | |||
281 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 251 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
282 | if (priv) { | 252 | if (priv) { |
283 | priv->pid = proc_pid(inode); | 253 | priv->pid = proc_pid(inode); |
284 | ret = seq_open(file, ops); | 254 | ret = seq_open(file, &proc_pid_maps_ops); |
285 | if (!ret) { | 255 | if (!ret) { |
286 | struct seq_file *m = file->private_data; | 256 | struct seq_file *m = file->private_data; |
287 | m->private = priv; | 257 | m->private = priv; |
@@ -292,25 +262,8 @@ static int maps_open(struct inode *inode, struct file *file, | |||
292 | return ret; | 262 | return ret; |
293 | } | 263 | } |
294 | 264 | ||
295 | static int pid_maps_open(struct inode *inode, struct file *file) | 265 | const struct file_operations proc_maps_operations = { |
296 | { | 266 | .open = maps_open, |
297 | return maps_open(inode, file, &proc_pid_maps_ops); | ||
298 | } | ||
299 | |||
300 | static int tid_maps_open(struct inode *inode, struct file *file) | ||
301 | { | ||
302 | return maps_open(inode, file, &proc_tid_maps_ops); | ||
303 | } | ||
304 | |||
305 | const struct file_operations proc_pid_maps_operations = { | ||
306 | .open = pid_maps_open, | ||
307 | .read = seq_read, | ||
308 | .llseek = seq_lseek, | ||
309 | .release = seq_release_private, | ||
310 | }; | ||
311 | |||
312 | const struct file_operations proc_tid_maps_operations = { | ||
313 | .open = tid_maps_open, | ||
314 | .read = seq_read, | 267 | .read = seq_read, |
315 | .llseek = seq_lseek, | 268 | .llseek = seq_lseek, |
316 | .release = seq_release_private, | 269 | .release = seq_release_private, |
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 9610ac772d7..766b1d45605 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c | |||
@@ -11,20 +11,15 @@ static int uptime_proc_show(struct seq_file *m, void *v) | |||
11 | { | 11 | { |
12 | struct timespec uptime; | 12 | struct timespec uptime; |
13 | struct timespec idle; | 13 | struct timespec idle; |
14 | u64 idletime; | ||
15 | u64 nsec; | ||
16 | u32 rem; | ||
17 | int i; | 14 | int i; |
15 | cputime_t idletime = cputime_zero; | ||
18 | 16 | ||
19 | idletime = 0; | ||
20 | for_each_possible_cpu(i) | 17 | for_each_possible_cpu(i) |
21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; | 18 | idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle); |
22 | 19 | ||
23 | do_posix_clock_monotonic_gettime(&uptime); | 20 | do_posix_clock_monotonic_gettime(&uptime); |
24 | monotonic_to_bootbased(&uptime); | 21 | monotonic_to_bootbased(&uptime); |
25 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; | 22 | cputime_to_timespec(idletime, &idle); |
26 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); | ||
27 | idle.tv_nsec = rem; | ||
28 | seq_printf(m, "%lu.%02lu %lu.%02lu\n", | 23 | seq_printf(m, "%lu.%02lu %lu.%02lu\n", |
29 | (unsigned long) uptime.tv_sec, | 24 | (unsigned long) uptime.tv_sec, |
30 | (uptime.tv_nsec / (NSEC_PER_SEC / 100)), | 25 | (uptime.tv_nsec / (NSEC_PER_SEC / 100)), |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0d5071d2998..cd99bf55765 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/user.h> | 12 | #include <linux/user.h> |
13 | #include <linux/elf.h> | 13 | #include <linux/elf.h> |
14 | #include <linux/elfcore.h> | 14 | #include <linux/elfcore.h> |
15 | #include <linux/export.h> | ||
16 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
17 | #include <linux/highmem.h> | 16 | #include <linux/highmem.h> |
18 | #include <linux/bootmem.h> | 17 | #include <linux/bootmem.h> |
@@ -700,26 +699,3 @@ static int __init vmcore_init(void) | |||
700 | return 0; | 699 | return 0; |
701 | } | 700 | } |
702 | module_init(vmcore_init) | 701 | module_init(vmcore_init) |
703 | |||
704 | /* Cleanup function for vmcore module. */ | ||
705 | void vmcore_cleanup(void) | ||
706 | { | ||
707 | struct list_head *pos, *next; | ||
708 | |||
709 | if (proc_vmcore) { | ||
710 | remove_proc_entry(proc_vmcore->name, proc_vmcore->parent); | ||
711 | proc_vmcore = NULL; | ||
712 | } | ||
713 | |||
714 | /* clear the vmcore list. */ | ||
715 | list_for_each_safe(pos, next, &vmcore_list) { | ||
716 | struct vmcore *m; | ||
717 | |||
718 | m = list_entry(pos, struct vmcore, list); | ||
719 | list_del(&m->list); | ||
720 | kfree(m); | ||
721 | } | ||
722 | kfree(elfcorebuf); | ||
723 | elfcorebuf = NULL; | ||
724 | } | ||
725 | EXPORT_SYMBOL_GPL(vmcore_cleanup); | ||