aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r--fs/proc/array.c15
-rw-r--r--fs/proc/base.c570
-rw-r--r--fs/proc/generic.c10
-rw-r--r--fs/proc/inode.c21
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/namespaces.c1
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--fs/proc/proc_sysctl.c48
-rw-r--r--fs/proc/root.c78
-rw-r--r--fs/proc/stat.c82
-rw-r--r--fs/proc/task_mmu.c5
-rw-r--r--fs/proc/uptime.c11
-rw-r--r--fs/proc/vmcore.c1
14 files changed, 658 insertions, 194 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index ddffd7a88b97..c602b8d20f06 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -394,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
394 394
395 sigemptyset(&sigign); 395 sigemptyset(&sigign);
396 sigemptyset(&sigcatch); 396 sigemptyset(&sigcatch);
397 cutime = cstime = utime = stime = cputime_zero; 397 cutime = cstime = utime = stime = 0;
398 cgtime = gtime = cputime_zero; 398 cgtime = gtime = 0;
399 399
400 if (lock_task_sighand(task, &flags)) { 400 if (lock_task_sighand(task, &flags)) {
401 struct signal_struct *sig = task->signal; 401 struct signal_struct *sig = task->signal;
@@ -423,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
423 do { 423 do {
424 min_flt += t->min_flt; 424 min_flt += t->min_flt;
425 maj_flt += t->maj_flt; 425 maj_flt += t->maj_flt;
426 gtime = cputime_add(gtime, t->gtime); 426 gtime += t->gtime;
427 t = next_thread(t); 427 t = next_thread(t);
428 } while (t != task); 428 } while (t != task);
429 429
430 min_flt += sig->min_flt; 430 min_flt += sig->min_flt;
431 maj_flt += sig->maj_flt; 431 maj_flt += sig->maj_flt;
432 thread_group_times(task, &utime, &stime); 432 thread_group_times(task, &utime, &stime);
433 gtime = cputime_add(gtime, sig->gtime); 433 gtime += sig->gtime;
434 } 434 }
435 435
436 sid = task_session_nr_ns(task, ns); 436 sid = task_session_nr_ns(task, ns);
@@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
464 464
465 seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ 465 seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
466%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ 466%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
467%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", 467%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n",
468 pid_nr_ns(pid, ns), 468 pid_nr_ns(pid, ns),
469 tcomm, 469 tcomm,
470 state, 470 state,
@@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
511 task->policy, 511 task->policy,
512 (unsigned long long)delayacct_blkio_ticks(task), 512 (unsigned long long)delayacct_blkio_ticks(task),
513 cputime_to_clock_t(gtime), 513 cputime_to_clock_t(gtime),
514 cputime_to_clock_t(cgtime)); 514 cputime_to_clock_t(cgtime),
515 (mm && permitted) ? mm->start_data : 0,
516 (mm && permitted) ? mm->end_data : 0,
517 (mm && permitted) ? mm->start_brk : 0);
515 if (mm) 518 if (mm)
516 mmput(mm); 519 mmput(mm);
517 return 0; 520 return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5eb02069e1b8..5485a5388ecb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,9 +83,11 @@
83#include <linux/pid_namespace.h> 83#include <linux/pid_namespace.h>
84#include <linux/fs_struct.h> 84#include <linux/fs_struct.h>
85#include <linux/slab.h> 85#include <linux/slab.h>
86#include <linux/flex_array.h>
86#ifdef CONFIG_HARDWALL 87#ifdef CONFIG_HARDWALL
87#include <asm/hardwall.h> 88#include <asm/hardwall.h>
88#endif 89#endif
90#include <trace/events/oom.h>
89#include "internal.h" 91#include "internal.h"
90 92
91/* NOTE: 93/* NOTE:
@@ -101,7 +103,7 @@
101struct pid_entry { 103struct pid_entry {
102 char *name; 104 char *name;
103 int len; 105 int len;
104 mode_t mode; 106 umode_t mode;
105 const struct inode_operations *iop; 107 const struct inode_operations *iop;
106 const struct file_operations *fop; 108 const struct file_operations *fop;
107 union proc_op op; 109 union proc_op op;
@@ -133,6 +135,8 @@ struct pid_entry {
133 NULL, &proc_single_file_operations, \ 135 NULL, &proc_single_file_operations, \
134 { .proc_show = show } ) 136 { .proc_show = show } )
135 137
138static int proc_fd_permission(struct inode *inode, int mask);
139
136/* 140/*
137 * Count the number of hardlinks for the pid_entry table, excluding the . 141 * Count the number of hardlinks for the pid_entry table, excluding the .
138 * and .. links. 142 * and .. links.
@@ -165,9 +169,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
165 return result; 169 return result;
166} 170}
167 171
168static int proc_cwd_link(struct inode *inode, struct path *path) 172static int proc_cwd_link(struct dentry *dentry, struct path *path)
169{ 173{
170 struct task_struct *task = get_proc_task(inode); 174 struct task_struct *task = get_proc_task(dentry->d_inode);
171 int result = -ENOENT; 175 int result = -ENOENT;
172 176
173 if (task) { 177 if (task) {
@@ -182,9 +186,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
182 return result; 186 return result;
183} 187}
184 188
185static int proc_root_link(struct inode *inode, struct path *path) 189static int proc_root_link(struct dentry *dentry, struct path *path)
186{ 190{
187 struct task_struct *task = get_proc_task(inode); 191 struct task_struct *task = get_proc_task(dentry->d_inode);
188 int result = -ENOENT; 192 int result = -ENOENT;
189 193
190 if (task) { 194 if (task) {
@@ -627,122 +631,54 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
627 return 0; 631 return 0;
628} 632}
629 633
630static const struct inode_operations proc_def_inode_operations = { 634/*
631 .setattr = proc_setattr, 635 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
632}; 636 * or euid/egid (for hide_pid_min=2)?
633 637 */
634static int mounts_open_common(struct inode *inode, struct file *file, 638static bool has_pid_permissions(struct pid_namespace *pid,
635 const struct seq_operations *op) 639 struct task_struct *task,
640 int hide_pid_min)
636{ 641{
637 struct task_struct *task = get_proc_task(inode); 642 if (pid->hide_pid < hide_pid_min)
638 struct nsproxy *nsp; 643 return true;
639 struct mnt_namespace *ns = NULL; 644 if (in_group_p(pid->pid_gid))
640 struct path root; 645 return true;
641 struct proc_mounts *p; 646 return ptrace_may_access(task, PTRACE_MODE_READ);
642 int ret = -EINVAL;
643
644 if (task) {
645 rcu_read_lock();
646 nsp = task_nsproxy(task);
647 if (nsp) {
648 ns = nsp->mnt_ns;
649 if (ns)
650 get_mnt_ns(ns);
651 }
652 rcu_read_unlock();
653 if (ns && get_task_root(task, &root) == 0)
654 ret = 0;
655 put_task_struct(task);
656 }
657
658 if (!ns)
659 goto err;
660 if (ret)
661 goto err_put_ns;
662
663 ret = -ENOMEM;
664 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
665 if (!p)
666 goto err_put_path;
667
668 file->private_data = &p->m;
669 ret = seq_open(file, op);
670 if (ret)
671 goto err_free;
672
673 p->m.private = p;
674 p->ns = ns;
675 p->root = root;
676 p->m.poll_event = ns->event;
677
678 return 0;
679
680 err_free:
681 kfree(p);
682 err_put_path:
683 path_put(&root);
684 err_put_ns:
685 put_mnt_ns(ns);
686 err:
687 return ret;
688} 647}
689 648
690static int mounts_release(struct inode *inode, struct file *file)
691{
692 struct proc_mounts *p = file->private_data;
693 path_put(&p->root);
694 put_mnt_ns(p->ns);
695 return seq_release(inode, file);
696}
697 649
698static unsigned mounts_poll(struct file *file, poll_table *wait) 650static int proc_pid_permission(struct inode *inode, int mask)
699{ 651{
700 struct proc_mounts *p = file->private_data; 652 struct pid_namespace *pid = inode->i_sb->s_fs_info;
701 unsigned res = POLLIN | POLLRDNORM; 653 struct task_struct *task;
702 654 bool has_perms;
703 poll_wait(file, &p->ns->poll, wait);
704 if (mnt_had_events(p))
705 res |= POLLERR | POLLPRI;
706
707 return res;
708}
709 655
710static int mounts_open(struct inode *inode, struct file *file) 656 task = get_proc_task(inode);
711{ 657 if (!task)
712 return mounts_open_common(inode, file, &mounts_op); 658 return -ESRCH;
713} 659 has_perms = has_pid_permissions(pid, task, 1);
660 put_task_struct(task);
714 661
715static const struct file_operations proc_mounts_operations = { 662 if (!has_perms) {
716 .open = mounts_open, 663 if (pid->hide_pid == 2) {
717 .read = seq_read, 664 /*
718 .llseek = seq_lseek, 665 * Let's make getdents(), stat(), and open()
719 .release = mounts_release, 666 * consistent with each other. If a process
720 .poll = mounts_poll, 667 * may not stat() a file, it shouldn't be seen
721}; 668 * in procfs at all.
669 */
670 return -ENOENT;
671 }
722 672
723static int mountinfo_open(struct inode *inode, struct file *file) 673 return -EPERM;
724{ 674 }
725 return mounts_open_common(inode, file, &mountinfo_op); 675 return generic_permission(inode, mask);
726} 676}
727 677
728static const struct file_operations proc_mountinfo_operations = {
729 .open = mountinfo_open,
730 .read = seq_read,
731 .llseek = seq_lseek,
732 .release = mounts_release,
733 .poll = mounts_poll,
734};
735 678
736static int mountstats_open(struct inode *inode, struct file *file)
737{
738 return mounts_open_common(inode, file, &mountstats_op);
739}
740 679
741static const struct file_operations proc_mountstats_operations = { 680static const struct inode_operations proc_def_inode_operations = {
742 .open = mountstats_open, 681 .setattr = proc_setattr,
743 .read = seq_read,
744 .llseek = seq_lseek,
745 .release = mounts_release,
746}; 682};
747 683
748#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 684#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
@@ -1107,13 +1043,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1107 goto err_sighand; 1043 goto err_sighand;
1108 } 1044 }
1109 1045
1110 if (oom_adjust != task->signal->oom_adj) {
1111 if (oom_adjust == OOM_DISABLE)
1112 atomic_inc(&task->mm->oom_disable_count);
1113 if (task->signal->oom_adj == OOM_DISABLE)
1114 atomic_dec(&task->mm->oom_disable_count);
1115 }
1116
1117 /* 1046 /*
1118 * Warn that /proc/pid/oom_adj is deprecated, see 1047 * Warn that /proc/pid/oom_adj is deprecated, see
1119 * Documentation/feature-removal-schedule.txt. 1048 * Documentation/feature-removal-schedule.txt.
@@ -1131,6 +1060,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1131 else 1060 else
1132 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1061 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1133 -OOM_DISABLE; 1062 -OOM_DISABLE;
1063 trace_oom_score_adj_update(task);
1134err_sighand: 1064err_sighand:
1135 unlock_task_sighand(task, &flags); 1065 unlock_task_sighand(task, &flags);
1136err_task_lock: 1066err_task_lock:
@@ -1215,15 +1145,10 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1215 goto err_sighand; 1145 goto err_sighand;
1216 } 1146 }
1217 1147
1218 if (oom_score_adj != task->signal->oom_score_adj) {
1219 if (oom_score_adj == OOM_SCORE_ADJ_MIN)
1220 atomic_inc(&task->mm->oom_disable_count);
1221 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1222 atomic_dec(&task->mm->oom_disable_count);
1223 }
1224 task->signal->oom_score_adj = oom_score_adj; 1148 task->signal->oom_score_adj = oom_score_adj;
1225 if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1149 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1226 task->signal->oom_score_adj_min = oom_score_adj; 1150 task->signal->oom_score_adj_min = oom_score_adj;
1151 trace_oom_score_adj_update(task);
1227 /* 1152 /*
1228 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1153 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1229 * always attainable. 1154 * always attainable.
@@ -1580,13 +1505,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
1580 .release = single_release, 1505 .release = single_release,
1581}; 1506};
1582 1507
1583static int proc_exe_link(struct inode *inode, struct path *exe_path) 1508static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1584{ 1509{
1585 struct task_struct *task; 1510 struct task_struct *task;
1586 struct mm_struct *mm; 1511 struct mm_struct *mm;
1587 struct file *exe_file; 1512 struct file *exe_file;
1588 1513
1589 task = get_proc_task(inode); 1514 task = get_proc_task(dentry->d_inode);
1590 if (!task) 1515 if (!task)
1591 return -ENOENT; 1516 return -ENOENT;
1592 mm = get_task_mm(task); 1517 mm = get_task_mm(task);
@@ -1616,7 +1541,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1616 if (!proc_fd_access_allowed(inode)) 1541 if (!proc_fd_access_allowed(inode))
1617 goto out; 1542 goto out;
1618 1543
1619 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1544 error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
1620out: 1545out:
1621 return ERR_PTR(error); 1546 return ERR_PTR(error);
1622} 1547}
@@ -1655,7 +1580,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
1655 if (!proc_fd_access_allowed(inode)) 1580 if (!proc_fd_access_allowed(inode))
1656 goto out; 1581 goto out;
1657 1582
1658 error = PROC_I(inode)->op.proc_get_link(inode, &path); 1583 error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1659 if (error) 1584 if (error)
1660 goto out; 1585 goto out;
1661 1586
@@ -1736,6 +1661,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1736 struct inode *inode = dentry->d_inode; 1661 struct inode *inode = dentry->d_inode;
1737 struct task_struct *task; 1662 struct task_struct *task;
1738 const struct cred *cred; 1663 const struct cred *cred;
1664 struct pid_namespace *pid = dentry->d_sb->s_fs_info;
1739 1665
1740 generic_fillattr(inode, stat); 1666 generic_fillattr(inode, stat);
1741 1667
@@ -1744,6 +1670,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1744 stat->gid = 0; 1670 stat->gid = 0;
1745 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1671 task = pid_task(proc_pid(inode), PIDTYPE_PID);
1746 if (task) { 1672 if (task) {
1673 if (!has_pid_permissions(pid, task, 2)) {
1674 rcu_read_unlock();
1675 /*
1676 * This doesn't prevent learning whether PID exists,
1677 * it only makes getattr() consistent with readdir().
1678 */
1679 return -ENOENT;
1680 }
1747 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1681 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1748 task_dumpable(task)) { 1682 task_dumpable(task)) {
1749 cred = __task_cred(task); 1683 cred = __task_cred(task);
@@ -1947,9 +1881,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1947 return -ENOENT; 1881 return -ENOENT;
1948} 1882}
1949 1883
1950static int proc_fd_link(struct inode *inode, struct path *path) 1884static int proc_fd_link(struct dentry *dentry, struct path *path)
1951{ 1885{
1952 return proc_fd_info(inode, path, NULL); 1886 return proc_fd_info(dentry->d_inode, path, NULL);
1953} 1887}
1954 1888
1955static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1889static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -2170,6 +2104,355 @@ static const struct file_operations proc_fd_operations = {
2170 .llseek = default_llseek, 2104 .llseek = default_llseek,
2171}; 2105};
2172 2106
2107#ifdef CONFIG_CHECKPOINT_RESTORE
2108
2109/*
2110 * dname_to_vma_addr - maps a dentry name into two unsigned longs
2111 * which represent vma start and end addresses.
2112 */
2113static int dname_to_vma_addr(struct dentry *dentry,
2114 unsigned long *start, unsigned long *end)
2115{
2116 if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
2117 return -EINVAL;
2118
2119 return 0;
2120}
2121
2122static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
2123{
2124 unsigned long vm_start, vm_end;
2125 bool exact_vma_exists = false;
2126 struct mm_struct *mm = NULL;
2127 struct task_struct *task;
2128 const struct cred *cred;
2129 struct inode *inode;
2130 int status = 0;
2131
2132 if (nd && nd->flags & LOOKUP_RCU)
2133 return -ECHILD;
2134
2135 if (!capable(CAP_SYS_ADMIN)) {
2136 status = -EACCES;
2137 goto out_notask;
2138 }
2139
2140 inode = dentry->d_inode;
2141 task = get_proc_task(inode);
2142 if (!task)
2143 goto out_notask;
2144
2145 if (!ptrace_may_access(task, PTRACE_MODE_READ))
2146 goto out;
2147
2148 mm = get_task_mm(task);
2149 if (!mm)
2150 goto out;
2151
2152 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
2153 down_read(&mm->mmap_sem);
2154 exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
2155 up_read(&mm->mmap_sem);
2156 }
2157
2158 mmput(mm);
2159
2160 if (exact_vma_exists) {
2161 if (task_dumpable(task)) {
2162 rcu_read_lock();
2163 cred = __task_cred(task);
2164 inode->i_uid = cred->euid;
2165 inode->i_gid = cred->egid;
2166 rcu_read_unlock();
2167 } else {
2168 inode->i_uid = 0;
2169 inode->i_gid = 0;
2170 }
2171 security_task_to_inode(task, inode);
2172 status = 1;
2173 }
2174
2175out:
2176 put_task_struct(task);
2177
2178out_notask:
2179 if (status <= 0)
2180 d_drop(dentry);
2181
2182 return status;
2183}
2184
2185static const struct dentry_operations tid_map_files_dentry_operations = {
2186 .d_revalidate = map_files_d_revalidate,
2187 .d_delete = pid_delete_dentry,
2188};
2189
2190static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
2191{
2192 unsigned long vm_start, vm_end;
2193 struct vm_area_struct *vma;
2194 struct task_struct *task;
2195 struct mm_struct *mm;
2196 int rc;
2197
2198 rc = -ENOENT;
2199 task = get_proc_task(dentry->d_inode);
2200 if (!task)
2201 goto out;
2202
2203 mm = get_task_mm(task);
2204 put_task_struct(task);
2205 if (!mm)
2206 goto out;
2207
2208 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
2209 if (rc)
2210 goto out_mmput;
2211
2212 down_read(&mm->mmap_sem);
2213 vma = find_exact_vma(mm, vm_start, vm_end);
2214 if (vma && vma->vm_file) {
2215 *path = vma->vm_file->f_path;
2216 path_get(path);
2217 rc = 0;
2218 }
2219 up_read(&mm->mmap_sem);
2220
2221out_mmput:
2222 mmput(mm);
2223out:
2224 return rc;
2225}
2226
2227struct map_files_info {
2228 struct file *file;
2229 unsigned long len;
2230 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
2231};
2232
2233static struct dentry *
2234proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2235 struct task_struct *task, const void *ptr)
2236{
2237 const struct file *file = ptr;
2238 struct proc_inode *ei;
2239 struct inode *inode;
2240
2241 if (!file)
2242 return ERR_PTR(-ENOENT);
2243
2244 inode = proc_pid_make_inode(dir->i_sb, task);
2245 if (!inode)
2246 return ERR_PTR(-ENOENT);
2247
2248 ei = PROC_I(inode);
2249 ei->op.proc_get_link = proc_map_files_get_link;
2250
2251 inode->i_op = &proc_pid_link_inode_operations;
2252 inode->i_size = 64;
2253 inode->i_mode = S_IFLNK;
2254
2255 if (file->f_mode & FMODE_READ)
2256 inode->i_mode |= S_IRUSR;
2257 if (file->f_mode & FMODE_WRITE)
2258 inode->i_mode |= S_IWUSR;
2259
2260 d_set_d_op(dentry, &tid_map_files_dentry_operations);
2261 d_add(dentry, inode);
2262
2263 return NULL;
2264}
2265
2266static struct dentry *proc_map_files_lookup(struct inode *dir,
2267 struct dentry *dentry, struct nameidata *nd)
2268{
2269 unsigned long vm_start, vm_end;
2270 struct vm_area_struct *vma;
2271 struct task_struct *task;
2272 struct dentry *result;
2273 struct mm_struct *mm;
2274
2275 result = ERR_PTR(-EACCES);
2276 if (!capable(CAP_SYS_ADMIN))
2277 goto out;
2278
2279 result = ERR_PTR(-ENOENT);
2280 task = get_proc_task(dir);
2281 if (!task)
2282 goto out;
2283
2284 result = ERR_PTR(-EACCES);
2285 if (lock_trace(task))
2286 goto out_put_task;
2287
2288 result = ERR_PTR(-ENOENT);
2289 if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
2290 goto out_unlock;
2291
2292 mm = get_task_mm(task);
2293 if (!mm)
2294 goto out_unlock;
2295
2296 down_read(&mm->mmap_sem);
2297 vma = find_exact_vma(mm, vm_start, vm_end);
2298 if (!vma)
2299 goto out_no_vma;
2300
2301 result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
2302
2303out_no_vma:
2304 up_read(&mm->mmap_sem);
2305 mmput(mm);
2306out_unlock:
2307 unlock_trace(task);
2308out_put_task:
2309 put_task_struct(task);
2310out:
2311 return result;
2312}
2313
2314static const struct inode_operations proc_map_files_inode_operations = {
2315 .lookup = proc_map_files_lookup,
2316 .permission = proc_fd_permission,
2317 .setattr = proc_setattr,
2318};
2319
2320static int
2321proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2322{
2323 struct dentry *dentry = filp->f_path.dentry;
2324 struct inode *inode = dentry->d_inode;
2325 struct vm_area_struct *vma;
2326 struct task_struct *task;
2327 struct mm_struct *mm;
2328 ino_t ino;
2329 int ret;
2330
2331 ret = -EACCES;
2332 if (!capable(CAP_SYS_ADMIN))
2333 goto out;
2334
2335 ret = -ENOENT;
2336 task = get_proc_task(inode);
2337 if (!task)
2338 goto out;
2339
2340 ret = -EACCES;
2341 if (lock_trace(task))
2342 goto out_put_task;
2343
2344 ret = 0;
2345 switch (filp->f_pos) {
2346 case 0:
2347 ino = inode->i_ino;
2348 if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
2349 goto out_unlock;
2350 filp->f_pos++;
2351 case 1:
2352 ino = parent_ino(dentry);
2353 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2354 goto out_unlock;
2355 filp->f_pos++;
2356 default:
2357 {
2358 unsigned long nr_files, pos, i;
2359 struct flex_array *fa = NULL;
2360 struct map_files_info info;
2361 struct map_files_info *p;
2362
2363 mm = get_task_mm(task);
2364 if (!mm)
2365 goto out_unlock;
2366 down_read(&mm->mmap_sem);
2367
2368 nr_files = 0;
2369
2370 /*
2371 * We need two passes here:
2372 *
2373 * 1) Collect vmas of mapped files with mmap_sem taken
2374 * 2) Release mmap_sem and instantiate entries
2375 *
2376 * otherwise we get lockdep complained, since filldir()
2377 * routine might require mmap_sem taken in might_fault().
2378 */
2379
2380 for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
2381 if (vma->vm_file && ++pos > filp->f_pos)
2382 nr_files++;
2383 }
2384
2385 if (nr_files) {
2386 fa = flex_array_alloc(sizeof(info), nr_files,
2387 GFP_KERNEL);
2388 if (!fa || flex_array_prealloc(fa, 0, nr_files,
2389 GFP_KERNEL)) {
2390 ret = -ENOMEM;
2391 if (fa)
2392 flex_array_free(fa);
2393 up_read(&mm->mmap_sem);
2394 mmput(mm);
2395 goto out_unlock;
2396 }
2397 for (i = 0, vma = mm->mmap, pos = 2; vma;
2398 vma = vma->vm_next) {
2399 if (!vma->vm_file)
2400 continue;
2401 if (++pos <= filp->f_pos)
2402 continue;
2403
2404 get_file(vma->vm_file);
2405 info.file = vma->vm_file;
2406 info.len = snprintf(info.name,
2407 sizeof(info.name), "%lx-%lx",
2408 vma->vm_start, vma->vm_end);
2409 if (flex_array_put(fa, i++, &info, GFP_KERNEL))
2410 BUG();
2411 }
2412 }
2413 up_read(&mm->mmap_sem);
2414
2415 for (i = 0; i < nr_files; i++) {
2416 p = flex_array_get(fa, i);
2417 ret = proc_fill_cache(filp, dirent, filldir,
2418 p->name, p->len,
2419 proc_map_files_instantiate,
2420 task, p->file);
2421 if (ret)
2422 break;
2423 filp->f_pos++;
2424 fput(p->file);
2425 }
2426 for (; i < nr_files; i++) {
2427 /*
2428 * In case of error don't forget
2429 * to put rest of file refs.
2430 */
2431 p = flex_array_get(fa, i);
2432 fput(p->file);
2433 }
2434 if (fa)
2435 flex_array_free(fa);
2436 mmput(mm);
2437 }
2438 }
2439
2440out_unlock:
2441 unlock_trace(task);
2442out_put_task:
2443 put_task_struct(task);
2444out:
2445 return ret;
2446}
2447
2448static const struct file_operations proc_map_files_operations = {
2449 .read = generic_read_dir,
2450 .readdir = proc_map_files_readdir,
2451 .llseek = default_llseek,
2452};
2453
2454#endif /* CONFIG_CHECKPOINT_RESTORE */
2455
2173/* 2456/*
2174 * /proc/pid/fd needs a special permission handler so that a process can still 2457 * /proc/pid/fd needs a special permission handler so that a process can still
2175 * access /proc/self/fd after it has executed a setuid(). 2458 * access /proc/self/fd after it has executed a setuid().
@@ -2261,7 +2544,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2261 ei = PROC_I(inode); 2544 ei = PROC_I(inode);
2262 inode->i_mode = p->mode; 2545 inode->i_mode = p->mode;
2263 if (S_ISDIR(inode->i_mode)) 2546 if (S_ISDIR(inode->i_mode))
2264 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 2547 set_nlink(inode, 2); /* Use getattr to fix if necessary */
2265 if (p->iop) 2548 if (p->iop)
2266 inode->i_op = p->iop; 2549 inode->i_op = p->iop;
2267 if (p->fop) 2550 if (p->fop)
@@ -2655,7 +2938,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2655 2938
2656 inode->i_mode = p->mode; 2939 inode->i_mode = p->mode;
2657 if (S_ISDIR(inode->i_mode)) 2940 if (S_ISDIR(inode->i_mode))
2658 inode->i_nlink = 2; 2941 set_nlink(inode, 2);
2659 if (S_ISLNK(inode->i_mode)) 2942 if (S_ISLNK(inode->i_mode))
2660 inode->i_size = 64; 2943 inode->i_size = 64;
2661 if (p->iop) 2944 if (p->iop)
@@ -2785,6 +3068,9 @@ static const struct inode_operations proc_task_inode_operations;
2785static const struct pid_entry tgid_base_stuff[] = { 3068static const struct pid_entry tgid_base_stuff[] = {
2786 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 3069 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2787 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3070 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3071#ifdef CONFIG_CHECKPOINT_RESTORE
3072 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
3073#endif
2788 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3074 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2789 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3075 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2790#ifdef CONFIG_NET 3076#ifdef CONFIG_NET
@@ -2888,6 +3174,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
2888 .lookup = proc_tgid_base_lookup, 3174 .lookup = proc_tgid_base_lookup,
2889 .getattr = pid_getattr, 3175 .getattr = pid_getattr,
2890 .setattr = proc_setattr, 3176 .setattr = proc_setattr,
3177 .permission = proc_pid_permission,
2891}; 3178};
2892 3179
2893static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 3180static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
@@ -2994,8 +3281,8 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
2994 inode->i_fop = &proc_tgid_base_operations; 3281 inode->i_fop = &proc_tgid_base_operations;
2995 inode->i_flags|=S_IMMUTABLE; 3282 inode->i_flags|=S_IMMUTABLE;
2996 3283
2997 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 3284 set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
2998 ARRAY_SIZE(tgid_base_stuff)); 3285 ARRAY_SIZE(tgid_base_stuff)));
2999 3286
3000 d_set_d_op(dentry, &pid_dentry_operations); 3287 d_set_d_op(dentry, &pid_dentry_operations);
3001 3288
@@ -3091,6 +3378,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
3091 proc_pid_instantiate, iter.task, NULL); 3378 proc_pid_instantiate, iter.task, NULL);
3092} 3379}
3093 3380
3381static int fake_filldir(void *buf, const char *name, int namelen,
3382 loff_t offset, u64 ino, unsigned d_type)
3383{
3384 return 0;
3385}
3386
3094/* for the /proc/ directory itself, after non-process stuff has been done */ 3387/* for the /proc/ directory itself, after non-process stuff has been done */
3095int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3388int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3096{ 3389{
@@ -3098,6 +3391,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3098 struct task_struct *reaper; 3391 struct task_struct *reaper;
3099 struct tgid_iter iter; 3392 struct tgid_iter iter;
3100 struct pid_namespace *ns; 3393 struct pid_namespace *ns;
3394 filldir_t __filldir;
3101 3395
3102 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 3396 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3103 goto out_no_task; 3397 goto out_no_task;
@@ -3119,8 +3413,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3119 for (iter = next_tgid(ns, iter); 3413 for (iter = next_tgid(ns, iter);
3120 iter.task; 3414 iter.task;
3121 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3415 iter.tgid += 1, iter = next_tgid(ns, iter)) {
3416 if (has_pid_permissions(ns, iter.task, 2))
3417 __filldir = filldir;
3418 else
3419 __filldir = fake_filldir;
3420
3122 filp->f_pos = iter.tgid + TGID_OFFSET; 3421 filp->f_pos = iter.tgid + TGID_OFFSET;
3123 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { 3422 if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
3124 put_task_struct(iter.task); 3423 put_task_struct(iter.task);
3125 goto out; 3424 goto out;
3126 } 3425 }
@@ -3246,8 +3545,8 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3246 inode->i_fop = &proc_tid_base_operations; 3545 inode->i_fop = &proc_tid_base_operations;
3247 inode->i_flags|=S_IMMUTABLE; 3546 inode->i_flags|=S_IMMUTABLE;
3248 3547
3249 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3548 set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
3250 ARRAY_SIZE(tid_base_stuff)); 3549 ARRAY_SIZE(tid_base_stuff)));
3251 3550
3252 d_set_d_op(dentry, &pid_dentry_operations); 3551 d_set_d_op(dentry, &pid_dentry_operations);
3253 3552
@@ -3455,6 +3754,7 @@ static const struct inode_operations proc_task_inode_operations = {
3455 .lookup = proc_task_lookup, 3754 .lookup = proc_task_lookup,
3456 .getattr = proc_task_getattr, 3755 .getattr = proc_task_getattr,
3457 .setattr = proc_setattr, 3756 .setattr = proc_setattr,
3757 .permission = proc_pid_permission,
3458}; 3758};
3459 3759
3460static const struct file_operations proc_task_operations = { 3760static const struct file_operations proc_task_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9d99131d0d65..2edf34f2eb61 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -283,7 +283,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
283 struct inode *inode = dentry->d_inode; 283 struct inode *inode = dentry->d_inode;
284 struct proc_dir_entry *de = PROC_I(inode)->pde; 284 struct proc_dir_entry *de = PROC_I(inode)->pde;
285 if (de && de->nlink) 285 if (de && de->nlink)
286 inode->i_nlink = de->nlink; 286 set_nlink(inode, de->nlink);
287 287
288 generic_fillattr(inode, stat); 288 generic_fillattr(inode, stat);
289 return 0; 289 return 0;
@@ -597,7 +597,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
597 597
598static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 598static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
599 const char *name, 599 const char *name,
600 mode_t mode, 600 umode_t mode,
601 nlink_t nlink) 601 nlink_t nlink)
602{ 602{
603 struct proc_dir_entry *ent = NULL; 603 struct proc_dir_entry *ent = NULL;
@@ -659,7 +659,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
659} 659}
660EXPORT_SYMBOL(proc_symlink); 660EXPORT_SYMBOL(proc_symlink);
661 661
662struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 662struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
663 struct proc_dir_entry *parent) 663 struct proc_dir_entry *parent)
664{ 664{
665 struct proc_dir_entry *ent; 665 struct proc_dir_entry *ent;
@@ -699,7 +699,7 @@ struct proc_dir_entry *proc_mkdir(const char *name,
699} 699}
700EXPORT_SYMBOL(proc_mkdir); 700EXPORT_SYMBOL(proc_mkdir);
701 701
702struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 702struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
703 struct proc_dir_entry *parent) 703 struct proc_dir_entry *parent)
704{ 704{
705 struct proc_dir_entry *ent; 705 struct proc_dir_entry *ent;
@@ -728,7 +728,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
728} 728}
729EXPORT_SYMBOL(create_proc_entry); 729EXPORT_SYMBOL(create_proc_entry);
730 730
731struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, 731struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
732 struct proc_dir_entry *parent, 732 struct proc_dir_entry *parent,
733 const struct file_operations *proc_fops, 733 const struct file_operations *proc_fops,
734 void *data) 734 void *data)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ed72d6c1c6f..84fd3235a590 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -7,6 +7,7 @@
7#include <linux/time.h> 7#include <linux/time.h>
8#include <linux/proc_fs.h> 8#include <linux/proc_fs.h>
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/pid_namespace.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/string.h> 12#include <linux/string.h>
12#include <linux/stat.h> 13#include <linux/stat.h>
@@ -17,7 +18,9 @@
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/sysctl.h> 20#include <linux/sysctl.h>
21#include <linux/seq_file.h>
20#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/mount.h>
21 24
22#include <asm/system.h> 25#include <asm/system.h>
23#include <asm/uaccess.h> 26#include <asm/uaccess.h>
@@ -77,7 +80,6 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
77static void proc_i_callback(struct rcu_head *head) 80static void proc_i_callback(struct rcu_head *head)
78{ 81{
79 struct inode *inode = container_of(head, struct inode, i_rcu); 82 struct inode *inode = container_of(head, struct inode, i_rcu);
80 INIT_LIST_HEAD(&inode->i_dentry);
81 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 83 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
82} 84}
83 85
@@ -102,12 +104,27 @@ void __init proc_init_inodecache(void)
102 init_once); 104 init_once);
103} 105}
104 106
107static int proc_show_options(struct seq_file *seq, struct dentry *root)
108{
109 struct super_block *sb = root->d_sb;
110 struct pid_namespace *pid = sb->s_fs_info;
111
112 if (pid->pid_gid)
113 seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid);
114 if (pid->hide_pid != 0)
115 seq_printf(seq, ",hidepid=%u", pid->hide_pid);
116
117 return 0;
118}
119
105static const struct super_operations proc_sops = { 120static const struct super_operations proc_sops = {
106 .alloc_inode = proc_alloc_inode, 121 .alloc_inode = proc_alloc_inode,
107 .destroy_inode = proc_destroy_inode, 122 .destroy_inode = proc_destroy_inode,
108 .drop_inode = generic_delete_inode, 123 .drop_inode = generic_delete_inode,
109 .evict_inode = proc_evict_inode, 124 .evict_inode = proc_evict_inode,
110 .statfs = simple_statfs, 125 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127 .show_options = proc_show_options,
111}; 128};
112 129
113static void __pde_users_dec(struct proc_dir_entry *pde) 130static void __pde_users_dec(struct proc_dir_entry *pde)
@@ -445,7 +462,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
445 if (de->size) 462 if (de->size)
446 inode->i_size = de->size; 463 inode->i_size = de->size;
447 if (de->nlink) 464 if (de->nlink)
448 inode->i_nlink = de->nlink; 465 set_nlink(inode, de->nlink);
449 if (de->proc_iops) 466 if (de->proc_iops)
450 inode->i_op = de->proc_iops; 467 inode->i_op = de->proc_iops;
451 if (de->proc_fops) { 468 if (de->proc_fops) {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 7838e5cfec14..292577531ad1 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -117,6 +117,7 @@ void pde_put(struct proc_dir_entry *pde);
117 117
118int proc_fill_super(struct super_block *); 118int proc_fill_super(struct super_block *);
119struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 119struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
120int proc_remount(struct super_block *sb, int *flags, char *data);
120 121
121/* 122/*
122 * These are generic /proc routines that use the internal 123 * These are generic /proc routines that use the internal
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 586174168e2a..80e4645f7990 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -131,12 +131,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
131 K(i.freeswap), 131 K(i.freeswap),
132 K(global_page_state(NR_FILE_DIRTY)), 132 K(global_page_state(NR_FILE_DIRTY)),
133 K(global_page_state(NR_WRITEBACK)), 133 K(global_page_state(NR_WRITEBACK)),
134 K(global_page_state(NR_ANON_PAGES)
135#ifdef CONFIG_TRANSPARENT_HUGEPAGE 134#ifdef CONFIG_TRANSPARENT_HUGEPAGE
135 K(global_page_state(NR_ANON_PAGES)
136 + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * 136 + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
137 HPAGE_PMD_NR 137 HPAGE_PMD_NR),
138#else
139 K(global_page_state(NR_ANON_PAGES)),
138#endif 140#endif
139 ),
140 K(global_page_state(NR_FILE_MAPPED)), 141 K(global_page_state(NR_FILE_MAPPED)),
141 K(global_page_state(NR_SHMEM)), 142 K(global_page_state(NR_SHMEM)),
142 K(global_page_state(NR_SLAB_RECLAIMABLE) + 143 K(global_page_state(NR_SLAB_RECLAIMABLE) +
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f702acb..27da860115c6 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -9,7 +9,6 @@
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/utsname.h> 10#include <linux/utsname.h>
11#include <net/net_namespace.h> 11#include <net/net_namespace.h>
12#include <linux/mnt_namespace.h>
13#include <linux/ipc_namespace.h> 12#include <linux/ipc_namespace.h>
14#include <linux/pid_namespace.h> 13#include <linux/pid_namespace.h>
15#include "internal.h" 14#include "internal.h"
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index f738024ccc8e..06e1cc17caf6 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -179,7 +179,7 @@ const struct file_operations proc_net_operations = {
179 179
180 180
181struct proc_dir_entry *proc_net_fops_create(struct net *net, 181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, mode_t mode, const struct file_operations *fops) 182 const char *name, umode_t mode, const struct file_operations *fops)
183{ 183{
184 return proc_create(name, mode, net->proc_net, fops); 184 return proc_create(name, mode, net->proc_net, fops);
185} 185}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1a77dbef226f..a6b62173d4c3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -3,6 +3,7 @@
3 */ 3 */
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/poll.h>
6#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
7#include <linux/security.h> 8#include <linux/security.h>
8#include <linux/namei.h> 9#include <linux/namei.h>
@@ -14,6 +15,15 @@ static const struct inode_operations proc_sys_inode_operations;
14static const struct file_operations proc_sys_dir_file_operations; 15static const struct file_operations proc_sys_dir_file_operations;
15static const struct inode_operations proc_sys_dir_operations; 16static const struct inode_operations proc_sys_dir_operations;
16 17
18void proc_sys_poll_notify(struct ctl_table_poll *poll)
19{
20 if (!poll)
21 return;
22
23 atomic_inc(&poll->event);
24 wake_up_interruptible(&poll->wait);
25}
26
17static struct inode *proc_sys_make_inode(struct super_block *sb, 27static struct inode *proc_sys_make_inode(struct super_block *sb,
18 struct ctl_table_header *head, struct ctl_table *table) 28 struct ctl_table_header *head, struct ctl_table *table)
19{ 29{
@@ -39,7 +49,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
39 inode->i_fop = &proc_sys_file_operations; 49 inode->i_fop = &proc_sys_file_operations;
40 } else { 50 } else {
41 inode->i_mode |= S_IFDIR; 51 inode->i_mode |= S_IFDIR;
42 inode->i_nlink = 0; 52 clear_nlink(inode);
43 inode->i_op = &proc_sys_dir_operations; 53 inode->i_op = &proc_sys_dir_operations;
44 inode->i_fop = &proc_sys_dir_file_operations; 54 inode->i_fop = &proc_sys_dir_file_operations;
45 } 55 }
@@ -176,6 +186,39 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
176 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); 186 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
177} 187}
178 188
189static int proc_sys_open(struct inode *inode, struct file *filp)
190{
191 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
192
193 if (table->poll)
194 filp->private_data = proc_sys_poll_event(table->poll);
195
196 return 0;
197}
198
199static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
200{
201 struct inode *inode = filp->f_path.dentry->d_inode;
202 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
203 unsigned long event = (unsigned long)filp->private_data;
204 unsigned int ret = DEFAULT_POLLMASK;
205
206 if (!table->proc_handler)
207 goto out;
208
209 if (!table->poll)
210 goto out;
211
212 poll_wait(filp, &table->poll->wait, wait);
213
214 if (event != atomic_read(&table->poll->event)) {
215 filp->private_data = proc_sys_poll_event(table->poll);
216 ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
217 }
218
219out:
220 return ret;
221}
179 222
180static int proc_sys_fill_cache(struct file *filp, void *dirent, 223static int proc_sys_fill_cache(struct file *filp, void *dirent,
181 filldir_t filldir, 224 filldir_t filldir,
@@ -364,12 +407,15 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
364} 407}
365 408
366static const struct file_operations proc_sys_file_operations = { 409static const struct file_operations proc_sys_file_operations = {
410 .open = proc_sys_open,
411 .poll = proc_sys_poll,
367 .read = proc_sys_read, 412 .read = proc_sys_read,
368 .write = proc_sys_write, 413 .write = proc_sys_write,
369 .llseek = default_llseek, 414 .llseek = default_llseek,
370}; 415};
371 416
372static const struct file_operations proc_sys_dir_file_operations = { 417static const struct file_operations proc_sys_dir_file_operations = {
418 .read = generic_read_dir,
373 .readdir = proc_sys_readdir, 419 .readdir = proc_sys_readdir,
374 .llseek = generic_file_llseek, 420 .llseek = generic_file_llseek,
375}; 421};
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b77b874..46a15d8a29ca 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,6 +18,7 @@
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/pid_namespace.h> 20#include <linux/pid_namespace.h>
21#include <linux/parser.h>
21 22
22#include "internal.h" 23#include "internal.h"
23 24
@@ -36,6 +37,63 @@ static int proc_set_super(struct super_block *sb, void *data)
36 return err; 37 return err;
37} 38}
38 39
40enum {
41 Opt_gid, Opt_hidepid, Opt_err,
42};
43
44static const match_table_t tokens = {
45 {Opt_hidepid, "hidepid=%u"},
46 {Opt_gid, "gid=%u"},
47 {Opt_err, NULL},
48};
49
50static int proc_parse_options(char *options, struct pid_namespace *pid)
51{
52 char *p;
53 substring_t args[MAX_OPT_ARGS];
54 int option;
55
56 if (!options)
57 return 1;
58
59 while ((p = strsep(&options, ",")) != NULL) {
60 int token;
61 if (!*p)
62 continue;
63
64 args[0].to = args[0].from = 0;
65 token = match_token(p, tokens, args);
66 switch (token) {
67 case Opt_gid:
68 if (match_int(&args[0], &option))
69 return 0;
70 pid->pid_gid = option;
71 break;
72 case Opt_hidepid:
73 if (match_int(&args[0], &option))
74 return 0;
75 if (option < 0 || option > 2) {
76 pr_err("proc: hidepid value must be between 0 and 2.\n");
77 return 0;
78 }
79 pid->hide_pid = option;
80 break;
81 default:
82 pr_err("proc: unrecognized mount option \"%s\" "
83 "or missing value\n", p);
84 return 0;
85 }
86 }
87
88 return 1;
89}
90
91int proc_remount(struct super_block *sb, int *flags, char *data)
92{
93 struct pid_namespace *pid = sb->s_fs_info;
94 return !proc_parse_options(data, pid);
95}
96
39static struct dentry *proc_mount(struct file_system_type *fs_type, 97static struct dentry *proc_mount(struct file_system_type *fs_type,
40 int flags, const char *dev_name, void *data) 98 int flags, const char *dev_name, void *data)
41{ 99{
@@ -43,11 +101,15 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
43 struct super_block *sb; 101 struct super_block *sb;
44 struct pid_namespace *ns; 102 struct pid_namespace *ns;
45 struct proc_inode *ei; 103 struct proc_inode *ei;
104 char *options;
46 105
47 if (flags & MS_KERNMOUNT) 106 if (flags & MS_KERNMOUNT) {
48 ns = (struct pid_namespace *)data; 107 ns = (struct pid_namespace *)data;
49 else 108 options = NULL;
109 } else {
50 ns = current->nsproxy->pid_ns; 110 ns = current->nsproxy->pid_ns;
111 options = data;
112 }
51 113
52 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 114 sb = sget(fs_type, proc_test_super, proc_set_super, ns);
53 if (IS_ERR(sb)) 115 if (IS_ERR(sb))
@@ -55,6 +117,10 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
55 117
56 if (!sb->s_root) { 118 if (!sb->s_root) {
57 sb->s_flags = flags; 119 sb->s_flags = flags;
120 if (!proc_parse_options(options, ns)) {
121 deactivate_locked_super(sb);
122 return ERR_PTR(-EINVAL);
123 }
58 err = proc_fill_super(sb); 124 err = proc_fill_super(sb);
59 if (err) { 125 if (err) {
60 deactivate_locked_super(sb); 126 deactivate_locked_super(sb);
@@ -91,20 +157,18 @@ static struct file_system_type proc_fs_type = {
91 157
92void __init proc_root_init(void) 158void __init proc_root_init(void)
93{ 159{
94 struct vfsmount *mnt;
95 int err; 160 int err;
96 161
97 proc_init_inodecache(); 162 proc_init_inodecache();
98 err = register_filesystem(&proc_fs_type); 163 err = register_filesystem(&proc_fs_type);
99 if (err) 164 if (err)
100 return; 165 return;
101 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 166 err = pid_ns_prepare_proc(&init_pid_ns);
102 if (IS_ERR(mnt)) { 167 if (err) {
103 unregister_filesystem(&proc_fs_type); 168 unregister_filesystem(&proc_fs_type);
104 return; 169 return;
105 } 170 }
106 171
107 init_pid_ns.proc_mnt = mnt;
108 proc_symlink("mounts", NULL, "self/mounts"); 172 proc_symlink("mounts", NULL, "self/mounts");
109 173
110 proc_net_init(); 174 proc_net_init();
@@ -209,5 +273,5 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
209 273
210void pid_ns_release_proc(struct pid_namespace *ns) 274void pid_ns_release_proc(struct pid_namespace *ns)
211{ 275{
212 mntput(ns->proc_mnt); 276 kern_unmount(ns->proc_mnt);
213} 277}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9758b654a1bc..d76ca6ae2b1b 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -10,6 +10,7 @@
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/irqnr.h> 11#include <linux/irqnr.h>
12#include <asm/cputime.h> 12#include <asm/cputime.h>
13#include <linux/tick.h>
13 14
14#ifndef arch_irq_stat_cpu 15#ifndef arch_irq_stat_cpu
15#define arch_irq_stat_cpu(cpu) 0 16#define arch_irq_stat_cpu(cpu) 0
@@ -21,38 +22,61 @@
21#define arch_idle_time(cpu) 0 22#define arch_idle_time(cpu) 0
22#endif 23#endif
23 24
25static u64 get_idle_time(int cpu)
26{
27 u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
28
29 if (idle_time == -1ULL) {
30 /* !NO_HZ so we can rely on cpustat.idle */
31 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
32 idle += arch_idle_time(cpu);
33 } else
34 idle = usecs_to_cputime64(idle_time);
35
36 return idle;
37}
38
39static u64 get_iowait_time(int cpu)
40{
41 u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
42
43 if (iowait_time == -1ULL)
44 /* !NO_HZ so we can rely on cpustat.iowait */
45 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
46 else
47 iowait = usecs_to_cputime64(iowait_time);
48
49 return iowait;
50}
51
24static int show_stat(struct seq_file *p, void *v) 52static int show_stat(struct seq_file *p, void *v)
25{ 53{
26 int i, j; 54 int i, j;
27 unsigned long jif; 55 unsigned long jif;
28 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 56 u64 user, nice, system, idle, iowait, irq, softirq, steal;
29 cputime64_t guest, guest_nice; 57 u64 guest, guest_nice;
30 u64 sum = 0; 58 u64 sum = 0;
31 u64 sum_softirq = 0; 59 u64 sum_softirq = 0;
32 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; 60 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
33 struct timespec boottime; 61 struct timespec boottime;
34 62
35 user = nice = system = idle = iowait = 63 user = nice = system = idle = iowait =
36 irq = softirq = steal = cputime64_zero; 64 irq = softirq = steal = 0;
37 guest = guest_nice = cputime64_zero; 65 guest = guest_nice = 0;
38 getboottime(&boottime); 66 getboottime(&boottime);
39 jif = boottime.tv_sec; 67 jif = boottime.tv_sec;
40 68
41 for_each_possible_cpu(i) { 69 for_each_possible_cpu(i) {
42 user = cputime64_add(user, kstat_cpu(i).cpustat.user); 70 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
43 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); 71 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
44 system = cputime64_add(system, kstat_cpu(i).cpustat.system); 72 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
45 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); 73 idle += get_idle_time(i);
46 idle = cputime64_add(idle, arch_idle_time(i)); 74 iowait += get_iowait_time(i);
47 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); 75 irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
48 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 76 softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
49 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 77 steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
50 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 78 guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
51 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 79 guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
52 guest_nice = cputime64_add(guest_nice,
53 kstat_cpu(i).cpustat.guest_nice);
54 sum += kstat_cpu_irqs_sum(i);
55 sum += arch_irq_stat_cpu(i);
56 80
57 for (j = 0; j < NR_SOFTIRQS; j++) { 81 for (j = 0; j < NR_SOFTIRQS; j++) {
58 unsigned int softirq_stat = kstat_softirqs_cpu(j, i); 82 unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -76,19 +100,17 @@ static int show_stat(struct seq_file *p, void *v)
76 (unsigned long long)cputime64_to_clock_t(guest), 100 (unsigned long long)cputime64_to_clock_t(guest),
77 (unsigned long long)cputime64_to_clock_t(guest_nice)); 101 (unsigned long long)cputime64_to_clock_t(guest_nice));
78 for_each_online_cpu(i) { 102 for_each_online_cpu(i) {
79
80 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 103 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
81 user = kstat_cpu(i).cpustat.user; 104 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
82 nice = kstat_cpu(i).cpustat.nice; 105 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
83 system = kstat_cpu(i).cpustat.system; 106 system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
84 idle = kstat_cpu(i).cpustat.idle; 107 idle = get_idle_time(i);
85 idle = cputime64_add(idle, arch_idle_time(i)); 108 iowait = get_iowait_time(i);
86 iowait = kstat_cpu(i).cpustat.iowait; 109 irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
87 irq = kstat_cpu(i).cpustat.irq; 110 softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
88 softirq = kstat_cpu(i).cpustat.softirq; 111 steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
89 steal = kstat_cpu(i).cpustat.steal; 112 guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
90 guest = kstat_cpu(i).cpustat.guest; 113 guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
91 guest_nice = kstat_cpu(i).cpustat.guest_nice;
92 seq_printf(p, 114 seq_printf(p,
93 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " 115 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
94 "%llu\n", 116 "%llu\n",
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5afaa58a8630..e418c5abdb0e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -44,6 +44,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
44 "VmPeak:\t%8lu kB\n" 44 "VmPeak:\t%8lu kB\n"
45 "VmSize:\t%8lu kB\n" 45 "VmSize:\t%8lu kB\n"
46 "VmLck:\t%8lu kB\n" 46 "VmLck:\t%8lu kB\n"
47 "VmPin:\t%8lu kB\n"
47 "VmHWM:\t%8lu kB\n" 48 "VmHWM:\t%8lu kB\n"
48 "VmRSS:\t%8lu kB\n" 49 "VmRSS:\t%8lu kB\n"
49 "VmData:\t%8lu kB\n" 50 "VmData:\t%8lu kB\n"
@@ -55,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
55 hiwater_vm << (PAGE_SHIFT-10), 56 hiwater_vm << (PAGE_SHIFT-10),
56 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), 57 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
57 mm->locked_vm << (PAGE_SHIFT-10), 58 mm->locked_vm << (PAGE_SHIFT-10),
59 mm->pinned_vm << (PAGE_SHIFT-10),
58 hiwater_rss << (PAGE_SHIFT-10), 60 hiwater_rss << (PAGE_SHIFT-10),
59 total_rss << (PAGE_SHIFT-10), 61 total_rss << (PAGE_SHIFT-10),
60 data << (PAGE_SHIFT-10), 62 data << (PAGE_SHIFT-10),
@@ -1039,6 +1041,9 @@ static int show_numa_map(struct seq_file *m, void *v)
1039 seq_printf(m, " stack"); 1041 seq_printf(m, " stack");
1040 } 1042 }
1041 1043
1044 if (is_vm_hugetlb_page(vma))
1045 seq_printf(m, " huge");
1046
1042 walk_page_range(vma->vm_start, vma->vm_end, &walk); 1047 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1043 1048
1044 if (!md->pages) 1049 if (!md->pages)
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d456050..9610ac772d7e 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v)
11{ 11{
12 struct timespec uptime; 12 struct timespec uptime;
13 struct timespec idle; 13 struct timespec idle;
14 u64 idletime;
15 u64 nsec;
16 u32 rem;
14 int i; 17 int i;
15 cputime_t idletime = cputime_zero;
16 18
19 idletime = 0;
17 for_each_possible_cpu(i) 20 for_each_possible_cpu(i)
18 idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle); 21 idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
19 22
20 do_posix_clock_monotonic_gettime(&uptime); 23 do_posix_clock_monotonic_gettime(&uptime);
21 monotonic_to_bootbased(&uptime); 24 monotonic_to_bootbased(&uptime);
22 cputime_to_timespec(idletime, &idle); 25 nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
26 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
27 idle.tv_nsec = rem;
23 seq_printf(m, "%lu.%02lu %lu.%02lu\n", 28 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
24 (unsigned long) uptime.tv_sec, 29 (unsigned long) uptime.tv_sec,
25 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 30 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index cd99bf557650..b0f450a2bb7c 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -12,6 +12,7 @@
12#include <linux/user.h> 12#include <linux/user.h>
13#include <linux/elf.h> 13#include <linux/elf.h>
14#include <linux/elfcore.h> 14#include <linux/elfcore.h>
15#include <linux/export.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
16#include <linux/highmem.h> 17#include <linux/highmem.h>
17#include <linux/bootmem.h> 18#include <linux/bootmem.h>