diff options
Diffstat (limited to 'fs/proc')
| -rw-r--r-- | fs/proc/array.c | 134 | ||||
| -rw-r--r-- | fs/proc/base.c | 178 | ||||
| -rw-r--r-- | fs/proc/inode.c | 23 | ||||
| -rw-r--r-- | fs/proc/internal.h | 12 | ||||
| -rw-r--r-- | fs/proc/kcore.c | 8 | ||||
| -rw-r--r-- | fs/proc/namespaces.c | 8 | ||||
| -rw-r--r-- | fs/proc/page.c | 2 | ||||
| -rw-r--r-- | fs/proc/proc_sysctl.c | 1276 | ||||
| -rw-r--r-- | fs/proc/root.c | 11 | ||||
| -rw-r--r-- | fs/proc/stat.c | 96 | ||||
| -rw-r--r-- | fs/proc/task_mmu.c | 370 | ||||
| -rw-r--r-- | fs/proc/task_nommu.c | 69 | ||||
| -rw-r--r-- | fs/proc/vmcore.c | 23 |
13 files changed, 1836 insertions, 374 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index c602b8d20f06..dc4c5a7b9ece 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
| @@ -81,6 +81,7 @@ | |||
| 81 | #include <linux/pid_namespace.h> | 81 | #include <linux/pid_namespace.h> |
| 82 | #include <linux/ptrace.h> | 82 | #include <linux/ptrace.h> |
| 83 | #include <linux/tracehook.h> | 83 | #include <linux/tracehook.h> |
| 84 | #include <linux/user_namespace.h> | ||
| 84 | 85 | ||
| 85 | #include <asm/pgtable.h> | 86 | #include <asm/pgtable.h> |
| 86 | #include <asm/processor.h> | 87 | #include <asm/processor.h> |
| @@ -161,6 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk) | |||
| 161 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | 162 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, |
| 162 | struct pid *pid, struct task_struct *p) | 163 | struct pid *pid, struct task_struct *p) |
| 163 | { | 164 | { |
| 165 | struct user_namespace *user_ns = current_user_ns(); | ||
| 164 | struct group_info *group_info; | 166 | struct group_info *group_info; |
| 165 | int g; | 167 | int g; |
| 166 | struct fdtable *fdt = NULL; | 168 | struct fdtable *fdt = NULL; |
| @@ -189,8 +191,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | |||
| 189 | task_tgid_nr_ns(p, ns), | 191 | task_tgid_nr_ns(p, ns), |
| 190 | pid_nr_ns(pid, ns), | 192 | pid_nr_ns(pid, ns), |
| 191 | ppid, tpid, | 193 | ppid, tpid, |
| 192 | cred->uid, cred->euid, cred->suid, cred->fsuid, | 194 | from_kuid_munged(user_ns, cred->uid), |
| 193 | cred->gid, cred->egid, cred->sgid, cred->fsgid); | 195 | from_kuid_munged(user_ns, cred->euid), |
| 196 | from_kuid_munged(user_ns, cred->suid), | ||
| 197 | from_kuid_munged(user_ns, cred->fsuid), | ||
| 198 | from_kgid_munged(user_ns, cred->gid), | ||
| 199 | from_kgid_munged(user_ns, cred->egid), | ||
| 200 | from_kgid_munged(user_ns, cred->sgid), | ||
| 201 | from_kgid_munged(user_ns, cred->fsgid)); | ||
| 194 | 202 | ||
| 195 | task_lock(p); | 203 | task_lock(p); |
| 196 | if (p->files) | 204 | if (p->files) |
| @@ -205,7 +213,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | |||
| 205 | task_unlock(p); | 213 | task_unlock(p); |
| 206 | 214 | ||
| 207 | for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) | 215 | for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) |
| 208 | seq_printf(m, "%d ", GROUP_AT(group_info, g)); | 216 | seq_printf(m, "%d ", |
| 217 | from_kgid_munged(user_ns, GROUP_AT(group_info, g))); | ||
| 209 | put_cred(cred); | 218 | put_cred(cred); |
| 210 | 219 | ||
| 211 | seq_putc(m, '\n'); | 220 | seq_putc(m, '\n'); |
| @@ -462,59 +471,56 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
| 462 | /* convert nsec -> ticks */ | 471 | /* convert nsec -> ticks */ |
| 463 | start_time = nsec_to_clock_t(start_time); | 472 | start_time = nsec_to_clock_t(start_time); |
| 464 | 473 | ||
| 465 | seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ | 474 | seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); |
| 466 | %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ | 475 | seq_put_decimal_ll(m, ' ', ppid); |
| 467 | %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n", | 476 | seq_put_decimal_ll(m, ' ', pgid); |
| 468 | pid_nr_ns(pid, ns), | 477 | seq_put_decimal_ll(m, ' ', sid); |
| 469 | tcomm, | 478 | seq_put_decimal_ll(m, ' ', tty_nr); |
| 470 | state, | 479 | seq_put_decimal_ll(m, ' ', tty_pgrp); |
| 471 | ppid, | 480 | seq_put_decimal_ull(m, ' ', task->flags); |
| 472 | pgid, | 481 | seq_put_decimal_ull(m, ' ', min_flt); |
| 473 | sid, | 482 | seq_put_decimal_ull(m, ' ', cmin_flt); |
| 474 | tty_nr, | 483 | seq_put_decimal_ull(m, ' ', maj_flt); |
| 475 | tty_pgrp, | 484 | seq_put_decimal_ull(m, ' ', cmaj_flt); |
| 476 | task->flags, | 485 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(utime)); |
| 477 | min_flt, | 486 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(stime)); |
| 478 | cmin_flt, | 487 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cutime)); |
| 479 | maj_flt, | 488 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cstime)); |
| 480 | cmaj_flt, | 489 | seq_put_decimal_ll(m, ' ', priority); |
| 481 | cputime_to_clock_t(utime), | 490 | seq_put_decimal_ll(m, ' ', nice); |
| 482 | cputime_to_clock_t(stime), | 491 | seq_put_decimal_ll(m, ' ', num_threads); |
| 483 | cputime_to_clock_t(cutime), | 492 | seq_put_decimal_ull(m, ' ', 0); |
| 484 | cputime_to_clock_t(cstime), | 493 | seq_put_decimal_ull(m, ' ', start_time); |
| 485 | priority, | 494 | seq_put_decimal_ull(m, ' ', vsize); |
| 486 | nice, | 495 | seq_put_decimal_ll(m, ' ', mm ? get_mm_rss(mm) : 0); |
| 487 | num_threads, | 496 | seq_put_decimal_ull(m, ' ', rsslim); |
| 488 | start_time, | 497 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->start_code : 1) : 0); |
| 489 | vsize, | 498 | seq_put_decimal_ull(m, ' ', mm ? (permitted ? mm->end_code : 1) : 0); |
| 490 | mm ? get_mm_rss(mm) : 0, | 499 | seq_put_decimal_ull(m, ' ', (permitted && mm) ? mm->start_stack : 0); |
| 491 | rsslim, | 500 | seq_put_decimal_ull(m, ' ', esp); |
| 492 | mm ? (permitted ? mm->start_code : 1) : 0, | 501 | seq_put_decimal_ull(m, ' ', eip); |
| 493 | mm ? (permitted ? mm->end_code : 1) : 0, | 502 | /* The signal information here is obsolete. |
| 494 | (permitted && mm) ? mm->start_stack : 0, | 503 | * It must be decimal for Linux 2.0 compatibility. |
| 495 | esp, | 504 | * Use /proc/#/status for real-time signals. |
| 496 | eip, | 505 | */ |
| 497 | /* The signal information here is obsolete. | 506 | seq_put_decimal_ull(m, ' ', task->pending.signal.sig[0] & 0x7fffffffUL); |
| 498 | * It must be decimal for Linux 2.0 compatibility. | 507 | seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); |
| 499 | * Use /proc/#/status for real-time signals. | 508 | seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); |
| 500 | */ | 509 | seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); |
| 501 | task->pending.signal.sig[0] & 0x7fffffffUL, | 510 | seq_put_decimal_ull(m, ' ', wchan); |
| 502 | task->blocked.sig[0] & 0x7fffffffUL, | 511 | seq_put_decimal_ull(m, ' ', 0); |
| 503 | sigign .sig[0] & 0x7fffffffUL, | 512 | seq_put_decimal_ull(m, ' ', 0); |
| 504 | sigcatch .sig[0] & 0x7fffffffUL, | 513 | seq_put_decimal_ll(m, ' ', task->exit_signal); |
| 505 | wchan, | 514 | seq_put_decimal_ll(m, ' ', task_cpu(task)); |
| 506 | 0UL, | 515 | seq_put_decimal_ull(m, ' ', task->rt_priority); |
| 507 | 0UL, | 516 | seq_put_decimal_ull(m, ' ', task->policy); |
| 508 | task->exit_signal, | 517 | seq_put_decimal_ull(m, ' ', delayacct_blkio_ticks(task)); |
| 509 | task_cpu(task), | 518 | seq_put_decimal_ull(m, ' ', cputime_to_clock_t(gtime)); |
| 510 | task->rt_priority, | 519 | seq_put_decimal_ll(m, ' ', cputime_to_clock_t(cgtime)); |
| 511 | task->policy, | 520 | seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_data : 0); |
| 512 | (unsigned long long)delayacct_blkio_ticks(task), | 521 | seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->end_data : 0); |
| 513 | cputime_to_clock_t(gtime), | 522 | seq_put_decimal_ull(m, ' ', (mm && permitted) ? mm->start_brk : 0); |
| 514 | cputime_to_clock_t(cgtime), | 523 | seq_putc(m, '\n'); |
| 515 | (mm && permitted) ? mm->start_data : 0, | ||
| 516 | (mm && permitted) ? mm->end_data : 0, | ||
| 517 | (mm && permitted) ? mm->start_brk : 0); | ||
| 518 | if (mm) | 524 | if (mm) |
| 519 | mmput(mm); | 525 | mmput(mm); |
| 520 | return 0; | 526 | return 0; |
| @@ -542,8 +548,20 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | |||
| 542 | size = task_statm(mm, &shared, &text, &data, &resident); | 548 | size = task_statm(mm, &shared, &text, &data, &resident); |
| 543 | mmput(mm); | 549 | mmput(mm); |
| 544 | } | 550 | } |
| 545 | seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", | 551 | /* |
| 546 | size, resident, shared, text, data); | 552 | * For quick read, open code by putting numbers directly |
| 553 | * expected format is | ||
| 554 | * seq_printf(m, "%lu %lu %lu %lu 0 %lu 0\n", | ||
| 555 | * size, resident, shared, text, data); | ||
| 556 | */ | ||
| 557 | seq_put_decimal_ull(m, 0, size); | ||
| 558 | seq_put_decimal_ull(m, ' ', resident); | ||
| 559 | seq_put_decimal_ull(m, ' ', shared); | ||
| 560 | seq_put_decimal_ull(m, ' ', text); | ||
| 561 | seq_put_decimal_ull(m, ' ', 0); | ||
| 562 | seq_put_decimal_ull(m, ' ', data); | ||
| 563 | seq_put_decimal_ull(m, ' ', 0); | ||
| 564 | seq_putc(m, '\n'); | ||
| 547 | 565 | ||
| 548 | return 0; | 566 | return 0; |
| 549 | } | 567 | } |
diff --git a/fs/proc/base.c b/fs/proc/base.c index d4548dd49b02..d7d711876b6a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -81,6 +81,7 @@ | |||
| 81 | #include <linux/oom.h> | 81 | #include <linux/oom.h> |
| 82 | #include <linux/elf.h> | 82 | #include <linux/elf.h> |
| 83 | #include <linux/pid_namespace.h> | 83 | #include <linux/pid_namespace.h> |
| 84 | #include <linux/user_namespace.h> | ||
| 84 | #include <linux/fs_struct.h> | 85 | #include <linux/fs_struct.h> |
| 85 | #include <linux/slab.h> | 86 | #include <linux/slab.h> |
| 86 | #include <linux/flex_array.h> | 87 | #include <linux/flex_array.h> |
| @@ -410,12 +411,13 @@ static const struct file_operations proc_lstats_operations = { | |||
| 410 | 411 | ||
| 411 | static int proc_oom_score(struct task_struct *task, char *buffer) | 412 | static int proc_oom_score(struct task_struct *task, char *buffer) |
| 412 | { | 413 | { |
| 414 | unsigned long totalpages = totalram_pages + total_swap_pages; | ||
| 413 | unsigned long points = 0; | 415 | unsigned long points = 0; |
| 414 | 416 | ||
| 415 | read_lock(&tasklist_lock); | 417 | read_lock(&tasklist_lock); |
| 416 | if (pid_alive(task)) | 418 | if (pid_alive(task)) |
| 417 | points = oom_badness(task, NULL, NULL, | 419 | points = oom_badness(task, NULL, NULL, totalpages) * |
| 418 | totalram_pages + total_swap_pages); | 420 | 1000 / totalpages; |
| 419 | read_unlock(&tasklist_lock); | 421 | read_unlock(&tasklist_lock); |
| 420 | return sprintf(buffer, "%lu\n", points); | 422 | return sprintf(buffer, "%lu\n", points); |
| 421 | } | 423 | } |
| @@ -1310,8 +1312,7 @@ sched_autogroup_write(struct file *file, const char __user *buf, | |||
| 1310 | if (!p) | 1312 | if (!p) |
| 1311 | return -ESRCH; | 1313 | return -ESRCH; |
| 1312 | 1314 | ||
| 1313 | err = nice; | 1315 | err = proc_sched_autogroup_set_nice(p, nice); |
| 1314 | err = proc_sched_autogroup_set_nice(p, &err); | ||
| 1315 | if (err) | 1316 | if (err) |
| 1316 | count = err; | 1317 | count = err; |
| 1317 | 1318 | ||
| @@ -1562,8 +1563,8 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
| 1562 | generic_fillattr(inode, stat); | 1563 | generic_fillattr(inode, stat); |
| 1563 | 1564 | ||
| 1564 | rcu_read_lock(); | 1565 | rcu_read_lock(); |
| 1565 | stat->uid = 0; | 1566 | stat->uid = GLOBAL_ROOT_UID; |
| 1566 | stat->gid = 0; | 1567 | stat->gid = GLOBAL_ROOT_GID; |
| 1567 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 1568 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
| 1568 | if (task) { | 1569 | if (task) { |
| 1569 | if (!has_pid_permissions(pid, task, 2)) { | 1570 | if (!has_pid_permissions(pid, task, 2)) { |
| @@ -1623,8 +1624,8 @@ int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 1623 | inode->i_gid = cred->egid; | 1624 | inode->i_gid = cred->egid; |
| 1624 | rcu_read_unlock(); | 1625 | rcu_read_unlock(); |
| 1625 | } else { | 1626 | } else { |
| 1626 | inode->i_uid = 0; | 1627 | inode->i_uid = GLOBAL_ROOT_UID; |
| 1627 | inode->i_gid = 0; | 1628 | inode->i_gid = GLOBAL_ROOT_GID; |
| 1628 | } | 1629 | } |
| 1629 | inode->i_mode &= ~(S_ISUID | S_ISGID); | 1630 | inode->i_mode &= ~(S_ISUID | S_ISGID); |
| 1630 | security_task_to_inode(task, inode); | 1631 | security_task_to_inode(task, inode); |
| @@ -1754,7 +1755,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) | |||
| 1754 | 1755 | ||
| 1755 | fdt = files_fdtable(files); | 1756 | fdt = files_fdtable(files); |
| 1756 | f_flags = file->f_flags & ~O_CLOEXEC; | 1757 | f_flags = file->f_flags & ~O_CLOEXEC; |
| 1757 | if (FD_ISSET(fd, fdt->close_on_exec)) | 1758 | if (close_on_exec(fd, fdt)) |
| 1758 | f_flags |= O_CLOEXEC; | 1759 | f_flags |= O_CLOEXEC; |
| 1759 | 1760 | ||
| 1760 | if (path) { | 1761 | if (path) { |
| @@ -1800,10 +1801,15 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 1800 | if (task) { | 1801 | if (task) { |
| 1801 | files = get_files_struct(task); | 1802 | files = get_files_struct(task); |
| 1802 | if (files) { | 1803 | if (files) { |
| 1804 | struct file *file; | ||
| 1803 | rcu_read_lock(); | 1805 | rcu_read_lock(); |
| 1804 | if (fcheck_files(files, fd)) { | 1806 | file = fcheck_files(files, fd); |
| 1807 | if (file) { | ||
| 1808 | unsigned i_mode, f_mode = file->f_mode; | ||
| 1809 | |||
| 1805 | rcu_read_unlock(); | 1810 | rcu_read_unlock(); |
| 1806 | put_files_struct(files); | 1811 | put_files_struct(files); |
| 1812 | |||
| 1807 | if (task_dumpable(task)) { | 1813 | if (task_dumpable(task)) { |
| 1808 | rcu_read_lock(); | 1814 | rcu_read_lock(); |
| 1809 | cred = __task_cred(task); | 1815 | cred = __task_cred(task); |
| @@ -1811,10 +1817,17 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 1811 | inode->i_gid = cred->egid; | 1817 | inode->i_gid = cred->egid; |
| 1812 | rcu_read_unlock(); | 1818 | rcu_read_unlock(); |
| 1813 | } else { | 1819 | } else { |
| 1814 | inode->i_uid = 0; | 1820 | inode->i_uid = GLOBAL_ROOT_UID; |
| 1815 | inode->i_gid = 0; | 1821 | inode->i_gid = GLOBAL_ROOT_GID; |
| 1816 | } | 1822 | } |
| 1817 | inode->i_mode &= ~(S_ISUID | S_ISGID); | 1823 | |
| 1824 | i_mode = S_IFLNK; | ||
| 1825 | if (f_mode & FMODE_READ) | ||
| 1826 | i_mode |= S_IRUSR | S_IXUSR; | ||
| 1827 | if (f_mode & FMODE_WRITE) | ||
| 1828 | i_mode |= S_IWUSR | S_IXUSR; | ||
| 1829 | inode->i_mode = i_mode; | ||
| 1830 | |||
| 1818 | security_task_to_inode(task, inode); | 1831 | security_task_to_inode(task, inode); |
| 1819 | put_task_struct(task); | 1832 | put_task_struct(task); |
| 1820 | return 1; | 1833 | return 1; |
| @@ -1838,8 +1851,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, | |||
| 1838 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 1851 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
| 1839 | { | 1852 | { |
| 1840 | unsigned fd = *(const unsigned *)ptr; | 1853 | unsigned fd = *(const unsigned *)ptr; |
| 1841 | struct file *file; | ||
| 1842 | struct files_struct *files; | ||
| 1843 | struct inode *inode; | 1854 | struct inode *inode; |
| 1844 | struct proc_inode *ei; | 1855 | struct proc_inode *ei; |
| 1845 | struct dentry *error = ERR_PTR(-ENOENT); | 1856 | struct dentry *error = ERR_PTR(-ENOENT); |
| @@ -1849,25 +1860,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, | |||
| 1849 | goto out; | 1860 | goto out; |
| 1850 | ei = PROC_I(inode); | 1861 | ei = PROC_I(inode); |
| 1851 | ei->fd = fd; | 1862 | ei->fd = fd; |
| 1852 | files = get_files_struct(task); | ||
| 1853 | if (!files) | ||
| 1854 | goto out_iput; | ||
| 1855 | inode->i_mode = S_IFLNK; | ||
| 1856 | |||
| 1857 | /* | ||
| 1858 | * We are not taking a ref to the file structure, so we must | ||
| 1859 | * hold ->file_lock. | ||
| 1860 | */ | ||
| 1861 | spin_lock(&files->file_lock); | ||
| 1862 | file = fcheck_files(files, fd); | ||
| 1863 | if (!file) | ||
| 1864 | goto out_unlock; | ||
| 1865 | if (file->f_mode & FMODE_READ) | ||
| 1866 | inode->i_mode |= S_IRUSR | S_IXUSR; | ||
| 1867 | if (file->f_mode & FMODE_WRITE) | ||
| 1868 | inode->i_mode |= S_IWUSR | S_IXUSR; | ||
| 1869 | spin_unlock(&files->file_lock); | ||
| 1870 | put_files_struct(files); | ||
| 1871 | 1863 | ||
| 1872 | inode->i_op = &proc_pid_link_inode_operations; | 1864 | inode->i_op = &proc_pid_link_inode_operations; |
| 1873 | inode->i_size = 64; | 1865 | inode->i_size = 64; |
| @@ -1880,12 +1872,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, | |||
| 1880 | 1872 | ||
| 1881 | out: | 1873 | out: |
| 1882 | return error; | 1874 | return error; |
| 1883 | out_unlock: | ||
| 1884 | spin_unlock(&files->file_lock); | ||
| 1885 | put_files_struct(files); | ||
| 1886 | out_iput: | ||
| 1887 | iput(inode); | ||
| 1888 | goto out; | ||
| 1889 | } | 1875 | } |
| 1890 | 1876 | ||
| 1891 | static struct dentry *proc_lookupfd_common(struct inode *dir, | 1877 | static struct dentry *proc_lookupfd_common(struct inode *dir, |
| @@ -2061,8 +2047,8 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 2061 | inode->i_gid = cred->egid; | 2047 | inode->i_gid = cred->egid; |
| 2062 | rcu_read_unlock(); | 2048 | rcu_read_unlock(); |
| 2063 | } else { | 2049 | } else { |
| 2064 | inode->i_uid = 0; | 2050 | inode->i_uid = GLOBAL_ROOT_UID; |
| 2065 | inode->i_gid = 0; | 2051 | inode->i_gid = GLOBAL_ROOT_GID; |
| 2066 | } | 2052 | } |
| 2067 | security_task_to_inode(task, inode); | 2053 | security_task_to_inode(task, inode); |
| 2068 | status = 1; | 2054 | status = 1; |
| @@ -2178,16 +2164,16 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
| 2178 | goto out; | 2164 | goto out; |
| 2179 | 2165 | ||
| 2180 | result = ERR_PTR(-EACCES); | 2166 | result = ERR_PTR(-EACCES); |
| 2181 | if (lock_trace(task)) | 2167 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 2182 | goto out_put_task; | 2168 | goto out_put_task; |
| 2183 | 2169 | ||
| 2184 | result = ERR_PTR(-ENOENT); | 2170 | result = ERR_PTR(-ENOENT); |
| 2185 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) | 2171 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) |
| 2186 | goto out_unlock; | 2172 | goto out_put_task; |
| 2187 | 2173 | ||
| 2188 | mm = get_task_mm(task); | 2174 | mm = get_task_mm(task); |
| 2189 | if (!mm) | 2175 | if (!mm) |
| 2190 | goto out_unlock; | 2176 | goto out_put_task; |
| 2191 | 2177 | ||
| 2192 | down_read(&mm->mmap_sem); | 2178 | down_read(&mm->mmap_sem); |
| 2193 | vma = find_exact_vma(mm, vm_start, vm_end); | 2179 | vma = find_exact_vma(mm, vm_start, vm_end); |
| @@ -2199,8 +2185,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
| 2199 | out_no_vma: | 2185 | out_no_vma: |
| 2200 | up_read(&mm->mmap_sem); | 2186 | up_read(&mm->mmap_sem); |
| 2201 | mmput(mm); | 2187 | mmput(mm); |
| 2202 | out_unlock: | ||
| 2203 | unlock_trace(task); | ||
| 2204 | out_put_task: | 2188 | out_put_task: |
| 2205 | put_task_struct(task); | 2189 | put_task_struct(task); |
| 2206 | out: | 2190 | out: |
| @@ -2234,7 +2218,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 2234 | goto out; | 2218 | goto out; |
| 2235 | 2219 | ||
| 2236 | ret = -EACCES; | 2220 | ret = -EACCES; |
| 2237 | if (lock_trace(task)) | 2221 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 2238 | goto out_put_task; | 2222 | goto out_put_task; |
| 2239 | 2223 | ||
| 2240 | ret = 0; | 2224 | ret = 0; |
| @@ -2242,12 +2226,12 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 2242 | case 0: | 2226 | case 0: |
| 2243 | ino = inode->i_ino; | 2227 | ino = inode->i_ino; |
| 2244 | if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) | 2228 | if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) |
| 2245 | goto out_unlock; | 2229 | goto out_put_task; |
| 2246 | filp->f_pos++; | 2230 | filp->f_pos++; |
| 2247 | case 1: | 2231 | case 1: |
| 2248 | ino = parent_ino(dentry); | 2232 | ino = parent_ino(dentry); |
| 2249 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | 2233 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) |
| 2250 | goto out_unlock; | 2234 | goto out_put_task; |
| 2251 | filp->f_pos++; | 2235 | filp->f_pos++; |
| 2252 | default: | 2236 | default: |
| 2253 | { | 2237 | { |
| @@ -2258,7 +2242,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 2258 | 2242 | ||
| 2259 | mm = get_task_mm(task); | 2243 | mm = get_task_mm(task); |
| 2260 | if (!mm) | 2244 | if (!mm) |
| 2261 | goto out_unlock; | 2245 | goto out_put_task; |
| 2262 | down_read(&mm->mmap_sem); | 2246 | down_read(&mm->mmap_sem); |
| 2263 | 2247 | ||
| 2264 | nr_files = 0; | 2248 | nr_files = 0; |
| @@ -2288,7 +2272,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 2288 | flex_array_free(fa); | 2272 | flex_array_free(fa); |
| 2289 | up_read(&mm->mmap_sem); | 2273 | up_read(&mm->mmap_sem); |
| 2290 | mmput(mm); | 2274 | mmput(mm); |
| 2291 | goto out_unlock; | 2275 | goto out_put_task; |
| 2292 | } | 2276 | } |
| 2293 | for (i = 0, vma = mm->mmap, pos = 2; vma; | 2277 | for (i = 0, vma = mm->mmap, pos = 2; vma; |
| 2294 | vma = vma->vm_next) { | 2278 | vma = vma->vm_next) { |
| @@ -2333,8 +2317,6 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 2333 | } | 2317 | } |
| 2334 | } | 2318 | } |
| 2335 | 2319 | ||
| 2336 | out_unlock: | ||
| 2337 | unlock_trace(task); | ||
| 2338 | out_put_task: | 2320 | out_put_task: |
| 2339 | put_task_struct(task); | 2321 | put_task_struct(task); |
| 2340 | out: | 2322 | out: |
| @@ -2944,6 +2926,74 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) | |||
| 2944 | } | 2926 | } |
| 2945 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ | 2927 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ |
| 2946 | 2928 | ||
| 2929 | #ifdef CONFIG_USER_NS | ||
| 2930 | static int proc_id_map_open(struct inode *inode, struct file *file, | ||
| 2931 | struct seq_operations *seq_ops) | ||
| 2932 | { | ||
| 2933 | struct user_namespace *ns = NULL; | ||
| 2934 | struct task_struct *task; | ||
| 2935 | struct seq_file *seq; | ||
| 2936 | int ret = -EINVAL; | ||
| 2937 | |||
| 2938 | task = get_proc_task(inode); | ||
| 2939 | if (task) { | ||
| 2940 | rcu_read_lock(); | ||
| 2941 | ns = get_user_ns(task_cred_xxx(task, user_ns)); | ||
| 2942 | rcu_read_unlock(); | ||
| 2943 | put_task_struct(task); | ||
| 2944 | } | ||
| 2945 | if (!ns) | ||
| 2946 | goto err; | ||
| 2947 | |||
| 2948 | ret = seq_open(file, seq_ops); | ||
| 2949 | if (ret) | ||
| 2950 | goto err_put_ns; | ||
| 2951 | |||
| 2952 | seq = file->private_data; | ||
| 2953 | seq->private = ns; | ||
| 2954 | |||
| 2955 | return 0; | ||
| 2956 | err_put_ns: | ||
| 2957 | put_user_ns(ns); | ||
| 2958 | err: | ||
| 2959 | return ret; | ||
| 2960 | } | ||
| 2961 | |||
| 2962 | static int proc_id_map_release(struct inode *inode, struct file *file) | ||
| 2963 | { | ||
| 2964 | struct seq_file *seq = file->private_data; | ||
| 2965 | struct user_namespace *ns = seq->private; | ||
| 2966 | put_user_ns(ns); | ||
| 2967 | return seq_release(inode, file); | ||
| 2968 | } | ||
| 2969 | |||
| 2970 | static int proc_uid_map_open(struct inode *inode, struct file *file) | ||
| 2971 | { | ||
| 2972 | return proc_id_map_open(inode, file, &proc_uid_seq_operations); | ||
| 2973 | } | ||
| 2974 | |||
| 2975 | static int proc_gid_map_open(struct inode *inode, struct file *file) | ||
| 2976 | { | ||
| 2977 | return proc_id_map_open(inode, file, &proc_gid_seq_operations); | ||
| 2978 | } | ||
| 2979 | |||
| 2980 | static const struct file_operations proc_uid_map_operations = { | ||
| 2981 | .open = proc_uid_map_open, | ||
| 2982 | .write = proc_uid_map_write, | ||
| 2983 | .read = seq_read, | ||
| 2984 | .llseek = seq_lseek, | ||
| 2985 | .release = proc_id_map_release, | ||
| 2986 | }; | ||
| 2987 | |||
| 2988 | static const struct file_operations proc_gid_map_operations = { | ||
| 2989 | .open = proc_gid_map_open, | ||
| 2990 | .write = proc_gid_map_write, | ||
| 2991 | .read = seq_read, | ||
| 2992 | .llseek = seq_lseek, | ||
| 2993 | .release = proc_id_map_release, | ||
| 2994 | }; | ||
| 2995 | #endif /* CONFIG_USER_NS */ | ||
| 2996 | |||
| 2947 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | 2997 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, |
| 2948 | struct pid *pid, struct task_struct *task) | 2998 | struct pid *pid, struct task_struct *task) |
| 2949 | { | 2999 | { |
| @@ -2990,9 +3040,9 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 2990 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 3040 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
| 2991 | ONE("stat", S_IRUGO, proc_tgid_stat), | 3041 | ONE("stat", S_IRUGO, proc_tgid_stat), |
| 2992 | ONE("statm", S_IRUGO, proc_pid_statm), | 3042 | ONE("statm", S_IRUGO, proc_pid_statm), |
| 2993 | REG("maps", S_IRUGO, proc_maps_operations), | 3043 | REG("maps", S_IRUGO, proc_pid_maps_operations), |
| 2994 | #ifdef CONFIG_NUMA | 3044 | #ifdef CONFIG_NUMA |
| 2995 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), | 3045 | REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), |
| 2996 | #endif | 3046 | #endif |
| 2997 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), | 3047 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), |
| 2998 | LNK("cwd", proc_cwd_link), | 3048 | LNK("cwd", proc_cwd_link), |
| @@ -3003,7 +3053,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 3003 | REG("mountstats", S_IRUSR, proc_mountstats_operations), | 3053 | REG("mountstats", S_IRUSR, proc_mountstats_operations), |
| 3004 | #ifdef CONFIG_PROC_PAGE_MONITOR | 3054 | #ifdef CONFIG_PROC_PAGE_MONITOR |
| 3005 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 3055 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
| 3006 | REG("smaps", S_IRUGO, proc_smaps_operations), | 3056 | REG("smaps", S_IRUGO, proc_pid_smaps_operations), |
| 3007 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 3057 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
| 3008 | #endif | 3058 | #endif |
| 3009 | #ifdef CONFIG_SECURITY | 3059 | #ifdef CONFIG_SECURITY |
| @@ -3046,6 +3096,10 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 3046 | #ifdef CONFIG_HARDWALL | 3096 | #ifdef CONFIG_HARDWALL |
| 3047 | INF("hardwall", S_IRUGO, proc_pid_hardwall), | 3097 | INF("hardwall", S_IRUGO, proc_pid_hardwall), |
| 3048 | #endif | 3098 | #endif |
| 3099 | #ifdef CONFIG_USER_NS | ||
| 3100 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), | ||
| 3101 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), | ||
| 3102 | #endif | ||
| 3049 | }; | 3103 | }; |
| 3050 | 3104 | ||
| 3051 | static int proc_tgid_base_readdir(struct file * filp, | 3105 | static int proc_tgid_base_readdir(struct file * filp, |
| @@ -3349,9 +3403,9 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 3349 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 3403 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
| 3350 | ONE("stat", S_IRUGO, proc_tid_stat), | 3404 | ONE("stat", S_IRUGO, proc_tid_stat), |
| 3351 | ONE("statm", S_IRUGO, proc_pid_statm), | 3405 | ONE("statm", S_IRUGO, proc_pid_statm), |
| 3352 | REG("maps", S_IRUGO, proc_maps_operations), | 3406 | REG("maps", S_IRUGO, proc_tid_maps_operations), |
| 3353 | #ifdef CONFIG_NUMA | 3407 | #ifdef CONFIG_NUMA |
| 3354 | REG("numa_maps", S_IRUGO, proc_numa_maps_operations), | 3408 | REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), |
| 3355 | #endif | 3409 | #endif |
| 3356 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), | 3410 | REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), |
| 3357 | LNK("cwd", proc_cwd_link), | 3411 | LNK("cwd", proc_cwd_link), |
| @@ -3361,7 +3415,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 3361 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), | 3415 | REG("mountinfo", S_IRUGO, proc_mountinfo_operations), |
| 3362 | #ifdef CONFIG_PROC_PAGE_MONITOR | 3416 | #ifdef CONFIG_PROC_PAGE_MONITOR |
| 3363 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 3417 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
| 3364 | REG("smaps", S_IRUGO, proc_smaps_operations), | 3418 | REG("smaps", S_IRUGO, proc_tid_smaps_operations), |
| 3365 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 3419 | REG("pagemap", S_IRUGO, proc_pagemap_operations), |
| 3366 | #endif | 3420 | #endif |
| 3367 | #ifdef CONFIG_SECURITY | 3421 | #ifdef CONFIG_SECURITY |
| @@ -3401,6 +3455,10 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 3401 | #ifdef CONFIG_HARDWALL | 3455 | #ifdef CONFIG_HARDWALL |
| 3402 | INF("hardwall", S_IRUGO, proc_pid_hardwall), | 3456 | INF("hardwall", S_IRUGO, proc_pid_hardwall), |
| 3403 | #endif | 3457 | #endif |
| 3458 | #ifdef CONFIG_USER_NS | ||
| 3459 | REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), | ||
| 3460 | REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), | ||
| 3461 | #endif | ||
| 3404 | }; | 3462 | }; |
| 3405 | 3463 | ||
| 3406 | static int proc_tid_base_readdir(struct file * filp, | 3464 | static int proc_tid_base_readdir(struct file * filp, |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 84fd3235a590..7ac817b64a71 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
| @@ -22,7 +22,6 @@ | |||
| 22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
| 23 | #include <linux/mount.h> | 23 | #include <linux/mount.h> |
| 24 | 24 | ||
| 25 | #include <asm/system.h> | ||
| 26 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
| 27 | 26 | ||
| 28 | #include "internal.h" | 27 | #include "internal.h" |
| @@ -34,7 +33,7 @@ static void proc_evict_inode(struct inode *inode) | |||
| 34 | const struct proc_ns_operations *ns_ops; | 33 | const struct proc_ns_operations *ns_ops; |
| 35 | 34 | ||
| 36 | truncate_inode_pages(&inode->i_data, 0); | 35 | truncate_inode_pages(&inode->i_data, 0); |
| 37 | end_writeback(inode); | 36 | clear_inode(inode); |
| 38 | 37 | ||
| 39 | /* Stop tracking associated processes */ | 38 | /* Stop tracking associated processes */ |
| 40 | put_pid(PROC_I(inode)->pid); | 39 | put_pid(PROC_I(inode)->pid); |
| @@ -109,8 +108,8 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) | |||
| 109 | struct super_block *sb = root->d_sb; | 108 | struct super_block *sb = root->d_sb; |
| 110 | struct pid_namespace *pid = sb->s_fs_info; | 109 | struct pid_namespace *pid = sb->s_fs_info; |
| 111 | 110 | ||
| 112 | if (pid->pid_gid) | 111 | if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) |
| 113 | seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid); | 112 | seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); |
| 114 | if (pid->hide_pid != 0) | 113 | if (pid->hide_pid != 0) |
| 115 | seq_printf(seq, ",hidepid=%u", pid->hide_pid); | 114 | seq_printf(seq, ",hidepid=%u", pid->hide_pid); |
| 116 | 115 | ||
| @@ -486,8 +485,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
| 486 | 485 | ||
| 487 | int proc_fill_super(struct super_block *s) | 486 | int proc_fill_super(struct super_block *s) |
| 488 | { | 487 | { |
| 489 | struct inode * root_inode; | ||
| 490 | |||
| 491 | s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; | 488 | s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; |
| 492 | s->s_blocksize = 1024; | 489 | s->s_blocksize = 1024; |
| 493 | s->s_blocksize_bits = 10; | 490 | s->s_blocksize_bits = 10; |
| @@ -496,19 +493,11 @@ int proc_fill_super(struct super_block *s) | |||
| 496 | s->s_time_gran = 1; | 493 | s->s_time_gran = 1; |
| 497 | 494 | ||
| 498 | pde_get(&proc_root); | 495 | pde_get(&proc_root); |
| 499 | root_inode = proc_get_inode(s, &proc_root); | 496 | s->s_root = d_make_root(proc_get_inode(s, &proc_root)); |
| 500 | if (!root_inode) | 497 | if (s->s_root) |
| 501 | goto out_no_root; | 498 | return 0; |
| 502 | root_inode->i_uid = 0; | ||
| 503 | root_inode->i_gid = 0; | ||
| 504 | s->s_root = d_alloc_root(root_inode); | ||
| 505 | if (!s->s_root) | ||
| 506 | goto out_no_root; | ||
| 507 | return 0; | ||
| 508 | 499 | ||
| 509 | out_no_root: | ||
| 510 | printk("proc_read_super: get root inode failed\n"); | 500 | printk("proc_read_super: get root inode failed\n"); |
| 511 | iput(root_inode); | ||
| 512 | pde_put(&proc_root); | 501 | pde_put(&proc_root); |
| 513 | return -ENOMEM; | 502 | return -ENOMEM; |
| 514 | } | 503 | } |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 292577531ad1..5f79bb8b4c60 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -10,12 +10,15 @@ | |||
| 10 | */ | 10 | */ |
| 11 | 11 | ||
| 12 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
| 13 | struct ctl_table_header; | ||
| 13 | 14 | ||
| 14 | extern struct proc_dir_entry proc_root; | 15 | extern struct proc_dir_entry proc_root; |
| 15 | #ifdef CONFIG_PROC_SYSCTL | 16 | #ifdef CONFIG_PROC_SYSCTL |
| 16 | extern int proc_sys_init(void); | 17 | extern int proc_sys_init(void); |
| 18 | extern void sysctl_head_put(struct ctl_table_header *head); | ||
| 17 | #else | 19 | #else |
| 18 | static inline void proc_sys_init(void) { } | 20 | static inline void proc_sys_init(void) { } |
| 21 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
| 19 | #endif | 22 | #endif |
| 20 | #ifdef CONFIG_NET | 23 | #ifdef CONFIG_NET |
| 21 | extern int proc_net_init(void); | 24 | extern int proc_net_init(void); |
| @@ -53,9 +56,12 @@ extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | |||
| 53 | struct pid *pid, struct task_struct *task); | 56 | struct pid *pid, struct task_struct *task); |
| 54 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | 57 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); |
| 55 | 58 | ||
| 56 | extern const struct file_operations proc_maps_operations; | 59 | extern const struct file_operations proc_pid_maps_operations; |
| 57 | extern const struct file_operations proc_numa_maps_operations; | 60 | extern const struct file_operations proc_tid_maps_operations; |
| 58 | extern const struct file_operations proc_smaps_operations; | 61 | extern const struct file_operations proc_pid_numa_maps_operations; |
| 62 | extern const struct file_operations proc_tid_numa_maps_operations; | ||
| 63 | extern const struct file_operations proc_pid_smaps_operations; | ||
| 64 | extern const struct file_operations proc_tid_smaps_operations; | ||
| 59 | extern const struct file_operations proc_clear_refs_operations; | 65 | extern const struct file_operations proc_clear_refs_operations; |
| 60 | extern const struct file_operations proc_pagemap_operations; | 66 | extern const struct file_operations proc_pagemap_operations; |
| 61 | extern const struct file_operations proc_net_operations; | 67 | extern const struct file_operations proc_net_operations; |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d245cb23dd72..86c67eee439f 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
| @@ -157,7 +157,8 @@ static int kcore_update_ram(void) | |||
| 157 | 157 | ||
| 158 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 158 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
| 159 | /* calculate vmemmap's address from given system ram pfn and register it */ | 159 | /* calculate vmemmap's address from given system ram pfn and register it */ |
| 160 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | 160 | static int |
| 161 | get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
| 161 | { | 162 | { |
| 162 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; | 163 | unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT; |
| 163 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; | 164 | unsigned long nr_pages = ent->size >> PAGE_SHIFT; |
| @@ -189,7 +190,8 @@ int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | |||
| 189 | 190 | ||
| 190 | } | 191 | } |
| 191 | #else | 192 | #else |
| 192 | int get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | 193 | static int |
| 194 | get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head) | ||
| 193 | { | 195 | { |
| 194 | return 1; | 196 | return 1; |
| 195 | } | 197 | } |
| @@ -513,7 +515,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
| 513 | 515 | ||
| 514 | n = copy_to_user(buffer, (char *)start, tsz); | 516 | n = copy_to_user(buffer, (char *)start, tsz); |
| 515 | /* | 517 | /* |
| 516 | * We cannot distingush between fault on source | 518 | * We cannot distinguish between fault on source |
| 517 | * and fault on destination. When this happens | 519 | * and fault on destination. When this happens |
| 518 | * we clear too and hope it will trigger the | 520 | * we clear too and hope it will trigger the |
| 519 | * EFAULT again. | 521 | * EFAULT again. |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 27da860115c6..0d9e23a39e49 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
| @@ -53,7 +53,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
| 53 | ei->ns_ops = ns_ops; | 53 | ei->ns_ops = ns_ops; |
| 54 | ei->ns = ns; | 54 | ei->ns = ns; |
| 55 | 55 | ||
| 56 | dentry->d_op = &pid_dentry_operations; | 56 | d_set_d_op(dentry, &pid_dentry_operations); |
| 57 | d_add(dentry, inode); | 57 | d_add(dentry, inode); |
| 58 | /* Close the race of the process dying before we return the dentry */ | 58 | /* Close the race of the process dying before we return the dentry */ |
| 59 | if (pid_revalidate(dentry, NULL)) | 59 | if (pid_revalidate(dentry, NULL)) |
| @@ -156,15 +156,15 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, | |||
| 156 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 156 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 157 | goto out; | 157 | goto out; |
| 158 | 158 | ||
| 159 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | 159 | last = &ns_entries[ARRAY_SIZE(ns_entries)]; |
| 160 | for (entry = ns_entries; entry <= last; entry++) { | 160 | for (entry = ns_entries; entry < last; entry++) { |
| 161 | if (strlen((*entry)->name) != len) | 161 | if (strlen((*entry)->name) != len) |
| 162 | continue; | 162 | continue; |
| 163 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) | 163 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) |
| 164 | break; | 164 | break; |
| 165 | } | 165 | } |
| 166 | error = ERR_PTR(-ENOENT); | 166 | error = ERR_PTR(-ENOENT); |
| 167 | if (entry > last) | 167 | if (entry == last) |
| 168 | goto out; | 168 | goto out; |
| 169 | 169 | ||
| 170 | error = proc_ns_instantiate(dir, dentry, task, *entry); | 170 | error = proc_ns_instantiate(dir, dentry, task, *entry); |
diff --git a/fs/proc/page.c b/fs/proc/page.c index 6d8e6a9e93ab..7fcd0d60a968 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
| @@ -115,6 +115,8 @@ u64 stable_page_flags(struct page *page) | |||
| 115 | u |= 1 << KPF_COMPOUND_TAIL; | 115 | u |= 1 << KPF_COMPOUND_TAIL; |
| 116 | if (PageHuge(page)) | 116 | if (PageHuge(page)) |
| 117 | u |= 1 << KPF_HUGE; | 117 | u |= 1 << KPF_HUGE; |
| 118 | else if (PageTransCompound(page)) | ||
| 119 | u |= 1 << KPF_THP; | ||
| 118 | 120 | ||
| 119 | /* | 121 | /* |
| 120 | * Caveats on high order pages: page->_count will only be set | 122 | * Caveats on high order pages: page->_count will only be set |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index a6b62173d4c3..3476bca8f7af 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
| @@ -6,7 +6,10 @@ | |||
| 6 | #include <linux/poll.h> | 6 | #include <linux/poll.h> |
| 7 | #include <linux/proc_fs.h> | 7 | #include <linux/proc_fs.h> |
| 8 | #include <linux/security.h> | 8 | #include <linux/security.h> |
| 9 | #include <linux/sched.h> | ||
| 9 | #include <linux/namei.h> | 10 | #include <linux/namei.h> |
| 11 | #include <linux/mm.h> | ||
| 12 | #include <linux/module.h> | ||
| 10 | #include "internal.h" | 13 | #include "internal.h" |
| 11 | 14 | ||
| 12 | static const struct dentry_operations proc_sys_dentry_operations; | 15 | static const struct dentry_operations proc_sys_dentry_operations; |
| @@ -24,6 +27,371 @@ void proc_sys_poll_notify(struct ctl_table_poll *poll) | |||
| 24 | wake_up_interruptible(&poll->wait); | 27 | wake_up_interruptible(&poll->wait); |
| 25 | } | 28 | } |
| 26 | 29 | ||
| 30 | static struct ctl_table root_table[] = { | ||
| 31 | { | ||
| 32 | .procname = "", | ||
| 33 | .mode = S_IFDIR|S_IRUGO|S_IXUGO, | ||
| 34 | }, | ||
| 35 | { } | ||
| 36 | }; | ||
| 37 | static struct ctl_table_root sysctl_table_root = { | ||
| 38 | .default_set.dir.header = { | ||
| 39 | {{.count = 1, | ||
| 40 | .nreg = 1, | ||
| 41 | .ctl_table = root_table }}, | ||
| 42 | .ctl_table_arg = root_table, | ||
| 43 | .root = &sysctl_table_root, | ||
| 44 | .set = &sysctl_table_root.default_set, | ||
| 45 | }, | ||
| 46 | }; | ||
| 47 | |||
| 48 | static DEFINE_SPINLOCK(sysctl_lock); | ||
| 49 | |||
| 50 | static void drop_sysctl_table(struct ctl_table_header *header); | ||
| 51 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
| 52 | struct ctl_table **pentry, struct nsproxy *namespaces); | ||
| 53 | static int insert_links(struct ctl_table_header *head); | ||
| 54 | static void put_links(struct ctl_table_header *header); | ||
| 55 | |||
| 56 | static void sysctl_print_dir(struct ctl_dir *dir) | ||
| 57 | { | ||
| 58 | if (dir->header.parent) | ||
| 59 | sysctl_print_dir(dir->header.parent); | ||
| 60 | printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); | ||
| 61 | } | ||
| 62 | |||
| 63 | static int namecmp(const char *name1, int len1, const char *name2, int len2) | ||
| 64 | { | ||
| 65 | int minlen; | ||
| 66 | int cmp; | ||
| 67 | |||
| 68 | minlen = len1; | ||
| 69 | if (minlen > len2) | ||
| 70 | minlen = len2; | ||
| 71 | |||
| 72 | cmp = memcmp(name1, name2, minlen); | ||
| 73 | if (cmp == 0) | ||
| 74 | cmp = len1 - len2; | ||
| 75 | return cmp; | ||
| 76 | } | ||
| 77 | |||
| 78 | /* Called under sysctl_lock */ | ||
| 79 | static struct ctl_table *find_entry(struct ctl_table_header **phead, | ||
| 80 | struct ctl_dir *dir, const char *name, int namelen) | ||
| 81 | { | ||
| 82 | struct ctl_table_header *head; | ||
| 83 | struct ctl_table *entry; | ||
| 84 | struct rb_node *node = dir->root.rb_node; | ||
| 85 | |||
| 86 | while (node) | ||
| 87 | { | ||
| 88 | struct ctl_node *ctl_node; | ||
| 89 | const char *procname; | ||
| 90 | int cmp; | ||
| 91 | |||
| 92 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
| 93 | head = ctl_node->header; | ||
| 94 | entry = &head->ctl_table[ctl_node - head->node]; | ||
| 95 | procname = entry->procname; | ||
| 96 | |||
| 97 | cmp = namecmp(name, namelen, procname, strlen(procname)); | ||
| 98 | if (cmp < 0) | ||
| 99 | node = node->rb_left; | ||
| 100 | else if (cmp > 0) | ||
| 101 | node = node->rb_right; | ||
| 102 | else { | ||
| 103 | *phead = head; | ||
| 104 | return entry; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | return NULL; | ||
| 108 | } | ||
| 109 | |||
| 110 | static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
| 111 | { | ||
| 112 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
| 113 | struct rb_node **p = &head->parent->root.rb_node; | ||
| 114 | struct rb_node *parent = NULL; | ||
| 115 | const char *name = entry->procname; | ||
| 116 | int namelen = strlen(name); | ||
| 117 | |||
| 118 | while (*p) { | ||
| 119 | struct ctl_table_header *parent_head; | ||
| 120 | struct ctl_table *parent_entry; | ||
| 121 | struct ctl_node *parent_node; | ||
| 122 | const char *parent_name; | ||
| 123 | int cmp; | ||
| 124 | |||
| 125 | parent = *p; | ||
| 126 | parent_node = rb_entry(parent, struct ctl_node, node); | ||
| 127 | parent_head = parent_node->header; | ||
| 128 | parent_entry = &parent_head->ctl_table[parent_node - parent_head->node]; | ||
| 129 | parent_name = parent_entry->procname; | ||
| 130 | |||
| 131 | cmp = namecmp(name, namelen, parent_name, strlen(parent_name)); | ||
| 132 | if (cmp < 0) | ||
| 133 | p = &(*p)->rb_left; | ||
| 134 | else if (cmp > 0) | ||
| 135 | p = &(*p)->rb_right; | ||
| 136 | else { | ||
| 137 | printk(KERN_ERR "sysctl duplicate entry: "); | ||
| 138 | sysctl_print_dir(head->parent); | ||
| 139 | printk(KERN_CONT "/%s\n", entry->procname); | ||
| 140 | return -EEXIST; | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | rb_link_node(node, parent, p); | ||
| 145 | return 0; | ||
| 146 | } | ||
| 147 | |||
| 148 | static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry) | ||
| 149 | { | ||
| 150 | struct rb_node *node = &head->node[entry - head->ctl_table].node; | ||
| 151 | |||
| 152 | rb_erase(node, &head->parent->root); | ||
| 153 | } | ||
| 154 | |||
| 155 | static void init_header(struct ctl_table_header *head, | ||
| 156 | struct ctl_table_root *root, struct ctl_table_set *set, | ||
| 157 | struct ctl_node *node, struct ctl_table *table) | ||
| 158 | { | ||
| 159 | head->ctl_table = table; | ||
| 160 | head->ctl_table_arg = table; | ||
| 161 | head->used = 0; | ||
| 162 | head->count = 1; | ||
| 163 | head->nreg = 1; | ||
| 164 | head->unregistering = NULL; | ||
| 165 | head->root = root; | ||
| 166 | head->set = set; | ||
| 167 | head->parent = NULL; | ||
| 168 | head->node = node; | ||
| 169 | if (node) { | ||
| 170 | struct ctl_table *entry; | ||
| 171 | for (entry = table; entry->procname; entry++, node++) { | ||
| 172 | rb_init_node(&node->node); | ||
| 173 | node->header = head; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | static void erase_header(struct ctl_table_header *head) | ||
| 179 | { | ||
| 180 | struct ctl_table *entry; | ||
| 181 | for (entry = head->ctl_table; entry->procname; entry++) | ||
| 182 | erase_entry(head, entry); | ||
| 183 | } | ||
| 184 | |||
| 185 | static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) | ||
| 186 | { | ||
| 187 | struct ctl_table *entry; | ||
| 188 | int err; | ||
| 189 | |||
| 190 | dir->header.nreg++; | ||
| 191 | header->parent = dir; | ||
| 192 | err = insert_links(header); | ||
| 193 | if (err) | ||
| 194 | goto fail_links; | ||
| 195 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
| 196 | err = insert_entry(header, entry); | ||
| 197 | if (err) | ||
| 198 | goto fail; | ||
| 199 | } | ||
| 200 | return 0; | ||
| 201 | fail: | ||
| 202 | erase_header(header); | ||
| 203 | put_links(header); | ||
| 204 | fail_links: | ||
| 205 | header->parent = NULL; | ||
| 206 | drop_sysctl_table(&dir->header); | ||
| 207 | return err; | ||
| 208 | } | ||
| 209 | |||
| 210 | /* called under sysctl_lock */ | ||
| 211 | static int use_table(struct ctl_table_header *p) | ||
| 212 | { | ||
| 213 | if (unlikely(p->unregistering)) | ||
| 214 | return 0; | ||
| 215 | p->used++; | ||
| 216 | return 1; | ||
| 217 | } | ||
| 218 | |||
| 219 | /* called under sysctl_lock */ | ||
| 220 | static void unuse_table(struct ctl_table_header *p) | ||
| 221 | { | ||
| 222 | if (!--p->used) | ||
| 223 | if (unlikely(p->unregistering)) | ||
| 224 | complete(p->unregistering); | ||
| 225 | } | ||
| 226 | |||
| 227 | /* called under sysctl_lock, will reacquire if has to wait */ | ||
| 228 | static void start_unregistering(struct ctl_table_header *p) | ||
| 229 | { | ||
| 230 | /* | ||
| 231 | * if p->used is 0, nobody will ever touch that entry again; | ||
| 232 | * we'll eliminate all paths to it before dropping sysctl_lock | ||
| 233 | */ | ||
| 234 | if (unlikely(p->used)) { | ||
| 235 | struct completion wait; | ||
| 236 | init_completion(&wait); | ||
| 237 | p->unregistering = &wait; | ||
| 238 | spin_unlock(&sysctl_lock); | ||
| 239 | wait_for_completion(&wait); | ||
| 240 | spin_lock(&sysctl_lock); | ||
| 241 | } else { | ||
| 242 | /* anything non-NULL; we'll never dereference it */ | ||
| 243 | p->unregistering = ERR_PTR(-EINVAL); | ||
| 244 | } | ||
| 245 | /* | ||
| 246 | * do not remove from the list until nobody holds it; walking the | ||
| 247 | * list in do_sysctl() relies on that. | ||
| 248 | */ | ||
| 249 | erase_header(p); | ||
| 250 | } | ||
| 251 | |||
| 252 | static void sysctl_head_get(struct ctl_table_header *head) | ||
| 253 | { | ||
| 254 | spin_lock(&sysctl_lock); | ||
| 255 | head->count++; | ||
| 256 | spin_unlock(&sysctl_lock); | ||
| 257 | } | ||
| 258 | |||
| 259 | void sysctl_head_put(struct ctl_table_header *head) | ||
| 260 | { | ||
| 261 | spin_lock(&sysctl_lock); | ||
| 262 | if (!--head->count) | ||
| 263 | kfree_rcu(head, rcu); | ||
| 264 | spin_unlock(&sysctl_lock); | ||
| 265 | } | ||
| 266 | |||
| 267 | static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) | ||
| 268 | { | ||
| 269 | if (!head) | ||
| 270 | BUG(); | ||
| 271 | spin_lock(&sysctl_lock); | ||
| 272 | if (!use_table(head)) | ||
| 273 | head = ERR_PTR(-ENOENT); | ||
| 274 | spin_unlock(&sysctl_lock); | ||
| 275 | return head; | ||
| 276 | } | ||
| 277 | |||
| 278 | static void sysctl_head_finish(struct ctl_table_header *head) | ||
| 279 | { | ||
| 280 | if (!head) | ||
| 281 | return; | ||
| 282 | spin_lock(&sysctl_lock); | ||
| 283 | unuse_table(head); | ||
| 284 | spin_unlock(&sysctl_lock); | ||
| 285 | } | ||
| 286 | |||
| 287 | static struct ctl_table_set * | ||
| 288 | lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces) | ||
| 289 | { | ||
| 290 | struct ctl_table_set *set = &root->default_set; | ||
| 291 | if (root->lookup) | ||
| 292 | set = root->lookup(root, namespaces); | ||
| 293 | return set; | ||
| 294 | } | ||
| 295 | |||
| 296 | static struct ctl_table *lookup_entry(struct ctl_table_header **phead, | ||
| 297 | struct ctl_dir *dir, | ||
| 298 | const char *name, int namelen) | ||
| 299 | { | ||
| 300 | struct ctl_table_header *head; | ||
| 301 | struct ctl_table *entry; | ||
| 302 | |||
| 303 | spin_lock(&sysctl_lock); | ||
| 304 | entry = find_entry(&head, dir, name, namelen); | ||
| 305 | if (entry && use_table(head)) | ||
| 306 | *phead = head; | ||
| 307 | else | ||
| 308 | entry = NULL; | ||
| 309 | spin_unlock(&sysctl_lock); | ||
| 310 | return entry; | ||
| 311 | } | ||
| 312 | |||
| 313 | static struct ctl_node *first_usable_entry(struct rb_node *node) | ||
| 314 | { | ||
| 315 | struct ctl_node *ctl_node; | ||
| 316 | |||
| 317 | for (;node; node = rb_next(node)) { | ||
| 318 | ctl_node = rb_entry(node, struct ctl_node, node); | ||
| 319 | if (use_table(ctl_node->header)) | ||
| 320 | return ctl_node; | ||
| 321 | } | ||
| 322 | return NULL; | ||
| 323 | } | ||
| 324 | |||
| 325 | static void first_entry(struct ctl_dir *dir, | ||
| 326 | struct ctl_table_header **phead, struct ctl_table **pentry) | ||
| 327 | { | ||
| 328 | struct ctl_table_header *head = NULL; | ||
| 329 | struct ctl_table *entry = NULL; | ||
| 330 | struct ctl_node *ctl_node; | ||
| 331 | |||
| 332 | spin_lock(&sysctl_lock); | ||
| 333 | ctl_node = first_usable_entry(rb_first(&dir->root)); | ||
| 334 | spin_unlock(&sysctl_lock); | ||
| 335 | if (ctl_node) { | ||
| 336 | head = ctl_node->header; | ||
| 337 | entry = &head->ctl_table[ctl_node - head->node]; | ||
| 338 | } | ||
| 339 | *phead = head; | ||
| 340 | *pentry = entry; | ||
| 341 | } | ||
| 342 | |||
| 343 | static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry) | ||
| 344 | { | ||
| 345 | struct ctl_table_header *head = *phead; | ||
| 346 | struct ctl_table *entry = *pentry; | ||
| 347 | struct ctl_node *ctl_node = &head->node[entry - head->ctl_table]; | ||
| 348 | |||
| 349 | spin_lock(&sysctl_lock); | ||
| 350 | unuse_table(head); | ||
| 351 | |||
| 352 | ctl_node = first_usable_entry(rb_next(&ctl_node->node)); | ||
| 353 | spin_unlock(&sysctl_lock); | ||
| 354 | head = NULL; | ||
| 355 | if (ctl_node) { | ||
| 356 | head = ctl_node->header; | ||
| 357 | entry = &head->ctl_table[ctl_node - head->node]; | ||
| 358 | } | ||
| 359 | *phead = head; | ||
| 360 | *pentry = entry; | ||
| 361 | } | ||
| 362 | |||
| 363 | void register_sysctl_root(struct ctl_table_root *root) | ||
| 364 | { | ||
| 365 | } | ||
| 366 | |||
| 367 | /* | ||
| 368 | * sysctl_perm does NOT grant the superuser all rights automatically, because | ||
| 369 | * some sysctl variables are readonly even to root. | ||
| 370 | */ | ||
| 371 | |||
| 372 | static int test_perm(int mode, int op) | ||
| 373 | { | ||
| 374 | if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) | ||
| 375 | mode >>= 6; | ||
| 376 | else if (in_egroup_p(GLOBAL_ROOT_GID)) | ||
| 377 | mode >>= 3; | ||
| 378 | if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0) | ||
| 379 | return 0; | ||
| 380 | return -EACCES; | ||
| 381 | } | ||
| 382 | |||
| 383 | static int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) | ||
| 384 | { | ||
| 385 | int mode; | ||
| 386 | |||
| 387 | if (root->permissions) | ||
| 388 | mode = root->permissions(root, current->nsproxy, table); | ||
| 389 | else | ||
| 390 | mode = table->mode; | ||
| 391 | |||
| 392 | return test_perm(mode, op); | ||
| 393 | } | ||
| 394 | |||
| 27 | static struct inode *proc_sys_make_inode(struct super_block *sb, | 395 | static struct inode *proc_sys_make_inode(struct super_block *sb, |
| 28 | struct ctl_table_header *head, struct ctl_table *table) | 396 | struct ctl_table_header *head, struct ctl_table *table) |
| 29 | { | 397 | { |
| @@ -43,13 +411,12 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, | |||
| 43 | 411 | ||
| 44 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 412 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
| 45 | inode->i_mode = table->mode; | 413 | inode->i_mode = table->mode; |
| 46 | if (!table->child) { | 414 | if (!S_ISDIR(table->mode)) { |
| 47 | inode->i_mode |= S_IFREG; | 415 | inode->i_mode |= S_IFREG; |
| 48 | inode->i_op = &proc_sys_inode_operations; | 416 | inode->i_op = &proc_sys_inode_operations; |
| 49 | inode->i_fop = &proc_sys_file_operations; | 417 | inode->i_fop = &proc_sys_file_operations; |
| 50 | } else { | 418 | } else { |
| 51 | inode->i_mode |= S_IFDIR; | 419 | inode->i_mode |= S_IFDIR; |
| 52 | clear_nlink(inode); | ||
| 53 | inode->i_op = &proc_sys_dir_operations; | 420 | inode->i_op = &proc_sys_dir_operations; |
| 54 | inode->i_fop = &proc_sys_dir_file_operations; | 421 | inode->i_fop = &proc_sys_dir_file_operations; |
| 55 | } | 422 | } |
| @@ -57,70 +424,42 @@ out: | |||
| 57 | return inode; | 424 | return inode; |
| 58 | } | 425 | } |
| 59 | 426 | ||
| 60 | static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) | ||
| 61 | { | ||
| 62 | int len; | ||
| 63 | for ( ; p->procname; p++) { | ||
| 64 | |||
| 65 | if (!p->procname) | ||
| 66 | continue; | ||
| 67 | |||
| 68 | len = strlen(p->procname); | ||
| 69 | if (len != name->len) | ||
| 70 | continue; | ||
| 71 | |||
| 72 | if (memcmp(p->procname, name->name, len) != 0) | ||
| 73 | continue; | ||
| 74 | |||
| 75 | /* I have a match */ | ||
| 76 | return p; | ||
| 77 | } | ||
| 78 | return NULL; | ||
| 79 | } | ||
| 80 | |||
| 81 | static struct ctl_table_header *grab_header(struct inode *inode) | 427 | static struct ctl_table_header *grab_header(struct inode *inode) |
| 82 | { | 428 | { |
| 83 | if (PROC_I(inode)->sysctl) | 429 | struct ctl_table_header *head = PROC_I(inode)->sysctl; |
| 84 | return sysctl_head_grab(PROC_I(inode)->sysctl); | 430 | if (!head) |
| 85 | else | 431 | head = &sysctl_table_root.default_set.dir.header; |
| 86 | return sysctl_head_next(NULL); | 432 | return sysctl_head_grab(head); |
| 87 | } | 433 | } |
| 88 | 434 | ||
| 89 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, | 435 | static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, |
| 90 | struct nameidata *nd) | 436 | struct nameidata *nd) |
| 91 | { | 437 | { |
| 92 | struct ctl_table_header *head = grab_header(dir); | 438 | struct ctl_table_header *head = grab_header(dir); |
| 93 | struct ctl_table *table = PROC_I(dir)->sysctl_entry; | ||
| 94 | struct ctl_table_header *h = NULL; | 439 | struct ctl_table_header *h = NULL; |
| 95 | struct qstr *name = &dentry->d_name; | 440 | struct qstr *name = &dentry->d_name; |
| 96 | struct ctl_table *p; | 441 | struct ctl_table *p; |
| 97 | struct inode *inode; | 442 | struct inode *inode; |
| 98 | struct dentry *err = ERR_PTR(-ENOENT); | 443 | struct dentry *err = ERR_PTR(-ENOENT); |
| 444 | struct ctl_dir *ctl_dir; | ||
| 445 | int ret; | ||
| 99 | 446 | ||
| 100 | if (IS_ERR(head)) | 447 | if (IS_ERR(head)) |
| 101 | return ERR_CAST(head); | 448 | return ERR_CAST(head); |
| 102 | 449 | ||
| 103 | if (table && !table->child) { | 450 | ctl_dir = container_of(head, struct ctl_dir, header); |
| 104 | WARN_ON(1); | ||
| 105 | goto out; | ||
| 106 | } | ||
| 107 | |||
| 108 | table = table ? table->child : head->ctl_table; | ||
| 109 | |||
| 110 | p = find_in_table(table, name); | ||
| 111 | if (!p) { | ||
| 112 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { | ||
| 113 | if (h->attached_to != table) | ||
| 114 | continue; | ||
| 115 | p = find_in_table(h->attached_by, name); | ||
| 116 | if (p) | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | 451 | ||
| 452 | p = lookup_entry(&h, ctl_dir, name->name, name->len); | ||
| 121 | if (!p) | 453 | if (!p) |
| 122 | goto out; | 454 | goto out; |
| 123 | 455 | ||
| 456 | if (S_ISLNK(p->mode)) { | ||
| 457 | ret = sysctl_follow_link(&h, &p, current->nsproxy); | ||
| 458 | err = ERR_PTR(ret); | ||
| 459 | if (ret) | ||
| 460 | goto out; | ||
| 461 | } | ||
| 462 | |||
| 124 | err = ERR_PTR(-ENOMEM); | 463 | err = ERR_PTR(-ENOMEM); |
| 125 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); | 464 | inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); |
| 126 | if (h) | 465 | if (h) |
| @@ -188,20 +527,32 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, | |||
| 188 | 527 | ||
| 189 | static int proc_sys_open(struct inode *inode, struct file *filp) | 528 | static int proc_sys_open(struct inode *inode, struct file *filp) |
| 190 | { | 529 | { |
| 530 | struct ctl_table_header *head = grab_header(inode); | ||
| 191 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | 531 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; |
| 192 | 532 | ||
| 533 | /* sysctl was unregistered */ | ||
| 534 | if (IS_ERR(head)) | ||
| 535 | return PTR_ERR(head); | ||
| 536 | |||
| 193 | if (table->poll) | 537 | if (table->poll) |
| 194 | filp->private_data = proc_sys_poll_event(table->poll); | 538 | filp->private_data = proc_sys_poll_event(table->poll); |
| 195 | 539 | ||
| 540 | sysctl_head_finish(head); | ||
| 541 | |||
| 196 | return 0; | 542 | return 0; |
| 197 | } | 543 | } |
| 198 | 544 | ||
| 199 | static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | 545 | static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) |
| 200 | { | 546 | { |
| 201 | struct inode *inode = filp->f_path.dentry->d_inode; | 547 | struct inode *inode = filp->f_path.dentry->d_inode; |
| 548 | struct ctl_table_header *head = grab_header(inode); | ||
| 202 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | 549 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; |
| 203 | unsigned long event = (unsigned long)filp->private_data; | ||
| 204 | unsigned int ret = DEFAULT_POLLMASK; | 550 | unsigned int ret = DEFAULT_POLLMASK; |
| 551 | unsigned long event; | ||
| 552 | |||
| 553 | /* sysctl was unregistered */ | ||
| 554 | if (IS_ERR(head)) | ||
| 555 | return POLLERR | POLLHUP; | ||
| 205 | 556 | ||
| 206 | if (!table->proc_handler) | 557 | if (!table->proc_handler) |
| 207 | goto out; | 558 | goto out; |
| @@ -209,6 +560,7 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | |||
| 209 | if (!table->poll) | 560 | if (!table->poll) |
| 210 | goto out; | 561 | goto out; |
| 211 | 562 | ||
| 563 | event = (unsigned long)filp->private_data; | ||
| 212 | poll_wait(filp, &table->poll->wait, wait); | 564 | poll_wait(filp, &table->poll->wait, wait); |
| 213 | 565 | ||
| 214 | if (event != atomic_read(&table->poll->event)) { | 566 | if (event != atomic_read(&table->poll->event)) { |
| @@ -217,6 +569,8 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) | |||
| 217 | } | 569 | } |
| 218 | 570 | ||
| 219 | out: | 571 | out: |
| 572 | sysctl_head_finish(head); | ||
| 573 | |||
| 220 | return ret; | 574 | return ret; |
| 221 | } | 575 | } |
| 222 | 576 | ||
| @@ -258,28 +612,45 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, | |||
| 258 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | 612 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); |
| 259 | } | 613 | } |
| 260 | 614 | ||
| 615 | static int proc_sys_link_fill_cache(struct file *filp, void *dirent, | ||
| 616 | filldir_t filldir, | ||
| 617 | struct ctl_table_header *head, | ||
| 618 | struct ctl_table *table) | ||
| 619 | { | ||
| 620 | int err, ret = 0; | ||
| 621 | head = sysctl_head_grab(head); | ||
| 622 | |||
| 623 | if (S_ISLNK(table->mode)) { | ||
| 624 | /* It is not an error if we can not follow the link ignore it */ | ||
| 625 | err = sysctl_follow_link(&head, &table, current->nsproxy); | ||
| 626 | if (err) | ||
| 627 | goto out; | ||
| 628 | } | ||
| 629 | |||
| 630 | ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); | ||
| 631 | out: | ||
| 632 | sysctl_head_finish(head); | ||
| 633 | return ret; | ||
| 634 | } | ||
| 635 | |||
| 261 | static int scan(struct ctl_table_header *head, ctl_table *table, | 636 | static int scan(struct ctl_table_header *head, ctl_table *table, |
| 262 | unsigned long *pos, struct file *file, | 637 | unsigned long *pos, struct file *file, |
| 263 | void *dirent, filldir_t filldir) | 638 | void *dirent, filldir_t filldir) |
| 264 | { | 639 | { |
| 640 | int res; | ||
| 265 | 641 | ||
| 266 | for (; table->procname; table++, (*pos)++) { | 642 | if ((*pos)++ < file->f_pos) |
| 267 | int res; | 643 | return 0; |
| 268 | |||
| 269 | /* Can't do anything without a proc name */ | ||
| 270 | if (!table->procname) | ||
| 271 | continue; | ||
| 272 | |||
| 273 | if (*pos < file->f_pos) | ||
| 274 | continue; | ||
| 275 | 644 | ||
| 645 | if (unlikely(S_ISLNK(table->mode))) | ||
| 646 | res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); | ||
| 647 | else | ||
| 276 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | 648 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); |
| 277 | if (res) | ||
| 278 | return res; | ||
| 279 | 649 | ||
| 280 | file->f_pos = *pos + 1; | 650 | if (res == 0) |
| 281 | } | 651 | file->f_pos = *pos; |
| 282 | return 0; | 652 | |
| 653 | return res; | ||
| 283 | } | 654 | } |
| 284 | 655 | ||
| 285 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | 656 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) |
| @@ -287,20 +658,16 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 287 | struct dentry *dentry = filp->f_path.dentry; | 658 | struct dentry *dentry = filp->f_path.dentry; |
| 288 | struct inode *inode = dentry->d_inode; | 659 | struct inode *inode = dentry->d_inode; |
| 289 | struct ctl_table_header *head = grab_header(inode); | 660 | struct ctl_table_header *head = grab_header(inode); |
| 290 | struct ctl_table *table = PROC_I(inode)->sysctl_entry; | ||
| 291 | struct ctl_table_header *h = NULL; | 661 | struct ctl_table_header *h = NULL; |
| 662 | struct ctl_table *entry; | ||
| 663 | struct ctl_dir *ctl_dir; | ||
| 292 | unsigned long pos; | 664 | unsigned long pos; |
| 293 | int ret = -EINVAL; | 665 | int ret = -EINVAL; |
| 294 | 666 | ||
| 295 | if (IS_ERR(head)) | 667 | if (IS_ERR(head)) |
| 296 | return PTR_ERR(head); | 668 | return PTR_ERR(head); |
| 297 | 669 | ||
| 298 | if (table && !table->child) { | 670 | ctl_dir = container_of(head, struct ctl_dir, header); |
| 299 | WARN_ON(1); | ||
| 300 | goto out; | ||
| 301 | } | ||
| 302 | |||
| 303 | table = table ? table->child : head->ctl_table; | ||
| 304 | 671 | ||
| 305 | ret = 0; | 672 | ret = 0; |
| 306 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | 673 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ |
| @@ -318,14 +685,8 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 318 | } | 685 | } |
| 319 | pos = 2; | 686 | pos = 2; |
| 320 | 687 | ||
| 321 | ret = scan(head, table, &pos, filp, dirent, filldir); | 688 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { |
| 322 | if (ret) | 689 | ret = scan(h, entry, &pos, filp, dirent, filldir); |
| 323 | goto out; | ||
| 324 | |||
| 325 | for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { | ||
| 326 | if (h->attached_to != table) | ||
| 327 | continue; | ||
| 328 | ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); | ||
| 329 | if (ret) { | 690 | if (ret) { |
| 330 | sysctl_head_finish(h); | 691 | sysctl_head_finish(h); |
| 331 | break; | 692 | break; |
| @@ -445,6 +806,21 @@ static int proc_sys_delete(const struct dentry *dentry) | |||
| 445 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; | 806 | return !!PROC_I(dentry->d_inode)->sysctl->unregistering; |
| 446 | } | 807 | } |
| 447 | 808 | ||
| 809 | static int sysctl_is_seen(struct ctl_table_header *p) | ||
| 810 | { | ||
| 811 | struct ctl_table_set *set = p->set; | ||
| 812 | int res; | ||
| 813 | spin_lock(&sysctl_lock); | ||
| 814 | if (p->unregistering) | ||
| 815 | res = 0; | ||
| 816 | else if (!set->is_seen) | ||
| 817 | res = 1; | ||
| 818 | else | ||
| 819 | res = set->is_seen(set); | ||
| 820 | spin_unlock(&sysctl_lock); | ||
| 821 | return res; | ||
| 822 | } | ||
| 823 | |||
| 448 | static int proc_sys_compare(const struct dentry *parent, | 824 | static int proc_sys_compare(const struct dentry *parent, |
| 449 | const struct inode *pinode, | 825 | const struct inode *pinode, |
| 450 | const struct dentry *dentry, const struct inode *inode, | 826 | const struct dentry *dentry, const struct inode *inode, |
| @@ -470,6 +846,753 @@ static const struct dentry_operations proc_sys_dentry_operations = { | |||
| 470 | .d_compare = proc_sys_compare, | 846 | .d_compare = proc_sys_compare, |
| 471 | }; | 847 | }; |
| 472 | 848 | ||
| 849 | static struct ctl_dir *find_subdir(struct ctl_dir *dir, | ||
| 850 | const char *name, int namelen) | ||
| 851 | { | ||
| 852 | struct ctl_table_header *head; | ||
| 853 | struct ctl_table *entry; | ||
| 854 | |||
| 855 | entry = find_entry(&head, dir, name, namelen); | ||
| 856 | if (!entry) | ||
| 857 | return ERR_PTR(-ENOENT); | ||
| 858 | if (!S_ISDIR(entry->mode)) | ||
| 859 | return ERR_PTR(-ENOTDIR); | ||
| 860 | return container_of(head, struct ctl_dir, header); | ||
| 861 | } | ||
| 862 | |||
| 863 | static struct ctl_dir *new_dir(struct ctl_table_set *set, | ||
| 864 | const char *name, int namelen) | ||
| 865 | { | ||
| 866 | struct ctl_table *table; | ||
| 867 | struct ctl_dir *new; | ||
| 868 | struct ctl_node *node; | ||
| 869 | char *new_name; | ||
| 870 | |||
| 871 | new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) + | ||
| 872 | sizeof(struct ctl_table)*2 + namelen + 1, | ||
| 873 | GFP_KERNEL); | ||
| 874 | if (!new) | ||
| 875 | return NULL; | ||
| 876 | |||
| 877 | node = (struct ctl_node *)(new + 1); | ||
| 878 | table = (struct ctl_table *)(node + 1); | ||
| 879 | new_name = (char *)(table + 2); | ||
| 880 | memcpy(new_name, name, namelen); | ||
| 881 | new_name[namelen] = '\0'; | ||
| 882 | table[0].procname = new_name; | ||
| 883 | table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO; | ||
| 884 | init_header(&new->header, set->dir.header.root, set, node, table); | ||
| 885 | |||
| 886 | return new; | ||
| 887 | } | ||
| 888 | |||
| 889 | /** | ||
| 890 | * get_subdir - find or create a subdir with the specified name. | ||
| 891 | * @dir: Directory to create the subdirectory in | ||
| 892 | * @name: The name of the subdirectory to find or create | ||
| 893 | * @namelen: The length of name | ||
| 894 | * | ||
| 895 | * Takes a directory with an elevated reference count so we know that | ||
| 896 | * if we drop the lock the directory will not go away. Upon success | ||
| 897 | * the reference is moved from @dir to the returned subdirectory. | ||
| 898 | * Upon error an error code is returned and the reference on @dir is | ||
| 899 | * simply dropped. | ||
| 900 | */ | ||
| 901 | static struct ctl_dir *get_subdir(struct ctl_dir *dir, | ||
| 902 | const char *name, int namelen) | ||
| 903 | { | ||
| 904 | struct ctl_table_set *set = dir->header.set; | ||
| 905 | struct ctl_dir *subdir, *new = NULL; | ||
| 906 | int err; | ||
| 907 | |||
| 908 | spin_lock(&sysctl_lock); | ||
| 909 | subdir = find_subdir(dir, name, namelen); | ||
| 910 | if (!IS_ERR(subdir)) | ||
| 911 | goto found; | ||
| 912 | if (PTR_ERR(subdir) != -ENOENT) | ||
| 913 | goto failed; | ||
| 914 | |||
| 915 | spin_unlock(&sysctl_lock); | ||
| 916 | new = new_dir(set, name, namelen); | ||
| 917 | spin_lock(&sysctl_lock); | ||
| 918 | subdir = ERR_PTR(-ENOMEM); | ||
| 919 | if (!new) | ||
| 920 | goto failed; | ||
| 921 | |||
| 922 | /* Was the subdir added while we dropped the lock? */ | ||
| 923 | subdir = find_subdir(dir, name, namelen); | ||
| 924 | if (!IS_ERR(subdir)) | ||
| 925 | goto found; | ||
| 926 | if (PTR_ERR(subdir) != -ENOENT) | ||
| 927 | goto failed; | ||
| 928 | |||
| 929 | /* Nope. Use our freshly made directory entry. */ | ||
| 930 | err = insert_header(dir, &new->header); | ||
| 931 | subdir = ERR_PTR(err); | ||
| 932 | if (err) | ||
| 933 | goto failed; | ||
| 934 | subdir = new; | ||
| 935 | found: | ||
| 936 | subdir->header.nreg++; | ||
| 937 | failed: | ||
| 938 | if (unlikely(IS_ERR(subdir))) { | ||
| 939 | printk(KERN_ERR "sysctl could not get directory: "); | ||
| 940 | sysctl_print_dir(dir); | ||
| 941 | printk(KERN_CONT "/%*.*s %ld\n", | ||
| 942 | namelen, namelen, name, PTR_ERR(subdir)); | ||
| 943 | } | ||
| 944 | drop_sysctl_table(&dir->header); | ||
| 945 | if (new) | ||
| 946 | drop_sysctl_table(&new->header); | ||
| 947 | spin_unlock(&sysctl_lock); | ||
| 948 | return subdir; | ||
| 949 | } | ||
| 950 | |||
| 951 | static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir) | ||
| 952 | { | ||
| 953 | struct ctl_dir *parent; | ||
| 954 | const char *procname; | ||
| 955 | if (!dir->header.parent) | ||
| 956 | return &set->dir; | ||
| 957 | parent = xlate_dir(set, dir->header.parent); | ||
| 958 | if (IS_ERR(parent)) | ||
| 959 | return parent; | ||
| 960 | procname = dir->header.ctl_table[0].procname; | ||
| 961 | return find_subdir(parent, procname, strlen(procname)); | ||
| 962 | } | ||
| 963 | |||
| 964 | static int sysctl_follow_link(struct ctl_table_header **phead, | ||
| 965 | struct ctl_table **pentry, struct nsproxy *namespaces) | ||
| 966 | { | ||
| 967 | struct ctl_table_header *head; | ||
| 968 | struct ctl_table_root *root; | ||
| 969 | struct ctl_table_set *set; | ||
| 970 | struct ctl_table *entry; | ||
| 971 | struct ctl_dir *dir; | ||
| 972 | int ret; | ||
| 973 | |||
| 974 | ret = 0; | ||
| 975 | spin_lock(&sysctl_lock); | ||
| 976 | root = (*pentry)->data; | ||
| 977 | set = lookup_header_set(root, namespaces); | ||
| 978 | dir = xlate_dir(set, (*phead)->parent); | ||
| 979 | if (IS_ERR(dir)) | ||
| 980 | ret = PTR_ERR(dir); | ||
| 981 | else { | ||
| 982 | const char *procname = (*pentry)->procname; | ||
| 983 | head = NULL; | ||
| 984 | entry = find_entry(&head, dir, procname, strlen(procname)); | ||
| 985 | ret = -ENOENT; | ||
| 986 | if (entry && use_table(head)) { | ||
| 987 | unuse_table(*phead); | ||
| 988 | *phead = head; | ||
| 989 | *pentry = entry; | ||
| 990 | ret = 0; | ||
| 991 | } | ||
| 992 | } | ||
| 993 | |||
| 994 | spin_unlock(&sysctl_lock); | ||
| 995 | return ret; | ||
| 996 | } | ||
| 997 | |||
| 998 | static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) | ||
| 999 | { | ||
| 1000 | struct va_format vaf; | ||
| 1001 | va_list args; | ||
| 1002 | |||
| 1003 | va_start(args, fmt); | ||
| 1004 | vaf.fmt = fmt; | ||
| 1005 | vaf.va = &args; | ||
| 1006 | |||
| 1007 | printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", | ||
| 1008 | path, table->procname, &vaf); | ||
| 1009 | |||
| 1010 | va_end(args); | ||
| 1011 | return -EINVAL; | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | static int sysctl_check_table(const char *path, struct ctl_table *table) | ||
| 1015 | { | ||
| 1016 | int err = 0; | ||
| 1017 | for (; table->procname; table++) { | ||
| 1018 | if (table->child) | ||
| 1019 | err = sysctl_err(path, table, "Not a file"); | ||
| 1020 | |||
| 1021 | if ((table->proc_handler == proc_dostring) || | ||
| 1022 | (table->proc_handler == proc_dointvec) || | ||
| 1023 | (table->proc_handler == proc_dointvec_minmax) || | ||
| 1024 | (table->proc_handler == proc_dointvec_jiffies) || | ||
| 1025 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
| 1026 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
| 1027 | (table->proc_handler == proc_doulongvec_minmax) || | ||
| 1028 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
| 1029 | if (!table->data) | ||
| 1030 | err = sysctl_err(path, table, "No data"); | ||
| 1031 | if (!table->maxlen) | ||
| 1032 | err = sysctl_err(path, table, "No maxlen"); | ||
| 1033 | } | ||
| 1034 | if (!table->proc_handler) | ||
| 1035 | err = sysctl_err(path, table, "No proc_handler"); | ||
| 1036 | |||
| 1037 | if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) | ||
| 1038 | err = sysctl_err(path, table, "bogus .mode 0%o", | ||
| 1039 | table->mode); | ||
| 1040 | } | ||
| 1041 | return err; | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table, | ||
| 1045 | struct ctl_table_root *link_root) | ||
| 1046 | { | ||
| 1047 | struct ctl_table *link_table, *entry, *link; | ||
| 1048 | struct ctl_table_header *links; | ||
| 1049 | struct ctl_node *node; | ||
| 1050 | char *link_name; | ||
| 1051 | int nr_entries, name_bytes; | ||
| 1052 | |||
| 1053 | name_bytes = 0; | ||
| 1054 | nr_entries = 0; | ||
| 1055 | for (entry = table; entry->procname; entry++) { | ||
| 1056 | nr_entries++; | ||
| 1057 | name_bytes += strlen(entry->procname) + 1; | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | links = kzalloc(sizeof(struct ctl_table_header) + | ||
| 1061 | sizeof(struct ctl_node)*nr_entries + | ||
| 1062 | sizeof(struct ctl_table)*(nr_entries + 1) + | ||
| 1063 | name_bytes, | ||
| 1064 | GFP_KERNEL); | ||
| 1065 | |||
| 1066 | if (!links) | ||
| 1067 | return NULL; | ||
| 1068 | |||
| 1069 | node = (struct ctl_node *)(links + 1); | ||
| 1070 | link_table = (struct ctl_table *)(node + nr_entries); | ||
| 1071 | link_name = (char *)&link_table[nr_entries + 1]; | ||
| 1072 | |||
| 1073 | for (link = link_table, entry = table; entry->procname; link++, entry++) { | ||
| 1074 | int len = strlen(entry->procname) + 1; | ||
| 1075 | memcpy(link_name, entry->procname, len); | ||
| 1076 | link->procname = link_name; | ||
| 1077 | link->mode = S_IFLNK|S_IRWXUGO; | ||
| 1078 | link->data = link_root; | ||
| 1079 | link_name += len; | ||
| 1080 | } | ||
| 1081 | init_header(links, dir->header.root, dir->header.set, node, link_table); | ||
| 1082 | links->nreg = nr_entries; | ||
| 1083 | |||
| 1084 | return links; | ||
| 1085 | } | ||
| 1086 | |||
| 1087 | static bool get_links(struct ctl_dir *dir, | ||
| 1088 | struct ctl_table *table, struct ctl_table_root *link_root) | ||
| 1089 | { | ||
| 1090 | struct ctl_table_header *head; | ||
| 1091 | struct ctl_table *entry, *link; | ||
| 1092 | |||
| 1093 | /* Are there links available for every entry in table? */ | ||
| 1094 | for (entry = table; entry->procname; entry++) { | ||
| 1095 | const char *procname = entry->procname; | ||
| 1096 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
| 1097 | if (!link) | ||
| 1098 | return false; | ||
| 1099 | if (S_ISDIR(link->mode) && S_ISDIR(entry->mode)) | ||
| 1100 | continue; | ||
| 1101 | if (S_ISLNK(link->mode) && (link->data == link_root)) | ||
| 1102 | continue; | ||
| 1103 | return false; | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | /* The checks passed. Increase the registration count on the links */ | ||
| 1107 | for (entry = table; entry->procname; entry++) { | ||
| 1108 | const char *procname = entry->procname; | ||
| 1109 | link = find_entry(&head, dir, procname, strlen(procname)); | ||
| 1110 | head->nreg++; | ||
| 1111 | } | ||
| 1112 | return true; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | static int insert_links(struct ctl_table_header *head) | ||
| 1116 | { | ||
| 1117 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
| 1118 | struct ctl_dir *core_parent = NULL; | ||
| 1119 | struct ctl_table_header *links; | ||
| 1120 | int err; | ||
| 1121 | |||
| 1122 | if (head->set == root_set) | ||
| 1123 | return 0; | ||
| 1124 | |||
| 1125 | core_parent = xlate_dir(root_set, head->parent); | ||
| 1126 | if (IS_ERR(core_parent)) | ||
| 1127 | return 0; | ||
| 1128 | |||
| 1129 | if (get_links(core_parent, head->ctl_table, head->root)) | ||
| 1130 | return 0; | ||
| 1131 | |||
| 1132 | core_parent->header.nreg++; | ||
| 1133 | spin_unlock(&sysctl_lock); | ||
| 1134 | |||
| 1135 | links = new_links(core_parent, head->ctl_table, head->root); | ||
| 1136 | |||
| 1137 | spin_lock(&sysctl_lock); | ||
| 1138 | err = -ENOMEM; | ||
| 1139 | if (!links) | ||
| 1140 | goto out; | ||
| 1141 | |||
| 1142 | err = 0; | ||
| 1143 | if (get_links(core_parent, head->ctl_table, head->root)) { | ||
| 1144 | kfree(links); | ||
| 1145 | goto out; | ||
| 1146 | } | ||
| 1147 | |||
| 1148 | err = insert_header(core_parent, links); | ||
| 1149 | if (err) | ||
| 1150 | kfree(links); | ||
| 1151 | out: | ||
| 1152 | drop_sysctl_table(&core_parent->header); | ||
| 1153 | return err; | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | /** | ||
| 1157 | * __register_sysctl_table - register a leaf sysctl table | ||
| 1158 | * @set: Sysctl tree to register on | ||
| 1159 | * @path: The path to the directory the sysctl table is in. | ||
| 1160 | * @table: the top-level table structure | ||
| 1161 | * | ||
| 1162 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
| 1163 | * array. A completely 0 filled entry terminates the table. | ||
| 1164 | * | ||
| 1165 | * The members of the &struct ctl_table structure are used as follows: | ||
| 1166 | * | ||
| 1167 | * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not | ||
| 1168 | * enter a sysctl file | ||
| 1169 | * | ||
| 1170 | * data - a pointer to data for use by proc_handler | ||
| 1171 | * | ||
| 1172 | * maxlen - the maximum size in bytes of the data | ||
| 1173 | * | ||
| 1174 | * mode - the file permissions for the /proc/sys file | ||
| 1175 | * | ||
| 1176 | * child - must be %NULL. | ||
| 1177 | * | ||
| 1178 | * proc_handler - the text handler routine (described below) | ||
| 1179 | * | ||
| 1180 | * extra1, extra2 - extra pointers usable by the proc handler routines | ||
| 1181 | * | ||
| 1182 | * Leaf nodes in the sysctl tree will be represented by a single file | ||
| 1183 | * under /proc; non-leaf nodes will be represented by directories. | ||
| 1184 | * | ||
| 1185 | * There must be a proc_handler routine for any terminal nodes. | ||
| 1186 | * Several default handlers are available to cover common cases - | ||
| 1187 | * | ||
| 1188 | * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), | ||
| 1189 | * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), | ||
| 1190 | * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax() | ||
| 1191 | * | ||
| 1192 | * It is the handler's job to read the input buffer from user memory | ||
| 1193 | * and process it. The handler should return 0 on success. | ||
| 1194 | * | ||
| 1195 | * This routine returns %NULL on a failure to register, and a pointer | ||
| 1196 | * to the table header on success. | ||
| 1197 | */ | ||
| 1198 | struct ctl_table_header *__register_sysctl_table( | ||
| 1199 | struct ctl_table_set *set, | ||
| 1200 | const char *path, struct ctl_table *table) | ||
| 1201 | { | ||
| 1202 | struct ctl_table_root *root = set->dir.header.root; | ||
| 1203 | struct ctl_table_header *header; | ||
| 1204 | const char *name, *nextname; | ||
| 1205 | struct ctl_dir *dir; | ||
| 1206 | struct ctl_table *entry; | ||
| 1207 | struct ctl_node *node; | ||
| 1208 | int nr_entries = 0; | ||
| 1209 | |||
| 1210 | for (entry = table; entry->procname; entry++) | ||
| 1211 | nr_entries++; | ||
| 1212 | |||
| 1213 | header = kzalloc(sizeof(struct ctl_table_header) + | ||
| 1214 | sizeof(struct ctl_node)*nr_entries, GFP_KERNEL); | ||
| 1215 | if (!header) | ||
| 1216 | return NULL; | ||
| 1217 | |||
| 1218 | node = (struct ctl_node *)(header + 1); | ||
| 1219 | init_header(header, root, set, node, table); | ||
| 1220 | if (sysctl_check_table(path, table)) | ||
| 1221 | goto fail; | ||
| 1222 | |||
| 1223 | spin_lock(&sysctl_lock); | ||
| 1224 | dir = &set->dir; | ||
| 1225 | /* Reference moved down the directory tree by get_subdir */ | ||
| 1226 | dir->header.nreg++; | ||
| 1227 | spin_unlock(&sysctl_lock); | ||
| 1228 | |||
| 1229 | /* Find the directory for the ctl_table */ | ||
| 1230 | for (name = path; name; name = nextname) { | ||
| 1231 | int namelen; | ||
| 1232 | nextname = strchr(name, '/'); | ||
| 1233 | if (nextname) { | ||
| 1234 | namelen = nextname - name; | ||
| 1235 | nextname++; | ||
| 1236 | } else { | ||
| 1237 | namelen = strlen(name); | ||
| 1238 | } | ||
| 1239 | if (namelen == 0) | ||
| 1240 | continue; | ||
| 1241 | |||
| 1242 | dir = get_subdir(dir, name, namelen); | ||
| 1243 | if (IS_ERR(dir)) | ||
| 1244 | goto fail; | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | spin_lock(&sysctl_lock); | ||
| 1248 | if (insert_header(dir, header)) | ||
| 1249 | goto fail_put_dir_locked; | ||
| 1250 | |||
| 1251 | drop_sysctl_table(&dir->header); | ||
| 1252 | spin_unlock(&sysctl_lock); | ||
| 1253 | |||
| 1254 | return header; | ||
| 1255 | |||
| 1256 | fail_put_dir_locked: | ||
| 1257 | drop_sysctl_table(&dir->header); | ||
| 1258 | spin_unlock(&sysctl_lock); | ||
| 1259 | fail: | ||
| 1260 | kfree(header); | ||
| 1261 | dump_stack(); | ||
| 1262 | return NULL; | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | /** | ||
| 1266 | * register_sysctl - register a sysctl table | ||
| 1267 | * @path: The path to the directory the sysctl table is in. | ||
| 1268 | * @table: the table structure | ||
| 1269 | * | ||
| 1270 | * Register a sysctl table. @table should be a filled in ctl_table | ||
| 1271 | * array. A completely 0 filled entry terminates the table. | ||
| 1272 | * | ||
| 1273 | * See __register_sysctl_table for more details. | ||
| 1274 | */ | ||
| 1275 | struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table) | ||
| 1276 | { | ||
| 1277 | return __register_sysctl_table(&sysctl_table_root.default_set, | ||
| 1278 | path, table); | ||
| 1279 | } | ||
| 1280 | EXPORT_SYMBOL(register_sysctl); | ||
| 1281 | |||
| 1282 | static char *append_path(const char *path, char *pos, const char *name) | ||
| 1283 | { | ||
| 1284 | int namelen; | ||
| 1285 | namelen = strlen(name); | ||
| 1286 | if (((pos - path) + namelen + 2) >= PATH_MAX) | ||
| 1287 | return NULL; | ||
| 1288 | memcpy(pos, name, namelen); | ||
| 1289 | pos[namelen] = '/'; | ||
| 1290 | pos[namelen + 1] = '\0'; | ||
| 1291 | pos += namelen + 1; | ||
| 1292 | return pos; | ||
| 1293 | } | ||
| 1294 | |||
| 1295 | static int count_subheaders(struct ctl_table *table) | ||
| 1296 | { | ||
| 1297 | int has_files = 0; | ||
| 1298 | int nr_subheaders = 0; | ||
| 1299 | struct ctl_table *entry; | ||
| 1300 | |||
| 1301 | /* special case: no directory and empty directory */ | ||
| 1302 | if (!table || !table->procname) | ||
| 1303 | return 1; | ||
| 1304 | |||
| 1305 | for (entry = table; entry->procname; entry++) { | ||
| 1306 | if (entry->child) | ||
| 1307 | nr_subheaders += count_subheaders(entry->child); | ||
| 1308 | else | ||
| 1309 | has_files = 1; | ||
| 1310 | } | ||
| 1311 | return nr_subheaders + has_files; | ||
| 1312 | } | ||
| 1313 | |||
| 1314 | static int register_leaf_sysctl_tables(const char *path, char *pos, | ||
| 1315 | struct ctl_table_header ***subheader, struct ctl_table_set *set, | ||
| 1316 | struct ctl_table *table) | ||
| 1317 | { | ||
| 1318 | struct ctl_table *ctl_table_arg = NULL; | ||
| 1319 | struct ctl_table *entry, *files; | ||
| 1320 | int nr_files = 0; | ||
| 1321 | int nr_dirs = 0; | ||
| 1322 | int err = -ENOMEM; | ||
| 1323 | |||
| 1324 | for (entry = table; entry->procname; entry++) { | ||
| 1325 | if (entry->child) | ||
| 1326 | nr_dirs++; | ||
| 1327 | else | ||
| 1328 | nr_files++; | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | files = table; | ||
| 1332 | /* If there are mixed files and directories we need a new table */ | ||
| 1333 | if (nr_dirs && nr_files) { | ||
| 1334 | struct ctl_table *new; | ||
| 1335 | files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1), | ||
| 1336 | GFP_KERNEL); | ||
| 1337 | if (!files) | ||
| 1338 | goto out; | ||
| 1339 | |||
| 1340 | ctl_table_arg = files; | ||
| 1341 | for (new = files, entry = table; entry->procname; entry++) { | ||
| 1342 | if (entry->child) | ||
| 1343 | continue; | ||
| 1344 | *new = *entry; | ||
| 1345 | new++; | ||
| 1346 | } | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | /* Register everything except a directory full of subdirectories */ | ||
| 1350 | if (nr_files || !nr_dirs) { | ||
| 1351 | struct ctl_table_header *header; | ||
| 1352 | header = __register_sysctl_table(set, path, files); | ||
| 1353 | if (!header) { | ||
| 1354 | kfree(ctl_table_arg); | ||
| 1355 | goto out; | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | /* Remember if we need to free the file table */ | ||
| 1359 | header->ctl_table_arg = ctl_table_arg; | ||
| 1360 | **subheader = header; | ||
| 1361 | (*subheader)++; | ||
| 1362 | } | ||
| 1363 | |||
| 1364 | /* Recurse into the subdirectories. */ | ||
| 1365 | for (entry = table; entry->procname; entry++) { | ||
| 1366 | char *child_pos; | ||
| 1367 | |||
| 1368 | if (!entry->child) | ||
| 1369 | continue; | ||
| 1370 | |||
| 1371 | err = -ENAMETOOLONG; | ||
| 1372 | child_pos = append_path(path, pos, entry->procname); | ||
| 1373 | if (!child_pos) | ||
| 1374 | goto out; | ||
| 1375 | |||
| 1376 | err = register_leaf_sysctl_tables(path, child_pos, subheader, | ||
| 1377 | set, entry->child); | ||
| 1378 | pos[0] = '\0'; | ||
| 1379 | if (err) | ||
| 1380 | goto out; | ||
| 1381 | } | ||
| 1382 | err = 0; | ||
| 1383 | out: | ||
| 1384 | /* On failure our caller will unregister all registered subheaders */ | ||
| 1385 | return err; | ||
| 1386 | } | ||
| 1387 | |||
| 1388 | /** | ||
| 1389 | * __register_sysctl_paths - register a sysctl table hierarchy | ||
| 1390 | * @set: Sysctl tree to register on | ||
| 1391 | * @path: The path to the directory the sysctl table is in. | ||
| 1392 | * @table: the top-level table structure | ||
| 1393 | * | ||
| 1394 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
| 1395 | * array. A completely 0 filled entry terminates the table. | ||
| 1396 | * | ||
| 1397 | * See __register_sysctl_table for more details. | ||
| 1398 | */ | ||
| 1399 | struct ctl_table_header *__register_sysctl_paths( | ||
| 1400 | struct ctl_table_set *set, | ||
| 1401 | const struct ctl_path *path, struct ctl_table *table) | ||
| 1402 | { | ||
| 1403 | struct ctl_table *ctl_table_arg = table; | ||
| 1404 | int nr_subheaders = count_subheaders(table); | ||
| 1405 | struct ctl_table_header *header = NULL, **subheaders, **subheader; | ||
| 1406 | const struct ctl_path *component; | ||
| 1407 | char *new_path, *pos; | ||
| 1408 | |||
| 1409 | pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL); | ||
| 1410 | if (!new_path) | ||
| 1411 | return NULL; | ||
| 1412 | |||
| 1413 | pos[0] = '\0'; | ||
| 1414 | for (component = path; component->procname; component++) { | ||
| 1415 | pos = append_path(new_path, pos, component->procname); | ||
| 1416 | if (!pos) | ||
| 1417 | goto out; | ||
| 1418 | } | ||
| 1419 | while (table->procname && table->child && !table[1].procname) { | ||
| 1420 | pos = append_path(new_path, pos, table->procname); | ||
| 1421 | if (!pos) | ||
| 1422 | goto out; | ||
| 1423 | table = table->child; | ||
| 1424 | } | ||
| 1425 | if (nr_subheaders == 1) { | ||
| 1426 | header = __register_sysctl_table(set, new_path, table); | ||
| 1427 | if (header) | ||
| 1428 | header->ctl_table_arg = ctl_table_arg; | ||
| 1429 | } else { | ||
| 1430 | header = kzalloc(sizeof(*header) + | ||
| 1431 | sizeof(*subheaders)*nr_subheaders, GFP_KERNEL); | ||
| 1432 | if (!header) | ||
| 1433 | goto out; | ||
| 1434 | |||
| 1435 | subheaders = (struct ctl_table_header **) (header + 1); | ||
| 1436 | subheader = subheaders; | ||
| 1437 | header->ctl_table_arg = ctl_table_arg; | ||
| 1438 | |||
| 1439 | if (register_leaf_sysctl_tables(new_path, pos, &subheader, | ||
| 1440 | set, table)) | ||
| 1441 | goto err_register_leaves; | ||
| 1442 | } | ||
| 1443 | |||
| 1444 | out: | ||
| 1445 | kfree(new_path); | ||
| 1446 | return header; | ||
| 1447 | |||
| 1448 | err_register_leaves: | ||
| 1449 | while (subheader > subheaders) { | ||
| 1450 | struct ctl_table_header *subh = *(--subheader); | ||
| 1451 | struct ctl_table *table = subh->ctl_table_arg; | ||
| 1452 | unregister_sysctl_table(subh); | ||
| 1453 | kfree(table); | ||
| 1454 | } | ||
| 1455 | kfree(header); | ||
| 1456 | header = NULL; | ||
| 1457 | goto out; | ||
| 1458 | } | ||
| 1459 | |||
| 1460 | /** | ||
| 1461 | * register_sysctl_paths - register a sysctl table hierarchy | ||
| 1462 | * @path: The path to the directory the sysctl table is in. | ||
| 1463 | * @table: the top-level table structure | ||
| 1464 | * | ||
| 1465 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
| 1466 | * array. A completely 0 filled entry terminates the table. | ||
| 1467 | * | ||
| 1468 | * See __register_sysctl_paths for more details. | ||
| 1469 | */ | ||
| 1470 | struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, | ||
| 1471 | struct ctl_table *table) | ||
| 1472 | { | ||
| 1473 | return __register_sysctl_paths(&sysctl_table_root.default_set, | ||
| 1474 | path, table); | ||
| 1475 | } | ||
| 1476 | EXPORT_SYMBOL(register_sysctl_paths); | ||
| 1477 | |||
| 1478 | /** | ||
| 1479 | * register_sysctl_table - register a sysctl table hierarchy | ||
| 1480 | * @table: the top-level table structure | ||
| 1481 | * | ||
| 1482 | * Register a sysctl table hierarchy. @table should be a filled in ctl_table | ||
| 1483 | * array. A completely 0 filled entry terminates the table. | ||
| 1484 | * | ||
| 1485 | * See register_sysctl_paths for more details. | ||
| 1486 | */ | ||
| 1487 | struct ctl_table_header *register_sysctl_table(struct ctl_table *table) | ||
| 1488 | { | ||
| 1489 | static const struct ctl_path null_path[] = { {} }; | ||
| 1490 | |||
| 1491 | return register_sysctl_paths(null_path, table); | ||
| 1492 | } | ||
| 1493 | EXPORT_SYMBOL(register_sysctl_table); | ||
| 1494 | |||
| 1495 | static void put_links(struct ctl_table_header *header) | ||
| 1496 | { | ||
| 1497 | struct ctl_table_set *root_set = &sysctl_table_root.default_set; | ||
| 1498 | struct ctl_table_root *root = header->root; | ||
| 1499 | struct ctl_dir *parent = header->parent; | ||
| 1500 | struct ctl_dir *core_parent; | ||
| 1501 | struct ctl_table *entry; | ||
| 1502 | |||
| 1503 | if (header->set == root_set) | ||
| 1504 | return; | ||
| 1505 | |||
| 1506 | core_parent = xlate_dir(root_set, parent); | ||
| 1507 | if (IS_ERR(core_parent)) | ||
| 1508 | return; | ||
| 1509 | |||
| 1510 | for (entry = header->ctl_table; entry->procname; entry++) { | ||
| 1511 | struct ctl_table_header *link_head; | ||
| 1512 | struct ctl_table *link; | ||
| 1513 | const char *name = entry->procname; | ||
| 1514 | |||
| 1515 | link = find_entry(&link_head, core_parent, name, strlen(name)); | ||
| 1516 | if (link && | ||
| 1517 | ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) || | ||
| 1518 | (S_ISLNK(link->mode) && (link->data == root)))) { | ||
| 1519 | drop_sysctl_table(link_head); | ||
| 1520 | } | ||
| 1521 | else { | ||
| 1522 | printk(KERN_ERR "sysctl link missing during unregister: "); | ||
| 1523 | sysctl_print_dir(parent); | ||
| 1524 | printk(KERN_CONT "/%s\n", name); | ||
| 1525 | } | ||
| 1526 | } | ||
| 1527 | } | ||
| 1528 | |||
| 1529 | static void drop_sysctl_table(struct ctl_table_header *header) | ||
| 1530 | { | ||
| 1531 | struct ctl_dir *parent = header->parent; | ||
| 1532 | |||
| 1533 | if (--header->nreg) | ||
| 1534 | return; | ||
| 1535 | |||
| 1536 | put_links(header); | ||
| 1537 | start_unregistering(header); | ||
| 1538 | if (!--header->count) | ||
| 1539 | kfree_rcu(header, rcu); | ||
| 1540 | |||
| 1541 | if (parent) | ||
| 1542 | drop_sysctl_table(&parent->header); | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | /** | ||
| 1546 | * unregister_sysctl_table - unregister a sysctl table hierarchy | ||
| 1547 | * @header: the header returned from register_sysctl_table | ||
| 1548 | * | ||
| 1549 | * Unregisters the sysctl table and all children. proc entries may not | ||
| 1550 | * actually be removed until they are no longer used by anyone. | ||
| 1551 | */ | ||
| 1552 | void unregister_sysctl_table(struct ctl_table_header * header) | ||
| 1553 | { | ||
| 1554 | int nr_subheaders; | ||
| 1555 | might_sleep(); | ||
| 1556 | |||
| 1557 | if (header == NULL) | ||
| 1558 | return; | ||
| 1559 | |||
| 1560 | nr_subheaders = count_subheaders(header->ctl_table_arg); | ||
| 1561 | if (unlikely(nr_subheaders > 1)) { | ||
| 1562 | struct ctl_table_header **subheaders; | ||
| 1563 | int i; | ||
| 1564 | |||
| 1565 | subheaders = (struct ctl_table_header **)(header + 1); | ||
| 1566 | for (i = nr_subheaders -1; i >= 0; i--) { | ||
| 1567 | struct ctl_table_header *subh = subheaders[i]; | ||
| 1568 | struct ctl_table *table = subh->ctl_table_arg; | ||
| 1569 | unregister_sysctl_table(subh); | ||
| 1570 | kfree(table); | ||
| 1571 | } | ||
| 1572 | kfree(header); | ||
| 1573 | return; | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | spin_lock(&sysctl_lock); | ||
| 1577 | drop_sysctl_table(header); | ||
| 1578 | spin_unlock(&sysctl_lock); | ||
| 1579 | } | ||
| 1580 | EXPORT_SYMBOL(unregister_sysctl_table); | ||
| 1581 | |||
| 1582 | void setup_sysctl_set(struct ctl_table_set *set, | ||
| 1583 | struct ctl_table_root *root, | ||
| 1584 | int (*is_seen)(struct ctl_table_set *)) | ||
| 1585 | { | ||
| 1586 | memset(set, 0, sizeof(*set)); | ||
| 1587 | set->is_seen = is_seen; | ||
| 1588 | init_header(&set->dir.header, root, set, NULL, root_table); | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | void retire_sysctl_set(struct ctl_table_set *set) | ||
| 1592 | { | ||
| 1593 | WARN_ON(!RB_EMPTY_ROOT(&set->dir.root)); | ||
| 1594 | } | ||
| 1595 | |||
| 473 | int __init proc_sys_init(void) | 1596 | int __init proc_sys_init(void) |
| 474 | { | 1597 | { |
| 475 | struct proc_dir_entry *proc_sys_root; | 1598 | struct proc_dir_entry *proc_sys_root; |
| @@ -478,5 +1601,6 @@ int __init proc_sys_init(void) | |||
| 478 | proc_sys_root->proc_iops = &proc_sys_dir_operations; | 1601 | proc_sys_root->proc_iops = &proc_sys_dir_operations; |
| 479 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; | 1602 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; |
| 480 | proc_sys_root->nlink = 0; | 1603 | proc_sys_root->nlink = 0; |
| 481 | return 0; | 1604 | |
| 1605 | return sysctl_init(); | ||
| 482 | } | 1606 | } |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 46a15d8a29ca..7c30fce037c0 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
| @@ -67,7 +67,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) | |||
| 67 | case Opt_gid: | 67 | case Opt_gid: |
| 68 | if (match_int(&args[0], &option)) | 68 | if (match_int(&args[0], &option)) |
| 69 | return 0; | 69 | return 0; |
| 70 | pid->pid_gid = option; | 70 | pid->pid_gid = make_kgid(current_user_ns(), option); |
| 71 | break; | 71 | break; |
| 72 | case Opt_hidepid: | 72 | case Opt_hidepid: |
| 73 | if (match_int(&args[0], &option)) | 73 | if (match_int(&args[0], &option)) |
| @@ -115,12 +115,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, | |||
| 115 | if (IS_ERR(sb)) | 115 | if (IS_ERR(sb)) |
| 116 | return ERR_CAST(sb); | 116 | return ERR_CAST(sb); |
| 117 | 117 | ||
| 118 | if (!proc_parse_options(options, ns)) { | ||
| 119 | deactivate_locked_super(sb); | ||
| 120 | return ERR_PTR(-EINVAL); | ||
| 121 | } | ||
| 122 | |||
| 118 | if (!sb->s_root) { | 123 | if (!sb->s_root) { |
| 119 | sb->s_flags = flags; | 124 | sb->s_flags = flags; |
| 120 | if (!proc_parse_options(options, ns)) { | ||
| 121 | deactivate_locked_super(sb); | ||
| 122 | return ERR_PTR(-EINVAL); | ||
| 123 | } | ||
| 124 | err = proc_fill_super(sb); | 125 | err = proc_fill_super(sb); |
| 125 | if (err) { | 126 | if (err) { |
| 126 | deactivate_locked_super(sb); | 127 | deactivate_locked_super(sb); |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 121f77cfef76..64c3b3172367 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
| @@ -18,19 +18,39 @@ | |||
| 18 | #ifndef arch_irq_stat | 18 | #ifndef arch_irq_stat |
| 19 | #define arch_irq_stat() 0 | 19 | #define arch_irq_stat() 0 |
| 20 | #endif | 20 | #endif |
| 21 | #ifndef arch_idle_time | 21 | |
| 22 | #define arch_idle_time(cpu) 0 | 22 | #ifdef arch_idle_time |
| 23 | #endif | 23 | |
| 24 | static cputime64_t get_idle_time(int cpu) | ||
| 25 | { | ||
| 26 | cputime64_t idle; | ||
| 27 | |||
| 28 | idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; | ||
| 29 | if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) | ||
| 30 | idle += arch_idle_time(cpu); | ||
| 31 | return idle; | ||
| 32 | } | ||
| 33 | |||
| 34 | static cputime64_t get_iowait_time(int cpu) | ||
| 35 | { | ||
| 36 | cputime64_t iowait; | ||
| 37 | |||
| 38 | iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; | ||
| 39 | if (cpu_online(cpu) && nr_iowait_cpu(cpu)) | ||
| 40 | iowait += arch_idle_time(cpu); | ||
| 41 | return iowait; | ||
| 42 | } | ||
| 43 | |||
| 44 | #else | ||
| 24 | 45 | ||
| 25 | static u64 get_idle_time(int cpu) | 46 | static u64 get_idle_time(int cpu) |
| 26 | { | 47 | { |
| 27 | u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); | 48 | u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); |
| 28 | 49 | ||
| 29 | if (idle_time == -1ULL) { | 50 | if (idle_time == -1ULL) |
| 30 | /* !NO_HZ so we can rely on cpustat.idle */ | 51 | /* !NO_HZ so we can rely on cpustat.idle */ |
| 31 | idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; | 52 | idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; |
| 32 | idle += arch_idle_time(cpu); | 53 | else |
| 33 | } else | ||
| 34 | idle = usecs_to_cputime64(idle_time); | 54 | idle = usecs_to_cputime64(idle_time); |
| 35 | 55 | ||
| 36 | return idle; | 56 | return idle; |
| @@ -49,6 +69,8 @@ static u64 get_iowait_time(int cpu) | |||
| 49 | return iowait; | 69 | return iowait; |
| 50 | } | 70 | } |
| 51 | 71 | ||
| 72 | #endif | ||
| 73 | |||
| 52 | static int show_stat(struct seq_file *p, void *v) | 74 | static int show_stat(struct seq_file *p, void *v) |
| 53 | { | 75 | { |
| 54 | int i, j; | 76 | int i, j; |
| @@ -89,18 +111,19 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 89 | } | 111 | } |
| 90 | sum += arch_irq_stat(); | 112 | sum += arch_irq_stat(); |
| 91 | 113 | ||
| 92 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " | 114 | seq_puts(p, "cpu "); |
| 93 | "%llu\n", | 115 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); |
| 94 | (unsigned long long)cputime64_to_clock_t(user), | 116 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); |
| 95 | (unsigned long long)cputime64_to_clock_t(nice), | 117 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); |
| 96 | (unsigned long long)cputime64_to_clock_t(system), | 118 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); |
| 97 | (unsigned long long)cputime64_to_clock_t(idle), | 119 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); |
| 98 | (unsigned long long)cputime64_to_clock_t(iowait), | 120 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); |
| 99 | (unsigned long long)cputime64_to_clock_t(irq), | 121 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); |
| 100 | (unsigned long long)cputime64_to_clock_t(softirq), | 122 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); |
| 101 | (unsigned long long)cputime64_to_clock_t(steal), | 123 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); |
| 102 | (unsigned long long)cputime64_to_clock_t(guest), | 124 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); |
| 103 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | 125 | seq_putc(p, '\n'); |
| 126 | |||
| 104 | for_each_online_cpu(i) { | 127 | for_each_online_cpu(i) { |
| 105 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ | 128 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ |
| 106 | user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; | 129 | user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; |
| @@ -113,26 +136,24 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 113 | steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; | 136 | steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; |
| 114 | guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; | 137 | guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; |
| 115 | guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; | 138 | guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; |
| 116 | seq_printf(p, | 139 | seq_printf(p, "cpu%d", i); |
| 117 | "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " | 140 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user)); |
| 118 | "%llu\n", | 141 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice)); |
| 119 | i, | 142 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system)); |
| 120 | (unsigned long long)cputime64_to_clock_t(user), | 143 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle)); |
| 121 | (unsigned long long)cputime64_to_clock_t(nice), | 144 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait)); |
| 122 | (unsigned long long)cputime64_to_clock_t(system), | 145 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq)); |
| 123 | (unsigned long long)cputime64_to_clock_t(idle), | 146 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq)); |
| 124 | (unsigned long long)cputime64_to_clock_t(iowait), | 147 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal)); |
| 125 | (unsigned long long)cputime64_to_clock_t(irq), | 148 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest)); |
| 126 | (unsigned long long)cputime64_to_clock_t(softirq), | 149 | seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice)); |
| 127 | (unsigned long long)cputime64_to_clock_t(steal), | 150 | seq_putc(p, '\n'); |
| 128 | (unsigned long long)cputime64_to_clock_t(guest), | ||
| 129 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | ||
| 130 | } | 151 | } |
| 131 | seq_printf(p, "intr %llu", (unsigned long long)sum); | 152 | seq_printf(p, "intr %llu", (unsigned long long)sum); |
| 132 | 153 | ||
| 133 | /* sum again ? it could be updated? */ | 154 | /* sum again ? it could be updated? */ |
| 134 | for_each_irq_nr(j) | 155 | for_each_irq_nr(j) |
| 135 | seq_printf(p, " %u", kstat_irqs(j)); | 156 | seq_put_decimal_ull(p, ' ', kstat_irqs(j)); |
| 136 | 157 | ||
| 137 | seq_printf(p, | 158 | seq_printf(p, |
| 138 | "\nctxt %llu\n" | 159 | "\nctxt %llu\n" |
| @@ -149,7 +170,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 149 | seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); | 170 | seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq); |
| 150 | 171 | ||
| 151 | for (i = 0; i < NR_SOFTIRQS; i++) | 172 | for (i = 0; i < NR_SOFTIRQS; i++) |
| 152 | seq_printf(p, " %u", per_softirq_sums[i]); | 173 | seq_put_decimal_ull(p, ' ', per_softirq_sums[i]); |
| 153 | seq_putc(p, '\n'); | 174 | seq_putc(p, '\n'); |
| 154 | 175 | ||
| 155 | return 0; | 176 | return 0; |
| @@ -157,11 +178,14 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 157 | 178 | ||
| 158 | static int stat_open(struct inode *inode, struct file *file) | 179 | static int stat_open(struct inode *inode, struct file *file) |
| 159 | { | 180 | { |
| 160 | unsigned size = 4096 * (1 + num_possible_cpus() / 32); | 181 | unsigned size = 1024 + 128 * num_possible_cpus(); |
| 161 | char *buf; | 182 | char *buf; |
| 162 | struct seq_file *m; | 183 | struct seq_file *m; |
| 163 | int res; | 184 | int res; |
| 164 | 185 | ||
| 186 | /* minimum size to display an interrupt count : 2 bytes */ | ||
| 187 | size += 2 * nr_irqs; | ||
| 188 | |||
| 165 | /* don't ask for more than the kmalloc() max size */ | 189 | /* don't ask for more than the kmalloc() max size */ |
| 166 | if (size > KMALLOC_MAX_SIZE) | 190 | if (size > KMALLOC_MAX_SIZE) |
| 167 | size = KMALLOC_MAX_SIZE; | 191 | size = KMALLOC_MAX_SIZE; |
| @@ -173,7 +197,7 @@ static int stat_open(struct inode *inode, struct file *file) | |||
| 173 | if (!res) { | 197 | if (!res) { |
| 174 | m = file->private_data; | 198 | m = file->private_data; |
| 175 | m->buf = buf; | 199 | m->buf = buf; |
| 176 | m->size = size; | 200 | m->size = ksize(buf); |
| 177 | } else | 201 | } else |
| 178 | kfree(buf); | 202 | kfree(buf); |
| 179 | return res; | 203 | return res; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7dcd2a250495..7faaf2acc570 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -209,16 +209,20 @@ static int do_maps_open(struct inode *inode, struct file *file, | |||
| 209 | return ret; | 209 | return ret; |
| 210 | } | 210 | } |
| 211 | 211 | ||
| 212 | static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | 212 | static void |
| 213 | show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | ||
| 213 | { | 214 | { |
| 214 | struct mm_struct *mm = vma->vm_mm; | 215 | struct mm_struct *mm = vma->vm_mm; |
| 215 | struct file *file = vma->vm_file; | 216 | struct file *file = vma->vm_file; |
| 217 | struct proc_maps_private *priv = m->private; | ||
| 218 | struct task_struct *task = priv->task; | ||
| 216 | vm_flags_t flags = vma->vm_flags; | 219 | vm_flags_t flags = vma->vm_flags; |
| 217 | unsigned long ino = 0; | 220 | unsigned long ino = 0; |
| 218 | unsigned long long pgoff = 0; | 221 | unsigned long long pgoff = 0; |
| 219 | unsigned long start, end; | 222 | unsigned long start, end; |
| 220 | dev_t dev = 0; | 223 | dev_t dev = 0; |
| 221 | int len; | 224 | int len; |
| 225 | const char *name = NULL; | ||
| 222 | 226 | ||
| 223 | if (file) { | 227 | if (file) { |
| 224 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 228 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
| @@ -252,36 +256,57 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
| 252 | if (file) { | 256 | if (file) { |
| 253 | pad_len_spaces(m, len); | 257 | pad_len_spaces(m, len); |
| 254 | seq_path(m, &file->f_path, "\n"); | 258 | seq_path(m, &file->f_path, "\n"); |
| 255 | } else { | 259 | goto done; |
| 256 | const char *name = arch_vma_name(vma); | 260 | } |
| 257 | if (!name) { | 261 | |
| 258 | if (mm) { | 262 | name = arch_vma_name(vma); |
| 259 | if (vma->vm_start <= mm->brk && | 263 | if (!name) { |
| 260 | vma->vm_end >= mm->start_brk) { | 264 | pid_t tid; |
| 261 | name = "[heap]"; | 265 | |
| 262 | } else if (vma->vm_start <= mm->start_stack && | 266 | if (!mm) { |
| 263 | vma->vm_end >= mm->start_stack) { | 267 | name = "[vdso]"; |
| 264 | name = "[stack]"; | 268 | goto done; |
| 265 | } | 269 | } |
| 270 | |||
| 271 | if (vma->vm_start <= mm->brk && | ||
| 272 | vma->vm_end >= mm->start_brk) { | ||
| 273 | name = "[heap]"; | ||
| 274 | goto done; | ||
| 275 | } | ||
| 276 | |||
| 277 | tid = vm_is_stack(task, vma, is_pid); | ||
| 278 | |||
| 279 | if (tid != 0) { | ||
| 280 | /* | ||
| 281 | * Thread stack in /proc/PID/task/TID/maps or | ||
| 282 | * the main process stack. | ||
| 283 | */ | ||
| 284 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
| 285 | vma->vm_end >= mm->start_stack)) { | ||
| 286 | name = "[stack]"; | ||
| 266 | } else { | 287 | } else { |
| 267 | name = "[vdso]"; | 288 | /* Thread stack in /proc/PID/maps */ |
| 289 | pad_len_spaces(m, len); | ||
| 290 | seq_printf(m, "[stack:%d]", tid); | ||
| 268 | } | 291 | } |
| 269 | } | 292 | } |
| 270 | if (name) { | 293 | } |
| 271 | pad_len_spaces(m, len); | 294 | |
| 272 | seq_puts(m, name); | 295 | done: |
| 273 | } | 296 | if (name) { |
| 297 | pad_len_spaces(m, len); | ||
| 298 | seq_puts(m, name); | ||
| 274 | } | 299 | } |
| 275 | seq_putc(m, '\n'); | 300 | seq_putc(m, '\n'); |
| 276 | } | 301 | } |
| 277 | 302 | ||
| 278 | static int show_map(struct seq_file *m, void *v) | 303 | static int show_map(struct seq_file *m, void *v, int is_pid) |
| 279 | { | 304 | { |
| 280 | struct vm_area_struct *vma = v; | 305 | struct vm_area_struct *vma = v; |
| 281 | struct proc_maps_private *priv = m->private; | 306 | struct proc_maps_private *priv = m->private; |
| 282 | struct task_struct *task = priv->task; | 307 | struct task_struct *task = priv->task; |
| 283 | 308 | ||
| 284 | show_map_vma(m, vma); | 309 | show_map_vma(m, vma, is_pid); |
| 285 | 310 | ||
| 286 | if (m->count < m->size) /* vma is copied successfully */ | 311 | if (m->count < m->size) /* vma is copied successfully */ |
| 287 | m->version = (vma != get_gate_vma(task->mm)) | 312 | m->version = (vma != get_gate_vma(task->mm)) |
| @@ -289,20 +314,49 @@ static int show_map(struct seq_file *m, void *v) | |||
| 289 | return 0; | 314 | return 0; |
| 290 | } | 315 | } |
| 291 | 316 | ||
| 317 | static int show_pid_map(struct seq_file *m, void *v) | ||
| 318 | { | ||
| 319 | return show_map(m, v, 1); | ||
| 320 | } | ||
| 321 | |||
| 322 | static int show_tid_map(struct seq_file *m, void *v) | ||
| 323 | { | ||
| 324 | return show_map(m, v, 0); | ||
| 325 | } | ||
| 326 | |||
| 292 | static const struct seq_operations proc_pid_maps_op = { | 327 | static const struct seq_operations proc_pid_maps_op = { |
| 293 | .start = m_start, | 328 | .start = m_start, |
| 294 | .next = m_next, | 329 | .next = m_next, |
| 295 | .stop = m_stop, | 330 | .stop = m_stop, |
| 296 | .show = show_map | 331 | .show = show_pid_map |
| 297 | }; | 332 | }; |
| 298 | 333 | ||
| 299 | static int maps_open(struct inode *inode, struct file *file) | 334 | static const struct seq_operations proc_tid_maps_op = { |
| 335 | .start = m_start, | ||
| 336 | .next = m_next, | ||
| 337 | .stop = m_stop, | ||
| 338 | .show = show_tid_map | ||
| 339 | }; | ||
| 340 | |||
| 341 | static int pid_maps_open(struct inode *inode, struct file *file) | ||
| 300 | { | 342 | { |
| 301 | return do_maps_open(inode, file, &proc_pid_maps_op); | 343 | return do_maps_open(inode, file, &proc_pid_maps_op); |
| 302 | } | 344 | } |
| 303 | 345 | ||
| 304 | const struct file_operations proc_maps_operations = { | 346 | static int tid_maps_open(struct inode *inode, struct file *file) |
| 305 | .open = maps_open, | 347 | { |
| 348 | return do_maps_open(inode, file, &proc_tid_maps_op); | ||
| 349 | } | ||
| 350 | |||
| 351 | const struct file_operations proc_pid_maps_operations = { | ||
| 352 | .open = pid_maps_open, | ||
| 353 | .read = seq_read, | ||
| 354 | .llseek = seq_lseek, | ||
| 355 | .release = seq_release_private, | ||
| 356 | }; | ||
| 357 | |||
| 358 | const struct file_operations proc_tid_maps_operations = { | ||
| 359 | .open = tid_maps_open, | ||
| 306 | .read = seq_read, | 360 | .read = seq_read, |
| 307 | .llseek = seq_lseek, | 361 | .llseek = seq_lseek, |
| 308 | .release = seq_release_private, | 362 | .release = seq_release_private, |
| @@ -394,21 +448,15 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 394 | pte_t *pte; | 448 | pte_t *pte; |
| 395 | spinlock_t *ptl; | 449 | spinlock_t *ptl; |
| 396 | 450 | ||
| 397 | spin_lock(&walk->mm->page_table_lock); | 451 | if (pmd_trans_huge_lock(pmd, vma) == 1) { |
| 398 | if (pmd_trans_huge(*pmd)) { | 452 | smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); |
| 399 | if (pmd_trans_splitting(*pmd)) { | ||
| 400 | spin_unlock(&walk->mm->page_table_lock); | ||
| 401 | wait_split_huge_page(vma->anon_vma, pmd); | ||
| 402 | } else { | ||
| 403 | smaps_pte_entry(*(pte_t *)pmd, addr, | ||
| 404 | HPAGE_PMD_SIZE, walk); | ||
| 405 | spin_unlock(&walk->mm->page_table_lock); | ||
| 406 | mss->anonymous_thp += HPAGE_PMD_SIZE; | ||
| 407 | return 0; | ||
| 408 | } | ||
| 409 | } else { | ||
| 410 | spin_unlock(&walk->mm->page_table_lock); | 453 | spin_unlock(&walk->mm->page_table_lock); |
| 454 | mss->anonymous_thp += HPAGE_PMD_SIZE; | ||
| 455 | return 0; | ||
| 411 | } | 456 | } |
| 457 | |||
| 458 | if (pmd_trans_unstable(pmd)) | ||
| 459 | return 0; | ||
| 412 | /* | 460 | /* |
| 413 | * The mmap_sem held all the way back in m_start() is what | 461 | * The mmap_sem held all the way back in m_start() is what |
| 414 | * keeps khugepaged out of here and from collapsing things | 462 | * keeps khugepaged out of here and from collapsing things |
| @@ -422,7 +470,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 422 | return 0; | 470 | return 0; |
| 423 | } | 471 | } |
| 424 | 472 | ||
| 425 | static int show_smap(struct seq_file *m, void *v) | 473 | static int show_smap(struct seq_file *m, void *v, int is_pid) |
| 426 | { | 474 | { |
| 427 | struct proc_maps_private *priv = m->private; | 475 | struct proc_maps_private *priv = m->private; |
| 428 | struct task_struct *task = priv->task; | 476 | struct task_struct *task = priv->task; |
| @@ -440,7 +488,7 @@ static int show_smap(struct seq_file *m, void *v) | |||
| 440 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 488 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
| 441 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); | 489 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
| 442 | 490 | ||
| 443 | show_map_vma(m, vma); | 491 | show_map_vma(m, vma, is_pid); |
| 444 | 492 | ||
| 445 | seq_printf(m, | 493 | seq_printf(m, |
| 446 | "Size: %8lu kB\n" | 494 | "Size: %8lu kB\n" |
| @@ -479,20 +527,49 @@ static int show_smap(struct seq_file *m, void *v) | |||
| 479 | return 0; | 527 | return 0; |
| 480 | } | 528 | } |
| 481 | 529 | ||
| 530 | static int show_pid_smap(struct seq_file *m, void *v) | ||
| 531 | { | ||
| 532 | return show_smap(m, v, 1); | ||
| 533 | } | ||
| 534 | |||
| 535 | static int show_tid_smap(struct seq_file *m, void *v) | ||
| 536 | { | ||
| 537 | return show_smap(m, v, 0); | ||
| 538 | } | ||
| 539 | |||
| 482 | static const struct seq_operations proc_pid_smaps_op = { | 540 | static const struct seq_operations proc_pid_smaps_op = { |
| 483 | .start = m_start, | 541 | .start = m_start, |
| 484 | .next = m_next, | 542 | .next = m_next, |
| 485 | .stop = m_stop, | 543 | .stop = m_stop, |
| 486 | .show = show_smap | 544 | .show = show_pid_smap |
| 545 | }; | ||
| 546 | |||
| 547 | static const struct seq_operations proc_tid_smaps_op = { | ||
| 548 | .start = m_start, | ||
| 549 | .next = m_next, | ||
| 550 | .stop = m_stop, | ||
| 551 | .show = show_tid_smap | ||
| 487 | }; | 552 | }; |
| 488 | 553 | ||
| 489 | static int smaps_open(struct inode *inode, struct file *file) | 554 | static int pid_smaps_open(struct inode *inode, struct file *file) |
| 490 | { | 555 | { |
| 491 | return do_maps_open(inode, file, &proc_pid_smaps_op); | 556 | return do_maps_open(inode, file, &proc_pid_smaps_op); |
| 492 | } | 557 | } |
| 493 | 558 | ||
| 494 | const struct file_operations proc_smaps_operations = { | 559 | static int tid_smaps_open(struct inode *inode, struct file *file) |
| 495 | .open = smaps_open, | 560 | { |
| 561 | return do_maps_open(inode, file, &proc_tid_smaps_op); | ||
| 562 | } | ||
| 563 | |||
| 564 | const struct file_operations proc_pid_smaps_operations = { | ||
| 565 | .open = pid_smaps_open, | ||
| 566 | .read = seq_read, | ||
| 567 | .llseek = seq_lseek, | ||
| 568 | .release = seq_release_private, | ||
| 569 | }; | ||
| 570 | |||
| 571 | const struct file_operations proc_tid_smaps_operations = { | ||
| 572 | .open = tid_smaps_open, | ||
| 496 | .read = seq_read, | 573 | .read = seq_read, |
| 497 | .llseek = seq_lseek, | 574 | .llseek = seq_lseek, |
| 498 | .release = seq_release_private, | 575 | .release = seq_release_private, |
| @@ -507,6 +584,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
| 507 | struct page *page; | 584 | struct page *page; |
| 508 | 585 | ||
| 509 | split_huge_page_pmd(walk->mm, pmd); | 586 | split_huge_page_pmd(walk->mm, pmd); |
| 587 | if (pmd_trans_unstable(pmd)) | ||
| 588 | return 0; | ||
| 510 | 589 | ||
| 511 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 590 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
| 512 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 591 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
| @@ -518,9 +597,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
| 518 | if (!page) | 597 | if (!page) |
| 519 | continue; | 598 | continue; |
| 520 | 599 | ||
| 521 | if (PageReserved(page)) | ||
| 522 | continue; | ||
| 523 | |||
| 524 | /* Clear accessed and referenced bits. */ | 600 | /* Clear accessed and referenced bits. */ |
| 525 | ptep_test_and_clear_young(vma, addr, pte); | 601 | ptep_test_and_clear_young(vma, addr, pte); |
| 526 | ClearPageReferenced(page); | 602 | ClearPageReferenced(page); |
| @@ -598,11 +674,18 @@ const struct file_operations proc_clear_refs_operations = { | |||
| 598 | .llseek = noop_llseek, | 674 | .llseek = noop_llseek, |
| 599 | }; | 675 | }; |
| 600 | 676 | ||
| 677 | typedef struct { | ||
| 678 | u64 pme; | ||
| 679 | } pagemap_entry_t; | ||
| 680 | |||
| 601 | struct pagemapread { | 681 | struct pagemapread { |
| 602 | int pos, len; | 682 | int pos, len; |
| 603 | u64 *buffer; | 683 | pagemap_entry_t *buffer; |
| 604 | }; | 684 | }; |
| 605 | 685 | ||
| 686 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | ||
| 687 | #define PAGEMAP_WALK_MASK (PMD_MASK) | ||
| 688 | |||
| 606 | #define PM_ENTRY_BYTES sizeof(u64) | 689 | #define PM_ENTRY_BYTES sizeof(u64) |
| 607 | #define PM_STATUS_BITS 3 | 690 | #define PM_STATUS_BITS 3 |
| 608 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) | 691 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) |
| @@ -620,10 +703,15 @@ struct pagemapread { | |||
| 620 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) | 703 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) |
| 621 | #define PM_END_OF_BUFFER 1 | 704 | #define PM_END_OF_BUFFER 1 |
| 622 | 705 | ||
| 623 | static int add_to_pagemap(unsigned long addr, u64 pfn, | 706 | static inline pagemap_entry_t make_pme(u64 val) |
| 707 | { | ||
| 708 | return (pagemap_entry_t) { .pme = val }; | ||
| 709 | } | ||
| 710 | |||
| 711 | static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme, | ||
| 624 | struct pagemapread *pm) | 712 | struct pagemapread *pm) |
| 625 | { | 713 | { |
| 626 | pm->buffer[pm->pos++] = pfn; | 714 | pm->buffer[pm->pos++] = *pme; |
| 627 | if (pm->pos >= pm->len) | 715 | if (pm->pos >= pm->len) |
| 628 | return PM_END_OF_BUFFER; | 716 | return PM_END_OF_BUFFER; |
| 629 | return 0; | 717 | return 0; |
| @@ -635,8 +723,10 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
| 635 | struct pagemapread *pm = walk->private; | 723 | struct pagemapread *pm = walk->private; |
| 636 | unsigned long addr; | 724 | unsigned long addr; |
| 637 | int err = 0; | 725 | int err = 0; |
| 726 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | ||
| 727 | |||
| 638 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 728 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
| 639 | err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); | 729 | err = add_to_pagemap(addr, &pme, pm); |
| 640 | if (err) | 730 | if (err) |
| 641 | break; | 731 | break; |
| 642 | } | 732 | } |
| @@ -649,18 +739,40 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) | |||
| 649 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); | 739 | return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); |
| 650 | } | 740 | } |
| 651 | 741 | ||
| 652 | static u64 pte_to_pagemap_entry(pte_t pte) | 742 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte) |
| 653 | { | 743 | { |
| 654 | u64 pme = 0; | ||
| 655 | if (is_swap_pte(pte)) | 744 | if (is_swap_pte(pte)) |
| 656 | pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | 745 | *pme = make_pme(PM_PFRAME(swap_pte_to_pagemap_entry(pte)) |
| 657 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; | 746 | | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP); |
| 658 | else if (pte_present(pte)) | 747 | else if (pte_present(pte)) |
| 659 | pme = PM_PFRAME(pte_pfn(pte)) | 748 | *pme = make_pme(PM_PFRAME(pte_pfn(pte)) |
| 660 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | 749 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); |
| 661 | return pme; | 750 | else |
| 751 | *pme = make_pme(PM_NOT_PRESENT); | ||
| 662 | } | 752 | } |
| 663 | 753 | ||
| 754 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
| 755 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | ||
| 756 | pmd_t pmd, int offset) | ||
| 757 | { | ||
| 758 | /* | ||
| 759 | * Currently pmd for thp is always present because thp can not be | ||
| 760 | * swapped-out, migrated, or HWPOISONed (split in such cases instead.) | ||
| 761 | * This if-check is just to prepare for future implementation. | ||
| 762 | */ | ||
| 763 | if (pmd_present(pmd)) | ||
| 764 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | ||
| 765 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | ||
| 766 | else | ||
| 767 | *pme = make_pme(PM_NOT_PRESENT); | ||
| 768 | } | ||
| 769 | #else | ||
| 770 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | ||
| 771 | pmd_t pmd, int offset) | ||
| 772 | { | ||
| 773 | } | ||
| 774 | #endif | ||
| 775 | |||
| 664 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 776 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
| 665 | struct mm_walk *walk) | 777 | struct mm_walk *walk) |
| 666 | { | 778 | { |
| @@ -668,29 +780,46 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 668 | struct pagemapread *pm = walk->private; | 780 | struct pagemapread *pm = walk->private; |
| 669 | pte_t *pte; | 781 | pte_t *pte; |
| 670 | int err = 0; | 782 | int err = 0; |
| 671 | 783 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | |
| 672 | split_huge_page_pmd(walk->mm, pmd); | ||
| 673 | 784 | ||
| 674 | /* find the first VMA at or above 'addr' */ | 785 | /* find the first VMA at or above 'addr' */ |
| 675 | vma = find_vma(walk->mm, addr); | 786 | vma = find_vma(walk->mm, addr); |
| 787 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { | ||
| 788 | for (; addr != end; addr += PAGE_SIZE) { | ||
| 789 | unsigned long offset; | ||
| 790 | |||
| 791 | offset = (addr & ~PAGEMAP_WALK_MASK) >> | ||
| 792 | PAGE_SHIFT; | ||
| 793 | thp_pmd_to_pagemap_entry(&pme, *pmd, offset); | ||
| 794 | err = add_to_pagemap(addr, &pme, pm); | ||
| 795 | if (err) | ||
| 796 | break; | ||
| 797 | } | ||
| 798 | spin_unlock(&walk->mm->page_table_lock); | ||
| 799 | return err; | ||
| 800 | } | ||
| 801 | |||
| 802 | if (pmd_trans_unstable(pmd)) | ||
| 803 | return 0; | ||
| 676 | for (; addr != end; addr += PAGE_SIZE) { | 804 | for (; addr != end; addr += PAGE_SIZE) { |
| 677 | u64 pfn = PM_NOT_PRESENT; | ||
| 678 | 805 | ||
| 679 | /* check to see if we've left 'vma' behind | 806 | /* check to see if we've left 'vma' behind |
| 680 | * and need a new, higher one */ | 807 | * and need a new, higher one */ |
| 681 | if (vma && (addr >= vma->vm_end)) | 808 | if (vma && (addr >= vma->vm_end)) { |
| 682 | vma = find_vma(walk->mm, addr); | 809 | vma = find_vma(walk->mm, addr); |
| 810 | pme = make_pme(PM_NOT_PRESENT); | ||
| 811 | } | ||
| 683 | 812 | ||
| 684 | /* check that 'vma' actually covers this address, | 813 | /* check that 'vma' actually covers this address, |
| 685 | * and that it isn't a huge page vma */ | 814 | * and that it isn't a huge page vma */ |
| 686 | if (vma && (vma->vm_start <= addr) && | 815 | if (vma && (vma->vm_start <= addr) && |
| 687 | !is_vm_hugetlb_page(vma)) { | 816 | !is_vm_hugetlb_page(vma)) { |
| 688 | pte = pte_offset_map(pmd, addr); | 817 | pte = pte_offset_map(pmd, addr); |
| 689 | pfn = pte_to_pagemap_entry(*pte); | 818 | pte_to_pagemap_entry(&pme, *pte); |
| 690 | /* unmap before userspace copy */ | 819 | /* unmap before userspace copy */ |
| 691 | pte_unmap(pte); | 820 | pte_unmap(pte); |
| 692 | } | 821 | } |
| 693 | err = add_to_pagemap(addr, pfn, pm); | 822 | err = add_to_pagemap(addr, &pme, pm); |
| 694 | if (err) | 823 | if (err) |
| 695 | return err; | 824 | return err; |
| 696 | } | 825 | } |
| @@ -701,13 +830,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
| 701 | } | 830 | } |
| 702 | 831 | ||
| 703 | #ifdef CONFIG_HUGETLB_PAGE | 832 | #ifdef CONFIG_HUGETLB_PAGE |
| 704 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) | 833 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, |
| 834 | pte_t pte, int offset) | ||
| 705 | { | 835 | { |
| 706 | u64 pme = 0; | ||
| 707 | if (pte_present(pte)) | 836 | if (pte_present(pte)) |
| 708 | pme = PM_PFRAME(pte_pfn(pte) + offset) | 837 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
| 709 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | 838 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); |
| 710 | return pme; | 839 | else |
| 840 | *pme = make_pme(PM_NOT_PRESENT); | ||
| 711 | } | 841 | } |
| 712 | 842 | ||
| 713 | /* This function walks within one hugetlb entry in the single call */ | 843 | /* This function walks within one hugetlb entry in the single call */ |
| @@ -717,12 +847,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
| 717 | { | 847 | { |
| 718 | struct pagemapread *pm = walk->private; | 848 | struct pagemapread *pm = walk->private; |
| 719 | int err = 0; | 849 | int err = 0; |
| 720 | u64 pfn; | 850 | pagemap_entry_t pme; |
| 721 | 851 | ||
| 722 | for (; addr != end; addr += PAGE_SIZE) { | 852 | for (; addr != end; addr += PAGE_SIZE) { |
| 723 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 853 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
| 724 | pfn = huge_pte_to_pagemap_entry(*pte, offset); | 854 | huge_pte_to_pagemap_entry(&pme, *pte, offset); |
| 725 | err = add_to_pagemap(addr, pfn, pm); | 855 | err = add_to_pagemap(addr, &pme, pm); |
| 726 | if (err) | 856 | if (err) |
| 727 | return err; | 857 | return err; |
| 728 | } | 858 | } |
| @@ -757,8 +887,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
| 757 | * determine which areas of memory are actually mapped and llseek to | 887 | * determine which areas of memory are actually mapped and llseek to |
| 758 | * skip over unmapped regions. | 888 | * skip over unmapped regions. |
| 759 | */ | 889 | */ |
| 760 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | ||
| 761 | #define PAGEMAP_WALK_MASK (PMD_MASK) | ||
| 762 | static ssize_t pagemap_read(struct file *file, char __user *buf, | 890 | static ssize_t pagemap_read(struct file *file, char __user *buf, |
| 763 | size_t count, loff_t *ppos) | 891 | size_t count, loff_t *ppos) |
| 764 | { | 892 | { |
| @@ -941,26 +1069,21 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
| 941 | pte_t *pte; | 1069 | pte_t *pte; |
| 942 | 1070 | ||
| 943 | md = walk->private; | 1071 | md = walk->private; |
| 944 | spin_lock(&walk->mm->page_table_lock); | 1072 | |
| 945 | if (pmd_trans_huge(*pmd)) { | 1073 | if (pmd_trans_huge_lock(pmd, md->vma) == 1) { |
| 946 | if (pmd_trans_splitting(*pmd)) { | 1074 | pte_t huge_pte = *(pte_t *)pmd; |
| 947 | spin_unlock(&walk->mm->page_table_lock); | 1075 | struct page *page; |
| 948 | wait_split_huge_page(md->vma->anon_vma, pmd); | 1076 | |
| 949 | } else { | 1077 | page = can_gather_numa_stats(huge_pte, md->vma, addr); |
| 950 | pte_t huge_pte = *(pte_t *)pmd; | 1078 | if (page) |
| 951 | struct page *page; | 1079 | gather_stats(page, md, pte_dirty(huge_pte), |
| 952 | 1080 | HPAGE_PMD_SIZE/PAGE_SIZE); | |
| 953 | page = can_gather_numa_stats(huge_pte, md->vma, addr); | ||
| 954 | if (page) | ||
| 955 | gather_stats(page, md, pte_dirty(huge_pte), | ||
| 956 | HPAGE_PMD_SIZE/PAGE_SIZE); | ||
| 957 | spin_unlock(&walk->mm->page_table_lock); | ||
| 958 | return 0; | ||
| 959 | } | ||
| 960 | } else { | ||
| 961 | spin_unlock(&walk->mm->page_table_lock); | 1081 | spin_unlock(&walk->mm->page_table_lock); |
| 1082 | return 0; | ||
| 962 | } | 1083 | } |
| 963 | 1084 | ||
| 1085 | if (pmd_trans_unstable(pmd)) | ||
| 1086 | return 0; | ||
| 964 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | 1087 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
| 965 | do { | 1088 | do { |
| 966 | struct page *page = can_gather_numa_stats(*pte, md->vma, addr); | 1089 | struct page *page = can_gather_numa_stats(*pte, md->vma, addr); |
| @@ -1002,7 +1125,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | |||
| 1002 | /* | 1125 | /* |
| 1003 | * Display pages allocated per node and memory policy via /proc. | 1126 | * Display pages allocated per node and memory policy via /proc. |
| 1004 | */ | 1127 | */ |
| 1005 | static int show_numa_map(struct seq_file *m, void *v) | 1128 | static int show_numa_map(struct seq_file *m, void *v, int is_pid) |
| 1006 | { | 1129 | { |
| 1007 | struct numa_maps_private *numa_priv = m->private; | 1130 | struct numa_maps_private *numa_priv = m->private; |
| 1008 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; | 1131 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; |
| @@ -1039,9 +1162,19 @@ static int show_numa_map(struct seq_file *m, void *v) | |||
| 1039 | seq_path(m, &file->f_path, "\n\t= "); | 1162 | seq_path(m, &file->f_path, "\n\t= "); |
| 1040 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1163 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
| 1041 | seq_printf(m, " heap"); | 1164 | seq_printf(m, " heap"); |
| 1042 | } else if (vma->vm_start <= mm->start_stack && | 1165 | } else { |
| 1043 | vma->vm_end >= mm->start_stack) { | 1166 | pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); |
| 1044 | seq_printf(m, " stack"); | 1167 | if (tid != 0) { |
| 1168 | /* | ||
| 1169 | * Thread stack in /proc/PID/task/TID/maps or | ||
| 1170 | * the main process stack. | ||
| 1171 | */ | ||
| 1172 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
| 1173 | vma->vm_end >= mm->start_stack)) | ||
| 1174 | seq_printf(m, " stack"); | ||
| 1175 | else | ||
| 1176 | seq_printf(m, " stack:%d", tid); | ||
| 1177 | } | ||
| 1045 | } | 1178 | } |
| 1046 | 1179 | ||
| 1047 | if (is_vm_hugetlb_page(vma)) | 1180 | if (is_vm_hugetlb_page(vma)) |
| @@ -1084,21 +1217,39 @@ out: | |||
| 1084 | return 0; | 1217 | return 0; |
| 1085 | } | 1218 | } |
| 1086 | 1219 | ||
| 1220 | static int show_pid_numa_map(struct seq_file *m, void *v) | ||
| 1221 | { | ||
| 1222 | return show_numa_map(m, v, 1); | ||
| 1223 | } | ||
| 1224 | |||
| 1225 | static int show_tid_numa_map(struct seq_file *m, void *v) | ||
| 1226 | { | ||
| 1227 | return show_numa_map(m, v, 0); | ||
| 1228 | } | ||
| 1229 | |||
| 1087 | static const struct seq_operations proc_pid_numa_maps_op = { | 1230 | static const struct seq_operations proc_pid_numa_maps_op = { |
| 1088 | .start = m_start, | 1231 | .start = m_start, |
| 1089 | .next = m_next, | 1232 | .next = m_next, |
| 1090 | .stop = m_stop, | 1233 | .stop = m_stop, |
| 1091 | .show = show_numa_map, | 1234 | .show = show_pid_numa_map, |
| 1235 | }; | ||
| 1236 | |||
| 1237 | static const struct seq_operations proc_tid_numa_maps_op = { | ||
| 1238 | .start = m_start, | ||
| 1239 | .next = m_next, | ||
| 1240 | .stop = m_stop, | ||
| 1241 | .show = show_tid_numa_map, | ||
| 1092 | }; | 1242 | }; |
| 1093 | 1243 | ||
| 1094 | static int numa_maps_open(struct inode *inode, struct file *file) | 1244 | static int numa_maps_open(struct inode *inode, struct file *file, |
| 1245 | const struct seq_operations *ops) | ||
| 1095 | { | 1246 | { |
| 1096 | struct numa_maps_private *priv; | 1247 | struct numa_maps_private *priv; |
| 1097 | int ret = -ENOMEM; | 1248 | int ret = -ENOMEM; |
| 1098 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 1249 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
| 1099 | if (priv) { | 1250 | if (priv) { |
| 1100 | priv->proc_maps.pid = proc_pid(inode); | 1251 | priv->proc_maps.pid = proc_pid(inode); |
| 1101 | ret = seq_open(file, &proc_pid_numa_maps_op); | 1252 | ret = seq_open(file, ops); |
| 1102 | if (!ret) { | 1253 | if (!ret) { |
| 1103 | struct seq_file *m = file->private_data; | 1254 | struct seq_file *m = file->private_data; |
| 1104 | m->private = priv; | 1255 | m->private = priv; |
| @@ -1109,8 +1260,25 @@ static int numa_maps_open(struct inode *inode, struct file *file) | |||
| 1109 | return ret; | 1260 | return ret; |
| 1110 | } | 1261 | } |
| 1111 | 1262 | ||
| 1112 | const struct file_operations proc_numa_maps_operations = { | 1263 | static int pid_numa_maps_open(struct inode *inode, struct file *file) |
| 1113 | .open = numa_maps_open, | 1264 | { |
| 1265 | return numa_maps_open(inode, file, &proc_pid_numa_maps_op); | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | static int tid_numa_maps_open(struct inode *inode, struct file *file) | ||
| 1269 | { | ||
| 1270 | return numa_maps_open(inode, file, &proc_tid_numa_maps_op); | ||
| 1271 | } | ||
| 1272 | |||
| 1273 | const struct file_operations proc_pid_numa_maps_operations = { | ||
| 1274 | .open = pid_numa_maps_open, | ||
| 1275 | .read = seq_read, | ||
| 1276 | .llseek = seq_lseek, | ||
| 1277 | .release = seq_release_private, | ||
| 1278 | }; | ||
| 1279 | |||
| 1280 | const struct file_operations proc_tid_numa_maps_operations = { | ||
| 1281 | .open = tid_numa_maps_open, | ||
| 1114 | .read = seq_read, | 1282 | .read = seq_read, |
| 1115 | .llseek = seq_lseek, | 1283 | .llseek = seq_lseek, |
| 1116 | .release = seq_release_private, | 1284 | .release = seq_release_private, |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 980de547c070..74fe164d1b23 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
| @@ -134,9 +134,11 @@ static void pad_len_spaces(struct seq_file *m, int len) | |||
| 134 | /* | 134 | /* |
| 135 | * display a single VMA to a sequenced file | 135 | * display a single VMA to a sequenced file |
| 136 | */ | 136 | */ |
| 137 | static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | 137 | static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, |
| 138 | int is_pid) | ||
| 138 | { | 139 | { |
| 139 | struct mm_struct *mm = vma->vm_mm; | 140 | struct mm_struct *mm = vma->vm_mm; |
| 141 | struct proc_maps_private *priv = m->private; | ||
| 140 | unsigned long ino = 0; | 142 | unsigned long ino = 0; |
| 141 | struct file *file; | 143 | struct file *file; |
| 142 | dev_t dev = 0; | 144 | dev_t dev = 0; |
| @@ -168,10 +170,19 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
| 168 | pad_len_spaces(m, len); | 170 | pad_len_spaces(m, len); |
| 169 | seq_path(m, &file->f_path, ""); | 171 | seq_path(m, &file->f_path, ""); |
| 170 | } else if (mm) { | 172 | } else if (mm) { |
| 171 | if (vma->vm_start <= mm->start_stack && | 173 | pid_t tid = vm_is_stack(priv->task, vma, is_pid); |
| 172 | vma->vm_end >= mm->start_stack) { | 174 | |
| 175 | if (tid != 0) { | ||
| 173 | pad_len_spaces(m, len); | 176 | pad_len_spaces(m, len); |
| 174 | seq_puts(m, "[stack]"); | 177 | /* |
| 178 | * Thread stack in /proc/PID/task/TID/maps or | ||
| 179 | * the main process stack. | ||
| 180 | */ | ||
| 181 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
| 182 | vma->vm_end >= mm->start_stack)) | ||
| 183 | seq_printf(m, "[stack]"); | ||
| 184 | else | ||
| 185 | seq_printf(m, "[stack:%d]", tid); | ||
| 175 | } | 186 | } |
| 176 | } | 187 | } |
| 177 | 188 | ||
| @@ -182,11 +193,22 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
| 182 | /* | 193 | /* |
| 183 | * display mapping lines for a particular process's /proc/pid/maps | 194 | * display mapping lines for a particular process's /proc/pid/maps |
| 184 | */ | 195 | */ |
| 185 | static int show_map(struct seq_file *m, void *_p) | 196 | static int show_map(struct seq_file *m, void *_p, int is_pid) |
| 186 | { | 197 | { |
| 187 | struct rb_node *p = _p; | 198 | struct rb_node *p = _p; |
| 188 | 199 | ||
| 189 | return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); | 200 | return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb), |
| 201 | is_pid); | ||
| 202 | } | ||
| 203 | |||
| 204 | static int show_pid_map(struct seq_file *m, void *_p) | ||
| 205 | { | ||
| 206 | return show_map(m, _p, 1); | ||
| 207 | } | ||
| 208 | |||
| 209 | static int show_tid_map(struct seq_file *m, void *_p) | ||
| 210 | { | ||
| 211 | return show_map(m, _p, 0); | ||
| 190 | } | 212 | } |
| 191 | 213 | ||
| 192 | static void *m_start(struct seq_file *m, loff_t *pos) | 214 | static void *m_start(struct seq_file *m, loff_t *pos) |
| @@ -240,10 +262,18 @@ static const struct seq_operations proc_pid_maps_ops = { | |||
| 240 | .start = m_start, | 262 | .start = m_start, |
| 241 | .next = m_next, | 263 | .next = m_next, |
| 242 | .stop = m_stop, | 264 | .stop = m_stop, |
| 243 | .show = show_map | 265 | .show = show_pid_map |
| 266 | }; | ||
| 267 | |||
| 268 | static const struct seq_operations proc_tid_maps_ops = { | ||
| 269 | .start = m_start, | ||
| 270 | .next = m_next, | ||
| 271 | .stop = m_stop, | ||
| 272 | .show = show_tid_map | ||
| 244 | }; | 273 | }; |
| 245 | 274 | ||
| 246 | static int maps_open(struct inode *inode, struct file *file) | 275 | static int maps_open(struct inode *inode, struct file *file, |
| 276 | const struct seq_operations *ops) | ||
| 247 | { | 277 | { |
| 248 | struct proc_maps_private *priv; | 278 | struct proc_maps_private *priv; |
| 249 | int ret = -ENOMEM; | 279 | int ret = -ENOMEM; |
| @@ -251,7 +281,7 @@ static int maps_open(struct inode *inode, struct file *file) | |||
| 251 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | 281 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); |
| 252 | if (priv) { | 282 | if (priv) { |
| 253 | priv->pid = proc_pid(inode); | 283 | priv->pid = proc_pid(inode); |
| 254 | ret = seq_open(file, &proc_pid_maps_ops); | 284 | ret = seq_open(file, ops); |
| 255 | if (!ret) { | 285 | if (!ret) { |
| 256 | struct seq_file *m = file->private_data; | 286 | struct seq_file *m = file->private_data; |
| 257 | m->private = priv; | 287 | m->private = priv; |
| @@ -262,8 +292,25 @@ static int maps_open(struct inode *inode, struct file *file) | |||
| 262 | return ret; | 292 | return ret; |
| 263 | } | 293 | } |
| 264 | 294 | ||
| 265 | const struct file_operations proc_maps_operations = { | 295 | static int pid_maps_open(struct inode *inode, struct file *file) |
| 266 | .open = maps_open, | 296 | { |
| 297 | return maps_open(inode, file, &proc_pid_maps_ops); | ||
| 298 | } | ||
| 299 | |||
| 300 | static int tid_maps_open(struct inode *inode, struct file *file) | ||
| 301 | { | ||
| 302 | return maps_open(inode, file, &proc_tid_maps_ops); | ||
| 303 | } | ||
| 304 | |||
| 305 | const struct file_operations proc_pid_maps_operations = { | ||
| 306 | .open = pid_maps_open, | ||
| 307 | .read = seq_read, | ||
| 308 | .llseek = seq_lseek, | ||
| 309 | .release = seq_release_private, | ||
| 310 | }; | ||
| 311 | |||
| 312 | const struct file_operations proc_tid_maps_operations = { | ||
| 313 | .open = tid_maps_open, | ||
| 267 | .read = seq_read, | 314 | .read = seq_read, |
| 268 | .llseek = seq_lseek, | 315 | .llseek = seq_lseek, |
| 269 | .release = seq_release_private, | 316 | .release = seq_release_private, |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index b0f450a2bb7c..0d5071d29985 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
| @@ -700,3 +700,26 @@ static int __init vmcore_init(void) | |||
| 700 | return 0; | 700 | return 0; |
| 701 | } | 701 | } |
| 702 | module_init(vmcore_init) | 702 | module_init(vmcore_init) |
| 703 | |||
| 704 | /* Cleanup function for vmcore module. */ | ||
| 705 | void vmcore_cleanup(void) | ||
| 706 | { | ||
| 707 | struct list_head *pos, *next; | ||
| 708 | |||
| 709 | if (proc_vmcore) { | ||
| 710 | remove_proc_entry(proc_vmcore->name, proc_vmcore->parent); | ||
| 711 | proc_vmcore = NULL; | ||
| 712 | } | ||
| 713 | |||
| 714 | /* clear the vmcore list. */ | ||
| 715 | list_for_each_safe(pos, next, &vmcore_list) { | ||
| 716 | struct vmcore *m; | ||
| 717 | |||
| 718 | m = list_entry(pos, struct vmcore, list); | ||
| 719 | list_del(&m->list); | ||
| 720 | kfree(m); | ||
| 721 | } | ||
| 722 | kfree(elfcorebuf); | ||
| 723 | elfcorebuf = NULL; | ||
| 724 | } | ||
| 725 | EXPORT_SYMBOL_GPL(vmcore_cleanup); | ||
