Diffstat (limited to 'fs/proc')
-rw-r--r--  fs/proc/Kconfig        12
-rw-r--r--  fs/proc/array.c         2
-rw-r--r--  fs/proc/base.c         89
-rw-r--r--  fs/proc/generic.c       3
-rw-r--r--  fs/proc/inode.c         2
-rw-r--r--  fs/proc/meminfo.c       9
-rw-r--r--  fs/proc/proc_net.c      4
-rw-r--r--  fs/proc/proc_sysctl.c   4
-rw-r--r--  fs/proc/root.c          2
-rw-r--r--  fs/proc/stat.c          4
-rw-r--r--  fs/proc/task_mmu.c     83
11 files changed, 162 insertions, 52 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 15af6222f8a..ddb83a0e15e 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -67,3 +67,15 @@ config PROC_PAGE_MONITOR
 	  /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
 	  /proc/kpagecount, and /proc/kpageflags. Disabling these
 	  interfaces will reduce the size of the kernel by approximately 4kb.
+
+config REPORT_PRESENT_CPUS
+	default n
+	depends on PROC_FS && SMP
+	bool "Report present cpus instead of online cpus"
+	help
+	  This is a work around to report Present CPUs instead of Online CPUs.
+	  Some power savings implements use CPU hotplug for power domains.
+	  It is a bug to enable this on a server or other architecture that
+	  uses cpu hotplug in the correct way.
+
+
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9b45ee84fbc..3a1dafd228d 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -172,7 +172,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
 	tpid = 0;
 	if (pid_alive(p)) {
-		struct task_struct *tracer = tracehook_tracer_task(p);
+		struct task_struct *tracer = ptrace_parent(p);
 		if (tracer)
 			tpid = task_pid_nr_ns(tracer, ns);
 	}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fc5bc276769..fc03d161a1d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -133,6 +133,12 @@ struct pid_entry {
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
 
+/* ANDROID is for special files in /proc. */
+#define ANDROID(NAME, MODE, OTYPE)			\
+	NOD(NAME, (S_IFREG|(MODE)),			\
+		&proc_##OTYPE##_inode_operations,	\
+		&proc_##OTYPE##_operations, {})
+
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -216,7 +222,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task)
 	if (task_is_stopped_or_traced(task)) {
 		int match;
 		rcu_read_lock();
-		match = (tracehook_tracer_task(task) == current);
+		match = (ptrace_parent(task) == current);
 		rcu_read_unlock();
 		if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
 			return mm;
@@ -263,7 +269,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
 
 	mm = get_task_mm(task);
 	if (mm && mm != current->mm &&
-			!ptrace_may_access(task, PTRACE_MODE_READ)) {
+			!ptrace_may_access(task, PTRACE_MODE_READ) &&
+			!capable(CAP_SYS_RESOURCE)) {
 		mmput(mm);
 		mm = ERR_PTR(-EACCES);
 	}
@@ -673,7 +680,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
 	p->m.private = p;
 	p->ns = ns;
 	p->root = root;
-	p->event = ns->event;
+	p->m.poll_event = ns->event;
 
 	return 0;
 
@@ -884,6 +891,10 @@ out_no_task:
 	return ret;
 }
 
+#define mem_write NULL
+
+#ifndef mem_write
+/* This is a security hazard */
 static ssize_t mem_write(struct file * file, const char __user *buf,
 			 size_t count, loff_t *ppos)
 {
@@ -942,6 +953,7 @@ out_task:
 out_no_task:
 	return copied;
 }
+#endif
 
 loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 {
@@ -1118,10 +1130,9 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	 * Warn that /proc/pid/oom_adj is deprecated, see
 	 * Documentation/feature-removal-schedule.txt.
 	 */
-	printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, "
-			"please use /proc/%d/oom_score_adj instead.\n",
-			current->comm, task_pid_nr(current),
-			task_pid_nr(task), task_pid_nr(task));
+	printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+		  current->comm, task_pid_nr(current), task_pid_nr(task),
+		  task_pid_nr(task));
 	task->signal->oom_adj = oom_adjust;
 	/*
 	 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
@@ -1141,6 +1152,38 @@ out:
 	return err < 0 ? err : count;
 }
 
+static int oom_adjust_permission(struct inode *inode, int mask)
+{
+	uid_t uid;
+	struct task_struct *p;
+
+	if (mask & MAY_NOT_BLOCK)
+		return -ECHILD;
+
+	p = get_proc_task(inode);
+	if(p) {
+		uid = task_uid(p);
+		put_task_struct(p);
+	}
+
+	/*
+	 * System Server (uid == 1000) is granted access to oom_adj of all
+	 * android applications (uid > 10000) as and services (uid >= 1000)
+	 */
+	if (p && (current_fsuid() == 1000) && (uid >= 1000)) {
+		if (inode->i_mode >> 6 & mask) {
+			return 0;
+		}
+	}
+
+	/* Fall back to default. */
+	return generic_permission(inode, mask);
+}
+
+static const struct inode_operations proc_oom_adjust_inode_operations = {
+	.permission	= oom_adjust_permission,
+};
+
 static const struct file_operations proc_oom_adjust_operations = {
 	.read		= oom_adjust_read,
 	.write		= oom_adjust_write,
@@ -1920,6 +1963,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 		spin_lock(&files->file_lock);
 		file = fcheck_files(files, fd);
 		if (file) {
+			unsigned int f_flags;
+			struct fdtable *fdt;
+
+			fdt = files_fdtable(files);
+			f_flags = file->f_flags & ~O_CLOEXEC;
+			if (FD_ISSET(fd, fdt->close_on_exec))
+				f_flags |= O_CLOEXEC;
+
 			if (path) {
 				*path = file->f_path;
 				path_get(&file->f_path);
@@ -1929,7 +1980,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1929 "pos:\t%lli\n" 1980 "pos:\t%lli\n"
1930 "flags:\t0%o\n", 1981 "flags:\t0%o\n",
1931 (long long) file->f_pos, 1982 (long long) file->f_pos,
1932 file->f_flags); 1983 f_flags);
1933 spin_unlock(&files->file_lock); 1984 spin_unlock(&files->file_lock);
1934 put_files_struct(files); 1985 put_files_struct(files);
1935 return 0; 1986 return 0;
@@ -2167,9 +2218,9 @@ static const struct file_operations proc_fd_operations = {
  * /proc/pid/fd needs a special permission handler so that a process can still
  * access /proc/self/fd after it has executed a setuid().
  */
-static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
+static int proc_fd_permission(struct inode *inode, int mask)
 {
-	int rv = generic_permission(inode, mask, flags, NULL);
+	int rv = generic_permission(inode, mask);
 	if (rv == 0)
 		return 0;
 	if (task_pid(current) == proc_pid(inode))
@@ -2707,9 +2758,16 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 {
 	struct task_io_accounting acct = task->ioac;
 	unsigned long flags;
+	int result;
+
+	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (result)
+		return result;
 
-	if (!ptrace_may_access(task, PTRACE_MODE_READ))
-		return -EACCES;
+	if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
+		result = -EACCES;
+		goto out_unlock;
+	}
 
 	if (whole && lock_task_sighand(task, &flags)) {
 		struct task_struct *t = task;
@@ -2720,7 +2778,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 
 		unlock_task_sighand(task, &flags);
 	}
-	return sprintf(buffer,
+	result = sprintf(buffer,
 			"rchar: %llu\n"
 			"wchar: %llu\n"
 			"syscr: %llu\n"
@@ -2735,6 +2793,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
 			(unsigned long long)acct.read_bytes,
 			(unsigned long long)acct.write_bytes,
 			(unsigned long long)acct.cancelled_write_bytes);
+out_unlock:
+	mutex_unlock(&task->signal->cred_guard_mutex);
+	return result;
 }
 
 static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
@@ -2829,7 +2890,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2829 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2890 REG("cgroup", S_IRUGO, proc_cgroup_operations),
2830#endif 2891#endif
2831 INF("oom_score", S_IRUGO, proc_oom_score), 2892 INF("oom_score", S_IRUGO, proc_oom_score),
2832 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2893 ANDROID("oom_adj",S_IRUGO|S_IWUSR, oom_adjust),
2833 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2894 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2834#ifdef CONFIG_AUDITSYSCALL 2895#ifdef CONFIG_AUDITSYSCALL
2835 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2896 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1637f17c37..9d99131d0d6 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -620,8 +620,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	if (!ent) goto out;
 
 	memset(ent, 0, sizeof(struct proc_dir_entry));
-	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
-	ent->name = ((char *) ent) + sizeof(*ent);
+	memcpy(ent->name, fn, len + 1);
 	ent->namelen = len;
 	ent->mode = mode;
 	ent->nlink = nlink;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 74b48cfa1bb..7ed72d6c1c6 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -319,7 +319,7 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	if (!pde->proc_fops) {
 		spin_unlock(&pde->pde_unload_lock);
 		kfree(pdeo);
-		return -EINVAL;
+		return -ENOENT;
 	}
 	pde->pde_users++;
 	open = pde->proc_fops->open;
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index ed257d14156..80e4645f799 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -10,7 +10,7 @@
 #include <linux/seq_file.h>
 #include <linux/swap.h>
 #include <linux/vmstat.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include "internal.h"
@@ -131,12 +131,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		K(i.freeswap),
 		K(global_page_state(NR_FILE_DIRTY)),
 		K(global_page_state(NR_WRITEBACK)),
-		K(global_page_state(NR_ANON_PAGES)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+		K(global_page_state(NR_ANON_PAGES)
 		+ global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
-		HPAGE_PMD_NR
+		HPAGE_PMD_NR),
+#else
+		K(global_page_state(NR_ANON_PAGES)),
 #endif
-		),
 		K(global_page_state(NR_FILE_MAPPED)),
 		K(global_page_state(NR_SHMEM)),
 		K(global_page_state(NR_SLAB_RECLAIMABLE) +
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 9020ac15baa..f738024ccc8 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -197,15 +197,15 @@ static __net_init int proc_net_ns_init(struct net *net)
 	int err;
 
 	err = -ENOMEM;
-	netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+	netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
 	if (!netd)
 		goto out;
 
 	netd->data = net;
 	netd->nlink = 2;
-	netd->name = "net";
 	netd->namelen = 3;
 	netd->parent = &proc_root;
+	memcpy(netd->name, "net", 4);
 
 	err = -EEXIST;
 	net_statd = proc_net_mkdir(net, "stat", netd);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d167de365a8..1a77dbef226 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -294,7 +294,7 @@ out:
 	return ret;
 }
 
-static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
+static int proc_sys_permission(struct inode *inode, int mask)
 {
 	/*
 	 * sysctl entries that are not writeable,
@@ -316,7 +316,7 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
 	if (!table) /* global root - r-xr-xr-x */
 		error = mask & MAY_WRITE ? -EACCES : 0;
 	else /* Use the permissions on the sysctl table entry */
-		error = sysctl_perm(head->root, table, mask);
+		error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK);
 
 	sysctl_head_finish(head);
 	return error;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index d6c3b416529..9a8a2b77b87 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -186,13 +186,13 @@ static const struct inode_operations proc_root_inode_operations = {
 struct proc_dir_entry proc_root = {
 	.low_ino	= PROC_ROOT_INO,
 	.namelen	= 5,
-	.name		= "/proc",
 	.mode		= S_IFDIR | S_IRUGO | S_IXUGO,
 	.nlink		= 2,
 	.count		= ATOMIC_INIT(1),
 	.proc_iops	= &proc_root_inode_operations,
 	.proc_fops	= &proc_root_operations,
 	.parent		= &proc_root,
+	.name		= "/proc",
 };
 
 int pid_ns_prepare_proc(struct pid_namespace *ns)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 9758b654a1b..4b758ad5c83 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -75,7 +75,11 @@ static int show_stat(struct seq_file *p, void *v)
 		(unsigned long long)cputime64_to_clock_t(steal),
 		(unsigned long long)cputime64_to_clock_t(guest),
 		(unsigned long long)cputime64_to_clock_t(guest_nice));
+#if defined(CONFIG_REPORT_PRESENT_CPUS)
+	for_each_present_cpu(i) {
+#else
 	for_each_online_cpu(i) {
+#endif
 
 		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
 		user = kstat_cpu(i).cpustat.user;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 25b6a887adb..c7d4ee663f1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -877,30 +877,54 @@ struct numa_maps_private {
 	struct numa_maps md;
 };
 
-static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
+static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
+			unsigned long nr_pages)
 {
 	int count = page_mapcount(page);
 
-	md->pages++;
+	md->pages += nr_pages;
 	if (pte_dirty || PageDirty(page))
-		md->dirty++;
+		md->dirty += nr_pages;
 
 	if (PageSwapCache(page))
-		md->swapcache++;
+		md->swapcache += nr_pages;
 
 	if (PageActive(page) || PageUnevictable(page))
-		md->active++;
+		md->active += nr_pages;
 
 	if (PageWriteback(page))
-		md->writeback++;
+		md->writeback += nr_pages;
 
 	if (PageAnon(page))
-		md->anon++;
+		md->anon += nr_pages;
 
 	if (count > md->mapcount_max)
 		md->mapcount_max = count;
 
-	md->node[page_to_nid(page)]++;
+	md->node[page_to_nid(page)] += nr_pages;
+}
+
+static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
+		unsigned long addr)
+{
+	struct page *page;
+	int nid;
+
+	if (!pte_present(pte))
+		return NULL;
+
+	page = vm_normal_page(vma, addr, pte);
+	if (!page)
+		return NULL;
+
+	if (PageReserved(page))
+		return NULL;
+
+	nid = page_to_nid(page);
+	if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
+		return NULL;
+
+	return page;
 }
 
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
@@ -912,26 +936,32 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *pte;
 
 	md = walk->private;
-	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-	do {
-		struct page *page;
-		int nid;
+	spin_lock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge(*pmd)) {
+		if (pmd_trans_splitting(*pmd)) {
+			spin_unlock(&walk->mm->page_table_lock);
+			wait_split_huge_page(md->vma->anon_vma, pmd);
+		} else {
+			pte_t huge_pte = *(pte_t *)pmd;
+			struct page *page;
 
-		if (!pte_present(*pte))
-			continue;
+			page = can_gather_numa_stats(huge_pte, md->vma, addr);
+			if (page)
+				gather_stats(page, md, pte_dirty(huge_pte),
+						HPAGE_PMD_SIZE/PAGE_SIZE);
+			spin_unlock(&walk->mm->page_table_lock);
+			return 0;
+		}
+	} else {
+		spin_unlock(&walk->mm->page_table_lock);
+	}
 
-		page = vm_normal_page(md->vma, addr, *pte);
+	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	do {
+		struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
 		if (!page)
 			continue;
-
-		if (PageReserved(page))
-			continue;
-
-		nid = page_to_nid(page);
-		if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
-			continue;
-
-		gather_stats(page, md, pte_dirty(*pte));
+		gather_stats(page, md, pte_dirty(*pte), 1);
 
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(orig_pte, ptl);
@@ -952,7 +982,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
 		return 0;
 
 	md = walk->private;
-	gather_stats(page, md, pte_dirty(*pte));
+	gather_stats(page, md, pte_dirty(*pte), 1);
 	return 0;
 }
 
@@ -1009,6 +1039,9 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " stack");
 	}
 
+	if (is_vm_hugetlb_page(vma))
+		seq_printf(m, " huge");
+
 	walk_page_range(vma->vm_start, vma->vm_end, &walk);
 
 	if (!md->pages)