author    Stephen Wilson <wilsons@start.ca>               2011-05-24 20:12:49 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-05-25 11:39:35 -0400
commit    5b52fc890bece77bffb9fade69239f71384ef02b
tree      52848297984dba4c0f4106c5303a1a4bf0db92b0
parent    f2beb7983613ecca20a61604f01ab50cc7a797e6
proc: allocate storage for numa_maps statistics once
In show_numa_map() we collect statistics into a numa_maps structure.
Since the number of NUMA nodes can be very large, this structure is not a
candidate for stack allocation.
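To put a number on that: struct numa_maps embeds an `unsigned long node[MAX_NUMNODES]` array, and MAX_NUMNODES is fixed at build time by CONFIG_NODES_SHIFT. A rough userspace sketch of the arithmetic follows; the shift value of 10 is illustrative (large-SMP kernel configurations use values in this range), and `numa_maps_sketch` is an abbreviated stand-in, not the kernel structure itself:

```c
#include <stdio.h>

/* Illustrative only: CONFIG_NODES_SHIFT is a kernel build option; 10 is
 * in the range used by large-SMP configurations. */
#define NODES_SHIFT  10
#define MAX_NUMNODES (1UL << NODES_SHIFT)

/* Abbreviated stand-in for the kernel's struct numa_maps; the per-node
 * counter array dominates its footprint. */
struct numa_maps_sketch {
	unsigned long pages;              /* a few scalar counters ...     */
	unsigned long node[MAX_NUMNODES]; /* ... plus one counter per node */
};

int main(void)
{
	/* 1024 nodes x 8 bytes = 8 KB for node[] alone -- on the order of
	 * an entire x86-64 kernel stack, hence no stack allocation. */
	printf("sizeof(struct numa_maps_sketch) = %zu bytes\n",
	       sizeof(struct numa_maps_sketch));
	return 0;
}
```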
Instead of going through a kzalloc()+kfree() cycle each time show_numa_map()
is invoked, perform the allocation just once, when /proc/pid/numa_maps is
opened.
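The shape of the change, reduced to its essence: scratch space moves from a per-show() heap allocation into state that lives for the lifetime of the open file, and each show() call merely clears it. A hedged userspace sketch of that before/after pattern (`show_before`/`show_after` and the loop bounds are illustrative, not kernel APIs):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* seq_file invokes ->show() once per VMA, so the "before" variant pays
 * one heap round trip per mapping on every read of the file. */

struct stats { unsigned long node[1024]; };

/* Before: scratch space allocated and freed inside every show() call. */
static void show_before(void)
{
	struct stats *md = calloc(1, sizeof(*md));
	if (!md)
		return;
	/* ... walk one VMA, gather counts, print ... */
	free(md);
}

/* After: scratch space lives in per-open state; show() just clears it. */
static void show_after(struct stats *md)
{
	memset(md, 0, sizeof(*md));
	/* ... walk one VMA, gather counts, print ... */
}

int main(void)
{
	struct stats *per_open = calloc(1, sizeof(*per_open)); /* at open() */
	if (!per_open)
		return 1;

	for (int vma = 0; vma < 1000; vma++) {    /* one ->show() per VMA */
		show_before();        /* old scheme: 1000 allocations/read */
		show_after(per_open); /* new scheme: zero allocations/read */
	}

	free(per_open); /* at release() */
	return 0;
}
```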
Performing the allocation when numa_maps is opened, and thus before a
reference to the target task's mm is taken, eliminates a potential
stalemate with the OOM killer, as originally described by Hugh
Dickins:
... imagine what happens if the system is out of memory, and the mm
we're looking at is selected for killing by the OOM killer: while
we wait in __get_free_page for more memory, no memory is freed
from the selected mm because it cannot reach exit_mmap while we hold
that reference.
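The hazard is easiest to see as an ordering problem. A sketch of the interleaving in comment form (illustrative, not a trace of real kernel behavior):

```c
/*
 * Before the patch (hazardous ordering):
 *
 *   reader of numa_maps               OOM killer
 *   -------------------               ----------
 *   open()
 *   read() -> takes mm reference
 *          -> kzalloc(GFP_KERNEL)     selects that mm as its victim
 *             blocks waiting for      waits for the mm's users to go
 *             memory to be freed      away so exit_mmap() can free it
 *                   ... neither side can make progress ...
 *
 * After the patch, the only allocation happens in open(), before the mm
 * reference exists, so the reader never sleeps for memory while pinning
 * the victim's address space.
 */
```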
Signed-off-by: Stephen Wilson <wilsons@start.ca>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 fs/proc/task_mmu.c | 36
 1 file changed, 27 insertions(+), 9 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2ed53d18b2ef..2c9db29ea358 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -871,6 +871,11 @@ struct numa_maps {
 	unsigned long node[MAX_NUMNODES];
 };
 
+struct numa_maps_private {
+	struct proc_maps_private proc_maps;
+	struct numa_maps md;
+};
+
 static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
 {
 	int count = page_mapcount(page);
@@ -963,9 +968,10 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
  */
 static int show_numa_map(struct seq_file *m, void *v)
 {
-	struct proc_maps_private *priv = m->private;
+	struct numa_maps_private *numa_priv = m->private;
+	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
 	struct vm_area_struct *vma = v;
-	struct numa_maps *md;
+	struct numa_maps *md = &numa_priv->md;
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
 	struct mm_walk walk = {};
@@ -976,9 +982,8 @@ static int show_numa_map(struct seq_file *m, void *v)
 	if (!mm)
 		return 0;
 
-	md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
-	if (!md)
-		return 0;
+	/* Ensure we start with an empty set of numa_maps statistics. */
+	memset(md, 0, sizeof(*md));
 
 	md->vma = vma;
 
@@ -987,7 +992,7 @@ static int show_numa_map(struct seq_file *m, void *v)
 	walk.private = md;
 	walk.mm = mm;
 
-	pol = get_vma_policy(priv->task, vma, vma->vm_start);
+	pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
 	mpol_to_str(buffer, sizeof(buffer), pol, 0);
 	mpol_cond_put(pol);
 
@@ -1034,12 +1039,12 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " N%d=%lu", n, md->node[n]);
 out:
 	seq_putc(m, '\n');
-	kfree(md);
 
 	if (m->count < m->size)
-		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
+		m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
 
+
 static const struct seq_operations proc_pid_numa_maps_op = {
 	.start = m_start,
 	.next = m_next,
@@ -1049,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
 
 static int numa_maps_open(struct inode *inode, struct file *file)
 {
-	return do_maps_open(inode, file, &proc_pid_numa_maps_op);
+	struct numa_maps_private *priv;
+	int ret = -ENOMEM;
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv) {
+		priv->proc_maps.pid = proc_pid(inode);
+		ret = seq_open(file, &proc_pid_numa_maps_op);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = priv;
+		} else {
+			kfree(priv);
+		}
+	}
+	return ret;
 }
 
 const struct file_operations proc_numa_maps_operations = {
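For context, the consumer side of this interface: /proc/\<pid\>/numa_maps is an ordinary seq_file, present on NUMA-enabled kernels, and each line of its output is produced by one show_numa_map() invocation for one vm_area_struct. A minimal reader:

```c
#include <stdio.h>

/* Minimal reader for /proc/self/numa_maps (present on NUMA-enabled
 * kernels). Each output line covers one VMA: start address, mempolicy,
 * then per-node page counts in N<node>=<pages> form. */
int main(void)
{
	char line[1024];
	FILE *f = fopen("/proc/self/numa_maps", "r");

	if (!f) {
		perror("fopen /proc/self/numa_maps");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
```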