author		Stephen Wilson <wilsons@start.ca>	2011-05-24 20:12:49 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-05-25 11:39:35 -0400
commit		5b52fc890bece77bffb9fade69239f71384ef02b
tree		52848297984dba4c0f4106c5303a1a4bf0db92b0
parent		f2beb7983613ecca20a61604f01ab50cc7a797e6
proc: allocate storage for numa_maps statistics once
In show_numa_map() we collect statistics into a numa_maps structure. Since the
number of NUMA nodes can be very large, this structure is not a candidate for
stack allocation.

Instead of going through a kmalloc()+kfree() cycle each time show_numa_map()
is invoked, perform the allocation just once, when /proc/pid/numa_maps is
opened.

Performing the allocation when numa_maps is opened, and thus before a
reference to the target task's mm is taken, eliminates a potential stalemate
condition in the oom-killer as originally described by Hugh Dickins:

    ... imagine what happens if the system is out of memory, and the mm
    we're looking at is selected for killing by the OOM killer: while
    we wait in __get_free_page for more memory, no memory is freed from
    the selected mm because it cannot reach exit_mmap while we hold that
    reference.

Signed-off-by: Stephen Wilson <wilsons@start.ca>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
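[Editor's note: the allocate-once pattern applied by this patch is easy to sketch outside the kernel. The following is a minimal userspace analogue, not the kernel code itself; the names stats, session, session_open() and show_record() are invented for illustration. The point is that the potentially large statistics buffer is allocated once per open and only memset() on each iteration, so the per-record path never allocates.]

/*
 * Illustrative userspace sketch of the allocate-once pattern.
 * Not kernel code; all names here are hypothetical stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_NODES 1024                 /* stand-in for MAX_NUMNODES */

struct stats {
	unsigned long node[MAX_NODES]; /* too large for the stack */
};

struct session {
	struct stats md;               /* embedded: one allocation per open */
};

/* Allocate all per-open state up front, before any per-record work. */
static struct session *session_open(void)
{
	return calloc(1, sizeof(struct session));
}

/* Called once per record: reset and reuse the buffer, never allocate. */
static void show_record(struct session *s, int n)
{
	struct stats *md = &s->md;

	memset(md, 0, sizeof(*md));    /* start from an empty set of stats */
	md->node[n % MAX_NODES]++;
	printf("record %d: N%d=%lu\n", n, n % MAX_NODES,
	       md->node[n % MAX_NODES]);
}

int main(void)
{
	struct session *s = session_open();

	if (!s)
		return 1;
	for (int i = 0; i < 3; i++)
		show_record(s, i);     /* no malloc()/free() on this path */
	free(s);                       /* one free, at "release" time */
	return 0;
}

[The single free at teardown mirrors the patch: the buffer's lifetime becomes that of the open file, so nothing on the show path can block waiting for memory while a reference to the target mm is held.]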
-rw-r--r--	fs/proc/task_mmu.c	36
1 file changed, 27 insertions, 9 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2ed53d18b2ef..2c9db29ea358 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -871,6 +871,11 @@ struct numa_maps {
 	unsigned long node[MAX_NUMNODES];
 };
 
+struct numa_maps_private {
+	struct proc_maps_private proc_maps;
+	struct numa_maps md;
+};
+
 static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
 {
 	int count = page_mapcount(page);
@@ -963,9 +968,10 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
  */
 static int show_numa_map(struct seq_file *m, void *v)
 {
-	struct proc_maps_private *priv = m->private;
+	struct numa_maps_private *numa_priv = m->private;
+	struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
 	struct vm_area_struct *vma = v;
-	struct numa_maps *md;
+	struct numa_maps *md = &numa_priv->md;
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
 	struct mm_walk walk = {};
@@ -976,9 +982,8 @@ static int show_numa_map(struct seq_file *m, void *v)
 	if (!mm)
 		return 0;
 
-	md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
-	if (!md)
-		return 0;
+	/* Ensure we start with an empty set of numa_maps statistics. */
+	memset(md, 0, sizeof(*md));
 
 	md->vma = vma;
 
@@ -987,7 +992,7 @@ static int show_numa_map(struct seq_file *m, void *v)
 	walk.private = md;
 	walk.mm = mm;
 
-	pol = get_vma_policy(priv->task, vma, vma->vm_start);
+	pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
 	mpol_to_str(buffer, sizeof(buffer), pol, 0);
 	mpol_cond_put(pol);
 
@@ -1034,12 +1039,12 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " N%d=%lu", n, md->node[n]);
 out:
 	seq_putc(m, '\n');
-	kfree(md);
 
 	if (m->count < m->size)
-		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
+		m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
+
 static const struct seq_operations proc_pid_numa_maps_op = {
 	.start = m_start,
 	.next = m_next,
@@ -1049,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
 
 static int numa_maps_open(struct inode *inode, struct file *file)
 {
-	return do_maps_open(inode, file, &proc_pid_numa_maps_op);
+	struct numa_maps_private *priv;
+	int ret = -ENOMEM;
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv) {
+		priv->proc_maps.pid = proc_pid(inode);
+		ret = seq_open(file, &proc_pid_numa_maps_op);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = priv;
+		} else {
+			kfree(priv);
+		}
+	}
+	return ret;
 }
 
 const struct file_operations proc_numa_maps_operations = {