diff options
author | David Rientjes <rientjes@google.com> | 2012-10-16 20:31:23 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-16 21:00:50 -0400 |
commit | 32f8516a8c733d281faa9f6666b509035246505c (patch) | |
tree | 31e40b8d5c96dec8a3e6f8adfb3db992718fecc1 | |
parent | dd8e8c4a2c902d8350b702e7bc7c2799e5e7e331 (diff) |
mm, mempolicy: fix printing stack contents in numa_maps
When reading /proc/pid/numa_maps, it's possible to return the contents of
the stack where the mempolicy string should be printed if the policy gets
freed from beneath us.
This happens because mpol_to_str() may return an error the
stack-allocated buffer is then printed without ever being stored.
There are two possible error conditions in mpol_to_str():
- if the buffer allocated is insufficient for the string to be stored,
and
- if the mempolicy has an invalid mode.
The first error condition is not triggered in any of the callers to
mpol_to_str(): at least 50 bytes is always allocated on the stack and this
is sufficient for the string to be written. A future patch should convert
this into BUILD_BUG_ON() since we know the maximum strlen possible, but
that's not -rc material.
The second error condition is possible if a race occurs in dropping a
reference to a task's mempolicy causing it to be freed during the read().
The slab poison value is then used for the mode and mpol_to_str() returns
-EINVAL.
This race is only possible because get_vma_policy() believes that
mm->mmap_sem protects task->mempolicy, which isn't true. The exit path
does not hold mm->mmap_sem when dropping the reference or setting
task->mempolicy to NULL: it uses task_lock(task) instead.
Thus, it's required for the caller of a task mempolicy to hold
task_lock(task) while grabbing the mempolicy and reading it. Callers with
a vma policy store their mempolicy earlier and can simply increment the
reference count so it's guaranteed not to be freed.
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/task_mmu.c | 7 | ||||
-rw-r--r-- | mm/mempolicy.c | 5 |
2 files changed, 7 insertions, 5 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 79827ce03e3..14df8806ff2 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -1158,6 +1158,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1158 | struct vm_area_struct *vma = v; | 1158 | struct vm_area_struct *vma = v; |
1159 | struct numa_maps *md = &numa_priv->md; | 1159 | struct numa_maps *md = &numa_priv->md; |
1160 | struct file *file = vma->vm_file; | 1160 | struct file *file = vma->vm_file; |
1161 | struct task_struct *task = proc_priv->task; | ||
1161 | struct mm_struct *mm = vma->vm_mm; | 1162 | struct mm_struct *mm = vma->vm_mm; |
1162 | struct mm_walk walk = {}; | 1163 | struct mm_walk walk = {}; |
1163 | struct mempolicy *pol; | 1164 | struct mempolicy *pol; |
@@ -1177,9 +1178,11 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1177 | walk.private = md; | 1178 | walk.private = md; |
1178 | walk.mm = mm; | 1179 | walk.mm = mm; |
1179 | 1180 | ||
1180 | pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); | 1181 | task_lock(task); |
1182 | pol = get_vma_policy(task, vma, vma->vm_start); | ||
1181 | mpol_to_str(buffer, sizeof(buffer), pol, 0); | 1183 | mpol_to_str(buffer, sizeof(buffer), pol, 0); |
1182 | mpol_cond_put(pol); | 1184 | mpol_cond_put(pol); |
1185 | task_unlock(task); | ||
1183 | 1186 | ||
1184 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | 1187 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); |
1185 | 1188 | ||
@@ -1189,7 +1192,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1189 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1192 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
1190 | seq_printf(m, " heap"); | 1193 | seq_printf(m, " heap"); |
1191 | } else { | 1194 | } else { |
1192 | pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid); | 1195 | pid_t tid = vm_is_stack(task, vma, is_pid); |
1193 | if (tid != 0) { | 1196 | if (tid != 0) { |
1194 | /* | 1197 | /* |
1195 | * Thread stack in /proc/PID/task/TID/maps or | 1198 | * Thread stack in /proc/PID/task/TID/maps or |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0b78fb9ea65..d04a8a54c29 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1536,9 +1536,8 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, | |||
1536 | * | 1536 | * |
1537 | * Returns effective policy for a VMA at specified address. | 1537 | * Returns effective policy for a VMA at specified address. |
1538 | * Falls back to @task or system default policy, as necessary. | 1538 | * Falls back to @task or system default policy, as necessary. |
1539 | * Current or other task's task mempolicy and non-shared vma policies | 1539 | * Current or other task's task mempolicy and non-shared vma policies must be |
1540 | * are protected by the task's mmap_sem, which must be held for read by | 1540 | * protected by task_lock(task) by the caller. |
1541 | * the caller. | ||
1542 | * Shared policies [those marked as MPOL_F_SHARED] require an extra reference | 1541 | * Shared policies [those marked as MPOL_F_SHARED] require an extra reference |
1543 | * count--added by the get_policy() vm_op, as appropriate--to protect against | 1542 | * count--added by the get_policy() vm_op, as appropriate--to protect against |
1544 | * freeing by another task. It is the caller's responsibility to free the | 1543 | * freeing by another task. It is the caller's responsibility to free the |