diff options
| author | Oleg Nesterov <oleg@redhat.com> | 2012-06-15 11:43:28 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2012-06-16 03:10:42 -0400 |
| commit | 5323ce71e4b4e1f188ebbc0cc7776885ea6c75fb (patch) | |
| tree | fd4f1445cd71a2a2cc0096f08a216e27bc3fc237 /kernel/events | |
| parent | cc359d180fa9c25a4c1819f17e07a422d788353d (diff) | |
uprobes: Write_opcode()->__replace_page() can race with try_to_unmap()
write_opcode() gets old_page via get_user_pages() and then calls
__replace_page() which assumes that this old_page is still
mapped after pte_offset_map_lock().
This is not true if this old_page was already try_to_unmap()'ed,
and in this case everything __replace_page() does with old_page
is wrong. Just for example, put_page() is not balanced.
I think it is possible to teach __replace_page() to handle this
unlikely case correctly, but this patch simply changes it to use
page_check_address() and return -EAGAIN if it fails. The caller
should notice this error code and retry.
Note: write_opcode() asks for the cleanups, I'll try to do this
in a separate patch.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120615154328.GA9571@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
| -rw-r--r-- | kernel/events/uprobes.c | 41 |
1 files changed, 13 insertions, 28 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 604930bf9c92..3ccdb29ee8d6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
| @@ -129,33 +129,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) | |||
| 129 | static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) | 129 | static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) |
| 130 | { | 130 | { |
| 131 | struct mm_struct *mm = vma->vm_mm; | 131 | struct mm_struct *mm = vma->vm_mm; |
| 132 | pgd_t *pgd; | ||
| 133 | pud_t *pud; | ||
| 134 | pmd_t *pmd; | ||
| 135 | pte_t *ptep; | ||
| 136 | spinlock_t *ptl; | ||
| 137 | unsigned long addr; | 132 | unsigned long addr; |
| 138 | int err = -EFAULT; | 133 | spinlock_t *ptl; |
| 134 | pte_t *ptep; | ||
| 139 | 135 | ||
| 140 | addr = page_address_in_vma(page, vma); | 136 | addr = page_address_in_vma(page, vma); |
| 141 | if (addr == -EFAULT) | 137 | if (addr == -EFAULT) |
| 142 | goto out; | 138 | return -EFAULT; |
| 143 | |||
| 144 | pgd = pgd_offset(mm, addr); | ||
| 145 | if (!pgd_present(*pgd)) | ||
| 146 | goto out; | ||
| 147 | |||
| 148 | pud = pud_offset(pgd, addr); | ||
| 149 | if (!pud_present(*pud)) | ||
| 150 | goto out; | ||
| 151 | 139 | ||
| 152 | pmd = pmd_offset(pud, addr); | 140 | ptep = page_check_address(page, mm, addr, &ptl, 0); |
| 153 | if (!pmd_present(*pmd)) | ||
| 154 | goto out; | ||
| 155 | |||
| 156 | ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); | ||
| 157 | if (!ptep) | 141 | if (!ptep) |
| 158 | goto out; | 142 | return -EAGAIN; |
| 159 | 143 | ||
| 160 | get_page(kpage); | 144 | get_page(kpage); |
| 161 | page_add_new_anon_rmap(kpage, vma, addr); | 145 | page_add_new_anon_rmap(kpage, vma, addr); |
| @@ -174,10 +158,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct | |||
| 174 | try_to_free_swap(page); | 158 | try_to_free_swap(page); |
| 175 | put_page(page); | 159 | put_page(page); |
| 176 | pte_unmap_unlock(ptep, ptl); | 160 | pte_unmap_unlock(ptep, ptl); |
| 177 | err = 0; | ||
| 178 | 161 | ||
| 179 | out: | 162 | return 0; |
| 180 | return err; | ||
| 181 | } | 163 | } |
| 182 | 164 | ||
| 183 | /** | 165 | /** |
| @@ -222,9 +204,10 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
| 222 | void *vaddr_old, *vaddr_new; | 204 | void *vaddr_old, *vaddr_new; |
| 223 | struct vm_area_struct *vma; | 205 | struct vm_area_struct *vma; |
| 224 | struct uprobe *uprobe; | 206 | struct uprobe *uprobe; |
| 207 | unsigned long pgoff; | ||
| 225 | loff_t addr; | 208 | loff_t addr; |
| 226 | int ret; | 209 | int ret; |
| 227 | 210 | retry: | |
| 228 | /* Read the page with vaddr into memory */ | 211 | /* Read the page with vaddr into memory */ |
| 229 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); | 212 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); |
| 230 | if (ret <= 0) | 213 | if (ret <= 0) |
| @@ -269,9 +252,9 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | |||
| 269 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); | 252 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); |
| 270 | 253 | ||
| 271 | /* poke the new insn in, ASSUMES we don't cross page boundary */ | 254 | /* poke the new insn in, ASSUMES we don't cross page boundary */ |
| 272 | vaddr &= ~PAGE_MASK; | 255 | pgoff = (vaddr & ~PAGE_MASK); |
| 273 | BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); | 256 | BUG_ON(pgoff + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); |
| 274 | memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); | 257 | memcpy(vaddr_new + pgoff, &opcode, UPROBE_SWBP_INSN_SIZE); |
| 275 | 258 | ||
| 276 | kunmap_atomic(vaddr_new); | 259 | kunmap_atomic(vaddr_new); |
| 277 | kunmap_atomic(vaddr_old); | 260 | kunmap_atomic(vaddr_old); |
| @@ -291,6 +274,8 @@ unlock_out: | |||
| 291 | put_out: | 274 | put_out: |
| 292 | put_page(old_page); | 275 | put_page(old_page); |
| 293 | 276 | ||
| 277 | if (unlikely(ret == -EAGAIN)) | ||
| 278 | goto retry; | ||
| 294 | return ret; | 279 | return ret; |
| 295 | } | 280 | } |
| 296 | 281 | ||
