diff options
author | Hugh Dickins <hugh@veritas.com> | 2006-12-10 05:18:43 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-10 12:55:39 -0500 |
commit | 5fcf7bb73f66cc1c4ad90788b0f367c4d6852b75 (patch) | |
tree | 76854ba1babc308beaf8f19d299a5b32ab7fda30 | |
parent | 347a00fb4ad2200f8f8331f8b366b1d84eff577d (diff) |
[PATCH] read_zero_pagealigned() locking fix
Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel
bugzilla 7645. Right: read_zero_pagealigned uses down_read of mmap_sem,
but another thread's racing read of /dev/zero, or a normal fault, can
easily set that pte again, in between zap_page_range and zeromap_page_range
getting there. It's been wrong ever since 2.4.3.
The simple fix is to use down_write instead, but that would serialize reads
of /dev/zero more than at present: perhaps some app would be badly
affected. So instead, let zeromap_page_range return an error rather than
hitting BUG_ON, and have read_zero_pagealigned break out to the slower
clear_user loop in that case - there's no need to optimize for it.
Use -EEXIST for when a pte is already present: mmap_zero (the other user of
zeromap_page_range) does BUG_ON for that error, though it really isn't
interesting there. And since mmap_zero wants -EAGAIN for out-of-memory, the
zeromap functions had better return that rather than -ENOMEM.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Ramiro Voicu <Ramiro.Voicu@cern.ch>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/char/mem.c | 12 | ||||
-rw-r--r-- | mm/memory.c | 32 |
2 files changed, 29 insertions, 15 deletions
diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 089020e0ee5a..4f1813e04754 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c | |||
@@ -646,7 +646,8 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size) | |||
646 | count = size; | 646 | count = size; |
647 | 647 | ||
648 | zap_page_range(vma, addr, count, NULL); | 648 | zap_page_range(vma, addr, count, NULL); |
649 | zeromap_page_range(vma, addr, count, PAGE_COPY); | 649 | if (zeromap_page_range(vma, addr, count, PAGE_COPY)) |
650 | break; | ||
650 | 651 | ||
651 | size -= count; | 652 | size -= count; |
652 | buf += count; | 653 | buf += count; |
@@ -713,11 +714,14 @@ out: | |||
713 | 714 | ||
714 | static int mmap_zero(struct file * file, struct vm_area_struct * vma) | 715 | static int mmap_zero(struct file * file, struct vm_area_struct * vma) |
715 | { | 716 | { |
717 | int err; | ||
718 | |||
716 | if (vma->vm_flags & VM_SHARED) | 719 | if (vma->vm_flags & VM_SHARED) |
717 | return shmem_zero_setup(vma); | 720 | return shmem_zero_setup(vma); |
718 | if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) | 721 | err = zeromap_page_range(vma, vma->vm_start, |
719 | return -EAGAIN; | 722 | vma->vm_end - vma->vm_start, vma->vm_page_prot); |
720 | return 0; | 723 | BUG_ON(err == -EEXIST); |
724 | return err; | ||
721 | } | 725 | } |
722 | #else /* CONFIG_MMU */ | 726 | #else /* CONFIG_MMU */ |
723 | static ssize_t read_zero(struct file * file, char * buf, | 727 | static ssize_t read_zero(struct file * file, char * buf, |
diff --git a/mm/memory.c b/mm/memory.c index 4198df0dff1c..bf6100236e62 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1110,23 +1110,29 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, | |||
1110 | { | 1110 | { |
1111 | pte_t *pte; | 1111 | pte_t *pte; |
1112 | spinlock_t *ptl; | 1112 | spinlock_t *ptl; |
1113 | int err = 0; | ||
1113 | 1114 | ||
1114 | pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); | 1115 | pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); |
1115 | if (!pte) | 1116 | if (!pte) |
1116 | return -ENOMEM; | 1117 | return -EAGAIN; |
1117 | arch_enter_lazy_mmu_mode(); | 1118 | arch_enter_lazy_mmu_mode(); |
1118 | do { | 1119 | do { |
1119 | struct page *page = ZERO_PAGE(addr); | 1120 | struct page *page = ZERO_PAGE(addr); |
1120 | pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); | 1121 | pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); |
1122 | |||
1123 | if (unlikely(!pte_none(*pte))) { | ||
1124 | err = -EEXIST; | ||
1125 | pte++; | ||
1126 | break; | ||
1127 | } | ||
1121 | page_cache_get(page); | 1128 | page_cache_get(page); |
1122 | page_add_file_rmap(page); | 1129 | page_add_file_rmap(page); |
1123 | inc_mm_counter(mm, file_rss); | 1130 | inc_mm_counter(mm, file_rss); |
1124 | BUG_ON(!pte_none(*pte)); | ||
1125 | set_pte_at(mm, addr, pte, zero_pte); | 1131 | set_pte_at(mm, addr, pte, zero_pte); |
1126 | } while (pte++, addr += PAGE_SIZE, addr != end); | 1132 | } while (pte++, addr += PAGE_SIZE, addr != end); |
1127 | arch_leave_lazy_mmu_mode(); | 1133 | arch_leave_lazy_mmu_mode(); |
1128 | pte_unmap_unlock(pte - 1, ptl); | 1134 | pte_unmap_unlock(pte - 1, ptl); |
1129 | return 0; | 1135 | return err; |
1130 | } | 1136 | } |
1131 | 1137 | ||
1132 | static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, | 1138 | static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, |
@@ -1134,16 +1140,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, | |||
1134 | { | 1140 | { |
1135 | pmd_t *pmd; | 1141 | pmd_t *pmd; |
1136 | unsigned long next; | 1142 | unsigned long next; |
1143 | int err; | ||
1137 | 1144 | ||
1138 | pmd = pmd_alloc(mm, pud, addr); | 1145 | pmd = pmd_alloc(mm, pud, addr); |
1139 | if (!pmd) | 1146 | if (!pmd) |
1140 | return -ENOMEM; | 1147 | return -EAGAIN; |
1141 | do { | 1148 | do { |
1142 | next = pmd_addr_end(addr, end); | 1149 | next = pmd_addr_end(addr, end); |
1143 | if (zeromap_pte_range(mm, pmd, addr, next, prot)) | 1150 | err = zeromap_pte_range(mm, pmd, addr, next, prot); |
1144 | return -ENOMEM; | 1151 | if (err) |
1152 | break; | ||
1145 | } while (pmd++, addr = next, addr != end); | 1153 | } while (pmd++, addr = next, addr != end); |
1146 | return 0; | 1154 | return err; |
1147 | } | 1155 | } |
1148 | 1156 | ||
1149 | static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, | 1157 | static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, |
@@ -1151,16 +1159,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, | |||
1151 | { | 1159 | { |
1152 | pud_t *pud; | 1160 | pud_t *pud; |
1153 | unsigned long next; | 1161 | unsigned long next; |
1162 | int err; | ||
1154 | 1163 | ||
1155 | pud = pud_alloc(mm, pgd, addr); | 1164 | pud = pud_alloc(mm, pgd, addr); |
1156 | if (!pud) | 1165 | if (!pud) |
1157 | return -ENOMEM; | 1166 | return -EAGAIN; |
1158 | do { | 1167 | do { |
1159 | next = pud_addr_end(addr, end); | 1168 | next = pud_addr_end(addr, end); |
1160 | if (zeromap_pmd_range(mm, pud, addr, next, prot)) | 1169 | err = zeromap_pmd_range(mm, pud, addr, next, prot); |
1161 | return -ENOMEM; | 1170 | if (err) |
1171 | break; | ||
1162 | } while (pud++, addr = next, addr != end); | 1172 | } while (pud++, addr = next, addr != end); |
1163 | return 0; | 1173 | return err; |
1164 | } | 1174 | } |
1165 | 1175 | ||
1166 | int zeromap_page_range(struct vm_area_struct *vma, | 1176 | int zeromap_page_range(struct vm_area_struct *vma, |