diff options
author | Hugh Dickins <hugh@veritas.com> | 2006-12-10 05:18:43 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-10 12:55:39 -0500 |
commit | 5fcf7bb73f66cc1c4ad90788b0f367c4d6852b75 (patch) | |
tree | 76854ba1babc308beaf8f19d299a5b32ab7fda30 /mm | |
parent | 347a00fb4ad2200f8f8331f8b366b1d84eff577d (diff) |
[PATCH] read_zero_pagealigned() locking fix
Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel
bugzilla 7645. Right: read_zero_pagealigned uses down_read of mmap_sem,
but another thread's racing read of /dev/zero, or a normal fault, can
easily set that pte again, in between zap_page_range and zeromap_page_range
getting there. It's been wrong ever since 2.4.3.
The simple fix is to use down_write instead, but that would serialize reads
of /dev/zero more than at present: perhaps some app would be badly
affected. So instead let zeromap_page_range return an error rather than
BUG_ON, and let read_zero_pagealigned break to the slower clear_user loop in
that case - there's no need to optimize for it.
Return -EEXIST when a pte is already present: mmap_zero (the other user of
zeromap_page_range) does a BUG_ON for it, though it really isn't interesting
there. And since mmap_zero wants -EAGAIN for out-of-memory, the zeromap
functions had better return that rather than -ENOMEM.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Ramiro Voicu <Ramiro.Voicu@cern.ch>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memory.c | 32 |
1 files changed, 21 insertions, 11 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 4198df0dff1c..bf6100236e62 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1110,23 +1110,29 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd, | |||
1110 | { | 1110 | { |
1111 | pte_t *pte; | 1111 | pte_t *pte; |
1112 | spinlock_t *ptl; | 1112 | spinlock_t *ptl; |
1113 | int err = 0; | ||
1113 | 1114 | ||
1114 | pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); | 1115 | pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); |
1115 | if (!pte) | 1116 | if (!pte) |
1116 | return -ENOMEM; | 1117 | return -EAGAIN; |
1117 | arch_enter_lazy_mmu_mode(); | 1118 | arch_enter_lazy_mmu_mode(); |
1118 | do { | 1119 | do { |
1119 | struct page *page = ZERO_PAGE(addr); | 1120 | struct page *page = ZERO_PAGE(addr); |
1120 | pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); | 1121 | pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); |
1122 | |||
1123 | if (unlikely(!pte_none(*pte))) { | ||
1124 | err = -EEXIST; | ||
1125 | pte++; | ||
1126 | break; | ||
1127 | } | ||
1121 | page_cache_get(page); | 1128 | page_cache_get(page); |
1122 | page_add_file_rmap(page); | 1129 | page_add_file_rmap(page); |
1123 | inc_mm_counter(mm, file_rss); | 1130 | inc_mm_counter(mm, file_rss); |
1124 | BUG_ON(!pte_none(*pte)); | ||
1125 | set_pte_at(mm, addr, pte, zero_pte); | 1131 | set_pte_at(mm, addr, pte, zero_pte); |
1126 | } while (pte++, addr += PAGE_SIZE, addr != end); | 1132 | } while (pte++, addr += PAGE_SIZE, addr != end); |
1127 | arch_leave_lazy_mmu_mode(); | 1133 | arch_leave_lazy_mmu_mode(); |
1128 | pte_unmap_unlock(pte - 1, ptl); | 1134 | pte_unmap_unlock(pte - 1, ptl); |
1129 | return 0; | 1135 | return err; |
1130 | } | 1136 | } |
1131 | 1137 | ||
1132 | static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, | 1138 | static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, |
@@ -1134,16 +1140,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, | |||
1134 | { | 1140 | { |
1135 | pmd_t *pmd; | 1141 | pmd_t *pmd; |
1136 | unsigned long next; | 1142 | unsigned long next; |
1143 | int err; | ||
1137 | 1144 | ||
1138 | pmd = pmd_alloc(mm, pud, addr); | 1145 | pmd = pmd_alloc(mm, pud, addr); |
1139 | if (!pmd) | 1146 | if (!pmd) |
1140 | return -ENOMEM; | 1147 | return -EAGAIN; |
1141 | do { | 1148 | do { |
1142 | next = pmd_addr_end(addr, end); | 1149 | next = pmd_addr_end(addr, end); |
1143 | if (zeromap_pte_range(mm, pmd, addr, next, prot)) | 1150 | err = zeromap_pte_range(mm, pmd, addr, next, prot); |
1144 | return -ENOMEM; | 1151 | if (err) |
1152 | break; | ||
1145 | } while (pmd++, addr = next, addr != end); | 1153 | } while (pmd++, addr = next, addr != end); |
1146 | return 0; | 1154 | return err; |
1147 | } | 1155 | } |
1148 | 1156 | ||
1149 | static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, | 1157 | static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, |
@@ -1151,16 +1159,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, | |||
1151 | { | 1159 | { |
1152 | pud_t *pud; | 1160 | pud_t *pud; |
1153 | unsigned long next; | 1161 | unsigned long next; |
1162 | int err; | ||
1154 | 1163 | ||
1155 | pud = pud_alloc(mm, pgd, addr); | 1164 | pud = pud_alloc(mm, pgd, addr); |
1156 | if (!pud) | 1165 | if (!pud) |
1157 | return -ENOMEM; | 1166 | return -EAGAIN; |
1158 | do { | 1167 | do { |
1159 | next = pud_addr_end(addr, end); | 1168 | next = pud_addr_end(addr, end); |
1160 | if (zeromap_pmd_range(mm, pud, addr, next, prot)) | 1169 | err = zeromap_pmd_range(mm, pud, addr, next, prot); |
1161 | return -ENOMEM; | 1170 | if (err) |
1171 | break; | ||
1162 | } while (pud++, addr = next, addr != end); | 1172 | } while (pud++, addr = next, addr != end); |
1163 | return 0; | 1173 | return err; |
1164 | } | 1174 | } |
1165 | 1175 | ||
1166 | int zeromap_page_range(struct vm_area_struct *vma, | 1176 | int zeromap_page_range(struct vm_area_struct *vma, |