aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Hugh Dickins <hugh@veritas.com> 2006-12-10 05:18:43 -0500
committer: Linus Torvalds <torvalds@woody.osdl.org> 2006-12-10 12:55:39 -0500
commit: 5fcf7bb73f66cc1c4ad90788b0f367c4d6852b75 (patch)
tree: 76854ba1babc308beaf8f19d299a5b32ab7fda30
parent: 347a00fb4ad2200f8f8331f8b366b1d84eff577d (diff)
[PATCH] read_zero_pagealigned() locking fix
Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel
bugzilla 7645.  Right: read_zero_pagealigned uses down_read of mmap_sem,
but another thread's racing read of /dev/zero, or a normal fault, can
easily set that pte again, in between zap_page_range and zeromap_page_range
getting there.  It's been wrong ever since 2.4.3.

The simple fix is to use down_write instead, but that would serialize reads
of /dev/zero more than at present: perhaps some app would be badly
affected.  So instead let zeromap_page_range return the error instead of
BUG_ON, and read_zero_pagealigned break to the slower clear_user loop in
that case - there's no need to optimize for it.

Use -EEXIST for when a pte is found: BUG_ON in mmap_zero (the other user of
zeromap_page_range), though it really isn't interesting there.  And since
mmap_zero wants -EAGAIN for out-of-memory, the zeromaps better return that
than -ENOMEM.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Ramiro Voicu: <Ramiro.Voicu@cern.ch>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- drivers/char/mem.c | 12
-rw-r--r-- mm/memory.c | 32
2 files changed, 29 insertions, 15 deletions
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 089020e0ee5a..4f1813e04754 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -646,7 +646,8 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
646 count = size; 646 count = size;
647 647
648 zap_page_range(vma, addr, count, NULL); 648 zap_page_range(vma, addr, count, NULL);
649 zeromap_page_range(vma, addr, count, PAGE_COPY); 649 if (zeromap_page_range(vma, addr, count, PAGE_COPY))
650 break;
650 651
651 size -= count; 652 size -= count;
652 buf += count; 653 buf += count;
@@ -713,11 +714,14 @@ out:
713 714
714static int mmap_zero(struct file * file, struct vm_area_struct * vma) 715static int mmap_zero(struct file * file, struct vm_area_struct * vma)
715{ 716{
717 int err;
718
716 if (vma->vm_flags & VM_SHARED) 719 if (vma->vm_flags & VM_SHARED)
717 return shmem_zero_setup(vma); 720 return shmem_zero_setup(vma);
718 if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot)) 721 err = zeromap_page_range(vma, vma->vm_start,
719 return -EAGAIN; 722 vma->vm_end - vma->vm_start, vma->vm_page_prot);
720 return 0; 723 BUG_ON(err == -EEXIST);
724 return err;
721} 725}
722#else /* CONFIG_MMU */ 726#else /* CONFIG_MMU */
723static ssize_t read_zero(struct file * file, char * buf, 727static ssize_t read_zero(struct file * file, char * buf,
diff --git a/mm/memory.c b/mm/memory.c
index 4198df0dff1c..bf6100236e62 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1110,23 +1110,29 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1110{ 1110{
1111 pte_t *pte; 1111 pte_t *pte;
1112 spinlock_t *ptl; 1112 spinlock_t *ptl;
1113 int err = 0;
1113 1114
1114 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); 1115 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1115 if (!pte) 1116 if (!pte)
1116 return -ENOMEM; 1117 return -EAGAIN;
1117 arch_enter_lazy_mmu_mode(); 1118 arch_enter_lazy_mmu_mode();
1118 do { 1119 do {
1119 struct page *page = ZERO_PAGE(addr); 1120 struct page *page = ZERO_PAGE(addr);
1120 pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); 1121 pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
1122
1123 if (unlikely(!pte_none(*pte))) {
1124 err = -EEXIST;
1125 pte++;
1126 break;
1127 }
1121 page_cache_get(page); 1128 page_cache_get(page);
1122 page_add_file_rmap(page); 1129 page_add_file_rmap(page);
1123 inc_mm_counter(mm, file_rss); 1130 inc_mm_counter(mm, file_rss);
1124 BUG_ON(!pte_none(*pte));
1125 set_pte_at(mm, addr, pte, zero_pte); 1131 set_pte_at(mm, addr, pte, zero_pte);
1126 } while (pte++, addr += PAGE_SIZE, addr != end); 1132 } while (pte++, addr += PAGE_SIZE, addr != end);
1127 arch_leave_lazy_mmu_mode(); 1133 arch_leave_lazy_mmu_mode();
1128 pte_unmap_unlock(pte - 1, ptl); 1134 pte_unmap_unlock(pte - 1, ptl);
1129 return 0; 1135 return err;
1130} 1136}
1131 1137
1132static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud, 1138static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1134,16 +1140,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
1134{ 1140{
1135 pmd_t *pmd; 1141 pmd_t *pmd;
1136 unsigned long next; 1142 unsigned long next;
1143 int err;
1137 1144
1138 pmd = pmd_alloc(mm, pud, addr); 1145 pmd = pmd_alloc(mm, pud, addr);
1139 if (!pmd) 1146 if (!pmd)
1140 return -ENOMEM; 1147 return -EAGAIN;
1141 do { 1148 do {
1142 next = pmd_addr_end(addr, end); 1149 next = pmd_addr_end(addr, end);
1143 if (zeromap_pte_range(mm, pmd, addr, next, prot)) 1150 err = zeromap_pte_range(mm, pmd, addr, next, prot);
1144 return -ENOMEM; 1151 if (err)
1152 break;
1145 } while (pmd++, addr = next, addr != end); 1153 } while (pmd++, addr = next, addr != end);
1146 return 0; 1154 return err;
1147} 1155}
1148 1156
1149static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd, 1157static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
@@ -1151,16 +1159,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
1151{ 1159{
1152 pud_t *pud; 1160 pud_t *pud;
1153 unsigned long next; 1161 unsigned long next;
1162 int err;
1154 1163
1155 pud = pud_alloc(mm, pgd, addr); 1164 pud = pud_alloc(mm, pgd, addr);
1156 if (!pud) 1165 if (!pud)
1157 return -ENOMEM; 1166 return -EAGAIN;
1158 do { 1167 do {
1159 next = pud_addr_end(addr, end); 1168 next = pud_addr_end(addr, end);
1160 if (zeromap_pmd_range(mm, pud, addr, next, prot)) 1169 err = zeromap_pmd_range(mm, pud, addr, next, prot);
1161 return -ENOMEM; 1170 if (err)
1171 break;
1162 } while (pud++, addr = next, addr != end); 1172 } while (pud++, addr = next, addr != end);
1163 return 0; 1173 return err;
1164} 1174}
1165 1175
1166int zeromap_page_range(struct vm_area_struct *vma, 1176int zeromap_page_range(struct vm_area_struct *vma,