diff options
author | Hugh Dickins <hugh@veritas.com> | 2005-10-29 21:16:27 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 00:40:40 -0400 |
commit | 705e87c0c3c38424f7f30556c85bc20e808d2f59 (patch) | |
tree | 7a237e6266f4801385e1226cc497b47e3a2458bd /mm | |
parent | 8f4e2101fd7df9031a754eedb82e2060b51f8c45 (diff) |
[PATCH] mm: pte_offset_map_lock loops
Convert those common loops using page_table_lock on the outside and
pte_offset_map within to use just pte_offset_map_lock within instead.
These all hold mmap_sem (some exclusively, some not), so at no level can a
page table be whipped away from beneath them. But whereas pte_alloc loops
tested with the "atomic" pmd_present, these loops are testing with pmd_none,
which on i386 PAE tests both lower and upper halves.
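That's now unsafe: with page_table_lock no longer held around the whole walk,
a pmd entry may be populated by another thread while its two halves are being
tested. So add a cast into pmd_none to test only the vital lower
half: we lose a little sensitivity to a corrupt middle directory, but not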
enough to worry about. It appears that i386 and UML were the only
architectures vulnerable in this way, and that pgd and pud pose no problem.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/mempolicy.c | 7 | ||||
-rw-r--r-- | mm/mprotect.c | 7 | ||||
-rw-r--r-- | mm/msync.c | 21 | ||||
-rw-r--r-- | mm/swapfile.c | 20 |
4 files changed, 21 insertions, 34 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 11d824f282f1..902d4c9eccdc 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -228,9 +228,9 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
228 | { | 228 | { |
229 | pte_t *orig_pte; | 229 | pte_t *orig_pte; |
230 | pte_t *pte; | 230 | pte_t *pte; |
231 | spinlock_t *ptl; | ||
231 | 232 | ||
232 | spin_lock(&vma->vm_mm->page_table_lock); | 233 | orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
233 | orig_pte = pte = pte_offset_map(pmd, addr); | ||
234 | do { | 234 | do { |
235 | unsigned long pfn; | 235 | unsigned long pfn; |
236 | unsigned int nid; | 236 | unsigned int nid; |
@@ -246,8 +246,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
246 | if (!node_isset(nid, *nodes)) | 246 | if (!node_isset(nid, *nodes)) |
247 | break; | 247 | break; |
248 | } while (pte++, addr += PAGE_SIZE, addr != end); | 248 | } while (pte++, addr += PAGE_SIZE, addr != end); |
249 | pte_unmap(orig_pte); | 249 | pte_unmap_unlock(orig_pte, ptl); |
250 | spin_unlock(&vma->vm_mm->page_table_lock); | ||
251 | return addr != end; | 250 | return addr != end; |
252 | } | 251 | } |
253 | 252 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 672a76fddd5e..17a2b52b753b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -29,8 +29,9 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, | |||
29 | unsigned long addr, unsigned long end, pgprot_t newprot) | 29 | unsigned long addr, unsigned long end, pgprot_t newprot) |
30 | { | 30 | { |
31 | pte_t *pte; | 31 | pte_t *pte; |
32 | spinlock_t *ptl; | ||
32 | 33 | ||
33 | pte = pte_offset_map(pmd, addr); | 34 | pte = pte_offset_map_lock(mm, pmd, addr, &ptl); |
34 | do { | 35 | do { |
35 | if (pte_present(*pte)) { | 36 | if (pte_present(*pte)) { |
36 | pte_t ptent; | 37 | pte_t ptent; |
@@ -44,7 +45,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, | |||
44 | lazy_mmu_prot_update(ptent); | 45 | lazy_mmu_prot_update(ptent); |
45 | } | 46 | } |
46 | } while (pte++, addr += PAGE_SIZE, addr != end); | 47 | } while (pte++, addr += PAGE_SIZE, addr != end); |
47 | pte_unmap(pte - 1); | 48 | pte_unmap_unlock(pte - 1, ptl); |
48 | } | 49 | } |
49 | 50 | ||
50 | static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, | 51 | static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, |
@@ -88,7 +89,6 @@ static void change_protection(struct vm_area_struct *vma, | |||
88 | BUG_ON(addr >= end); | 89 | BUG_ON(addr >= end); |
89 | pgd = pgd_offset(mm, addr); | 90 | pgd = pgd_offset(mm, addr); |
90 | flush_cache_range(vma, addr, end); | 91 | flush_cache_range(vma, addr, end); |
91 | spin_lock(&mm->page_table_lock); | ||
92 | do { | 92 | do { |
93 | next = pgd_addr_end(addr, end); | 93 | next = pgd_addr_end(addr, end); |
94 | if (pgd_none_or_clear_bad(pgd)) | 94 | if (pgd_none_or_clear_bad(pgd)) |
@@ -96,7 +96,6 @@ static void change_protection(struct vm_area_struct *vma, | |||
96 | change_pud_range(mm, pgd, addr, next, newprot); | 96 | change_pud_range(mm, pgd, addr, next, newprot); |
97 | } while (pgd++, addr = next, addr != end); | 97 | } while (pgd++, addr = next, addr != end); |
98 | flush_tlb_range(vma, start, end); | 98 | flush_tlb_range(vma, start, end); |
99 | spin_unlock(&mm->page_table_lock); | ||
100 | } | 99 | } |
101 | 100 | ||
102 | static int | 101 | static int |
diff --git a/mm/msync.c b/mm/msync.c index 860395486060..0e040e9c39d8 100644 --- a/mm/msync.c +++ b/mm/msync.c | |||
@@ -17,28 +17,22 @@ | |||
17 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
18 | #include <asm/tlbflush.h> | 18 | #include <asm/tlbflush.h> |
19 | 19 | ||
20 | /* | ||
21 | * Called with mm->page_table_lock held to protect against other | ||
22 | * threads/the swapper from ripping pte's out from under us. | ||
23 | */ | ||
24 | |||
25 | static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 20 | static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
26 | unsigned long addr, unsigned long end) | 21 | unsigned long addr, unsigned long end) |
27 | { | 22 | { |
28 | struct mm_struct *mm = vma->vm_mm; | ||
29 | pte_t *pte; | 23 | pte_t *pte; |
24 | spinlock_t *ptl; | ||
30 | int progress = 0; | 25 | int progress = 0; |
31 | 26 | ||
32 | again: | 27 | again: |
33 | pte = pte_offset_map(pmd, addr); | 28 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
34 | do { | 29 | do { |
35 | unsigned long pfn; | 30 | unsigned long pfn; |
36 | struct page *page; | 31 | struct page *page; |
37 | 32 | ||
38 | if (progress >= 64) { | 33 | if (progress >= 64) { |
39 | progress = 0; | 34 | progress = 0; |
40 | if (need_resched() || | 35 | if (need_resched() || need_lockbreak(ptl)) |
41 | need_lockbreak(&mm->page_table_lock)) | ||
42 | break; | 36 | break; |
43 | } | 37 | } |
44 | progress++; | 38 | progress++; |
@@ -58,8 +52,8 @@ again: | |||
58 | set_page_dirty(page); | 52 | set_page_dirty(page); |
59 | progress += 3; | 53 | progress += 3; |
60 | } while (pte++, addr += PAGE_SIZE, addr != end); | 54 | } while (pte++, addr += PAGE_SIZE, addr != end); |
61 | pte_unmap(pte - 1); | 55 | pte_unmap_unlock(pte - 1, ptl); |
62 | cond_resched_lock(&mm->page_table_lock); | 56 | cond_resched(); |
63 | if (addr != end) | 57 | if (addr != end) |
64 | goto again; | 58 | goto again; |
65 | } | 59 | } |
@@ -97,7 +91,6 @@ static inline void msync_pud_range(struct vm_area_struct *vma, pgd_t *pgd, | |||
97 | static void msync_page_range(struct vm_area_struct *vma, | 91 | static void msync_page_range(struct vm_area_struct *vma, |
98 | unsigned long addr, unsigned long end) | 92 | unsigned long addr, unsigned long end) |
99 | { | 93 | { |
100 | struct mm_struct *mm = vma->vm_mm; | ||
101 | pgd_t *pgd; | 94 | pgd_t *pgd; |
102 | unsigned long next; | 95 | unsigned long next; |
103 | 96 | ||
@@ -110,16 +103,14 @@ static void msync_page_range(struct vm_area_struct *vma, | |||
110 | return; | 103 | return; |
111 | 104 | ||
112 | BUG_ON(addr >= end); | 105 | BUG_ON(addr >= end); |
113 | pgd = pgd_offset(mm, addr); | 106 | pgd = pgd_offset(vma->vm_mm, addr); |
114 | flush_cache_range(vma, addr, end); | 107 | flush_cache_range(vma, addr, end); |
115 | spin_lock(&mm->page_table_lock); | ||
116 | do { | 108 | do { |
117 | next = pgd_addr_end(addr, end); | 109 | next = pgd_addr_end(addr, end); |
118 | if (pgd_none_or_clear_bad(pgd)) | 110 | if (pgd_none_or_clear_bad(pgd)) |
119 | continue; | 111 | continue; |
120 | msync_pud_range(vma, pgd, addr, next); | 112 | msync_pud_range(vma, pgd, addr, next); |
121 | } while (pgd++, addr = next, addr != end); | 113 | } while (pgd++, addr = next, addr != end); |
122 | spin_unlock(&mm->page_table_lock); | ||
123 | } | 114 | } |
124 | 115 | ||
125 | /* | 116 | /* |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 296e0bbf7836..510f0039b000 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -401,8 +401,6 @@ void free_swap_and_cache(swp_entry_t entry) | |||
401 | * No need to decide whether this PTE shares the swap entry with others, | 401 | * No need to decide whether this PTE shares the swap entry with others, |
402 | * just let do_wp_page work it out if a write is requested later - to | 402 | * just let do_wp_page work it out if a write is requested later - to |
403 | * force COW, vm_page_prot omits write permission from any private vma. | 403 | * force COW, vm_page_prot omits write permission from any private vma. |
404 | * | ||
405 | * vma->vm_mm->page_table_lock is held. | ||
406 | */ | 404 | */ |
407 | static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, | 405 | static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, |
408 | unsigned long addr, swp_entry_t entry, struct page *page) | 406 | unsigned long addr, swp_entry_t entry, struct page *page) |
@@ -424,23 +422,25 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
424 | unsigned long addr, unsigned long end, | 422 | unsigned long addr, unsigned long end, |
425 | swp_entry_t entry, struct page *page) | 423 | swp_entry_t entry, struct page *page) |
426 | { | 424 | { |
427 | pte_t *pte; | ||
428 | pte_t swp_pte = swp_entry_to_pte(entry); | 425 | pte_t swp_pte = swp_entry_to_pte(entry); |
426 | pte_t *pte; | ||
427 | spinlock_t *ptl; | ||
428 | int found = 0; | ||
429 | 429 | ||
430 | pte = pte_offset_map(pmd, addr); | 430 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
431 | do { | 431 | do { |
432 | /* | 432 | /* |
433 | * swapoff spends a _lot_ of time in this loop! | 433 | * swapoff spends a _lot_ of time in this loop! |
434 | * Test inline before going to call unuse_pte. | 434 | * Test inline before going to call unuse_pte. |
435 | */ | 435 | */ |
436 | if (unlikely(pte_same(*pte, swp_pte))) { | 436 | if (unlikely(pte_same(*pte, swp_pte))) { |
437 | unuse_pte(vma, pte, addr, entry, page); | 437 | unuse_pte(vma, pte++, addr, entry, page); |
438 | pte_unmap(pte); | 438 | found = 1; |
439 | return 1; | 439 | break; |
440 | } | 440 | } |
441 | } while (pte++, addr += PAGE_SIZE, addr != end); | 441 | } while (pte++, addr += PAGE_SIZE, addr != end); |
442 | pte_unmap(pte - 1); | 442 | pte_unmap_unlock(pte - 1, ptl); |
443 | return 0; | 443 | return found; |
444 | } | 444 | } |
445 | 445 | ||
446 | static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, | 446 | static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, |
@@ -522,12 +522,10 @@ static int unuse_mm(struct mm_struct *mm, | |||
522 | down_read(&mm->mmap_sem); | 522 | down_read(&mm->mmap_sem); |
523 | lock_page(page); | 523 | lock_page(page); |
524 | } | 524 | } |
525 | spin_lock(&mm->page_table_lock); | ||
526 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 525 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
527 | if (vma->anon_vma && unuse_vma(vma, entry, page)) | 526 | if (vma->anon_vma && unuse_vma(vma, entry, page)) |
528 | break; | 527 | break; |
529 | } | 528 | } |
530 | spin_unlock(&mm->page_table_lock); | ||
531 | up_read(&mm->mmap_sem); | 529 | up_read(&mm->mmap_sem); |
532 | /* | 530 | /* |
533 | * Currently unuse_mm cannot fail, but leave error handling | 531 | * Currently unuse_mm cannot fail, but leave error handling |