| author | Hugh Dickins <hugh@veritas.com> | 2005-04-19 16:29:15 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org.(none)> | 2005-04-19 16:29:15 -0400 |
| commit | e0da382c92626ad1d7f4b7527d19b80104d67a83 (patch) | |
| tree | b3f455518c286ee14cb2755ced8808487bca7911 /arch | |
| parent | 9f6c6fc505560465be0964eb4da1b6ca97bd3951 (diff) | |
[PATCH] freepgt: free_pgtables use vma list
Recent woes with some arches needing their own pgd_addr_end macro; the 4-level
clear_page_range regression since 2.6.10's clear_page_tables; and
clear_page_range's long-standing, well-known inefficiency in searching
throughout the higher-level page tables for the few entries it actually needs
to clear and free: all of these can be blamed on ignoring the list of vmas
when we free page tables.
Replace exit_mmap's clear_page_range of the total user address space by
free_pgtables operating on the mm's vma list; unmap_region uses it in the same
way, supplying a floor and ceiling beyond which it may not free tables. This
brings lmbench fork/exec/sh numbers back to 2.6.10 levels (unless preempt is
enabled, in which case latency fixes spoil unmap_vmas throughput).
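To make the floor/ceiling idea concrete, here is a minimal caller-side sketch. The free_pgtables() prototype and the exact bounds used are assumptions based on the description above, not code quoted from the patch.

```c
/* Assumed shape of the new interface described above (sketch only). */
void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
		   unsigned long floor, unsigned long ceiling);

/*
 * exit_mmap(): the whole address space is going away, so the only
 * bounds are the limits of the user page tables themselves.
 */
static void exit_mmap_sketch(struct mm_struct *mm, struct mmu_gather *tlb)
{
	free_pgtables(&tlb, mm->mmap, 0, TASK_SIZE);
}

/*
 * unmap_region(): only part of the space is unmapped; the vmas left on
 * either side still need their page tables, so they supply the floor
 * and the ceiling beyond which nothing may be freed.
 */
static void unmap_region_sketch(struct mmu_gather *tlb,
				struct vm_area_struct *vma,	/* first vma being removed */
				struct vm_area_struct *prev,	/* vma just below, or NULL */
				struct vm_area_struct *next)	/* vma just above, or NULL */
{
	free_pgtables(&tlb, vma,
		      prev ? prev->vm_end : 0,
		      next ? next->vm_start : TASK_SIZE);
}
```

The gain is that free_pgtables() only ever visits page-table ranges that some vma in the list actually covered, instead of scanning the whole user address space the way clear_page_range() did.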
Beware: the do_mmap_pgoff driver failure case must now use unmap_region
instead of zap_page_range, since a page table might have been allocated, and
a page table can only be freed while it is still covered by some vma.
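A hedged illustration of that failure path (the helper name and the unmap_region() arguments follow mm/mmap.c conventions, but this is a sketch, not the patch body):

```c
/* Sketch of do_mmap_pgoff()'s driver-failure case, as described above. */
static int mmap_failure_sketch(struct file *file, struct mm_struct *mm,
			       struct vm_area_struct *vma,
			       struct vm_area_struct *prev)
{
	int error = file->f_op->mmap(file, vma);

	if (error) {
		/*
		 * The driver may already have faulted in pages and thus
		 * allocated page tables for this range.  zap_page_range()
		 * would clear the ptes but, with table freeing now driven
		 * by the vma list, it would leave the new page-table pages
		 * behind.  unmap_region() frees them too, and it can only
		 * do so while the vma still covers the range.
		 */
		unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	}
	return error;
}
```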
Move free_pgtables from mmap.c to memory.c, where its lower levels are adapted
from the clear_page_range levels. (Most of free_pgtables' old code was
actually for a non-existent case, prev not properly set up, dating from before
hch gave us split_vma.) Pass mmu_gather** in the public interfaces, since we
might want to add latency lockdrops later; no attempt is made to do so yet,
since going by vma should itself reduce latency.
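As for the mmu_gather** indirection, the point is that a later latency fix could finish and restart the TLB gather in the middle of the vma walk without changing any caller. A sketch of that intent (names and the commented-out lockdrop are assumptions, not current code):

```c
/*
 * Why the public interface takes struct mmu_gather **: a future
 * lockdrop could update the caller's gather through *tlb mid-walk.
 * For now the walk would simply use *tlb as it stands.
 */
static void free_pgtables_sketch(struct mmu_gather **tlb,
				 struct vm_area_struct *vma,
				 unsigned long floor, unsigned long ceiling)
{
	for (; vma; vma = vma->vm_next) {
		/* free the page-table pages this vma covers, clamped so
		 * that nothing below floor or above ceiling is touched */

		/*
		 * Hypothetical future lockdrop, made possible by the **:
		 *	tlb_finish_mmu(*tlb, floor, ceiling);
		 *	cond_resched();
		 *	*tlb = tlb_gather_mmu(vma->vm_mm, 0);
		 */
	}
}
```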
But what if is_hugepage_only_range? Those ia64 and ppc64 cases need careful
examination: put that off until a later patch of the series.
What of x86_64's 32-bit vdso page, which __map_syscall32 maps outside any vma?
And the range passed to sparc64's flush_tlb_pgtables? It's less clear to me
now that we need to do more than is done here: every PMD_SIZE ever occupied
will be flushed, so do we really have to flush every PGDIR_SIZE ever partially
occupied? A shame to complicate it unnecessarily.
Special thanks to David Miller for time spent repairing my ceilings.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/i386/mm/pgtable.c | 2
-rw-r--r-- | arch/ia64/mm/hugetlbpage.c | 37
2 files changed, 3 insertions, 36 deletions
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 0742d54f8bb0..dd81479ff88a 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -255,6 +255,6 @@ void pgd_free(pgd_t *pgd)
 	if (PTRS_PER_PMD > 1)
 		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
 			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
-	/* in the non-PAE case, clear_page_range() clears user pgd entries */
+	/* in the non-PAE case, free_pgtables() clears user pgd entries */
 	kmem_cache_free(pgd_cache, pgd);
 }
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 40ad8328ffd5..626258ae9742 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -187,45 +187,12 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int wri
 }
 
 /*
- * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
- * are hugetlb region specific.
+ * Do nothing, until we've worked out what to do! To allow build, we
+ * must remove reference to clear_page_range since it no longer exists.
  */
 void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
 	unsigned long start, unsigned long end)
 {
-	unsigned long first = start & HUGETLB_PGDIR_MASK;
-	unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
-	struct mm_struct *mm = tlb->mm;
-
-	if (!prev) {
-		prev = mm->mmap;
-		if (!prev)
-			goto no_mmaps;
-		if (prev->vm_end > start) {
-			if (last > prev->vm_start)
-				last = prev->vm_start;
-			goto no_mmaps;
-		}
-	}
-	for (;;) {
-		struct vm_area_struct *next = prev->vm_next;
-
-		if (next) {
-			if (next->vm_start < start) {
-				prev = next;
-				continue;
-			}
-			if (last > next->vm_start)
-				last = next->vm_start;
-		}
-		if (prev->vm_end > first)
-			first = prev->vm_end;
-		break;
-	}
-no_mmaps:
-	if (last < first)	/* for arches with discontiguous pgd indices */
-		return;
-	clear_page_range(tlb, first, last);
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)