author		Hugh Dickins <hugh@veritas.com>	2005-04-19 16:29:15 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org.(none)>	2005-04-19 16:29:15 -0400
commit		e0da382c92626ad1d7f4b7527d19b80104d67a83
tree		b3f455518c286ee14cb2755ced8808487bca7911 /mm/mmap.c
parent		9f6c6fc505560465be0964eb4da1b6ca97bd3951
[PATCH] freepgt: free_pgtables use vma list
Recent woes with some arches needing their own pgd_addr_end macro; and 4-level clear_page_range regression since 2.6.10's clear_page_tables; and its long-standing well-known inefficiency in searching throughout the higher-level page tables for those few entries to clear and free: all can be blamed on ignoring the list of vmas when we free page tables.

Replace exit_mmap's clear_page_range of the total user address space by free_pgtables operating on the mm's vma list; unmap_region uses it in the same way, giving floor and ceiling beyond which it may not free tables. This brings lmbench fork/exec/sh numbers back to 2.6.10 (unless preempt is enabled, in which case latency fixes spoil unmap_vmas throughput).

Beware: the do_mmap_pgoff driver failure case must now use unmap_region instead of zap_page_range, since a page table might have been allocated, and can only be freed while it is touched by some vma.

Move free_pgtables from mmap.c to memory.c, where its lower levels are adapted from the clear_page_range levels. (Most of free_pgtables' old code was actually for a non-existent case, prev not properly set up, dating from before hch gave us split_vma.) Pass mmu_gather** in the public interfaces, since we might want to add latency lockdrops later; but no attempt to do so yet, going by vma should itself reduce latency.

But what if is_hugepage_only_range? Those ia64 and ppc64 cases need careful examination: put that off until a later patch of the series.

What of the x86_64 32bit vdso page that __map_syscall32 maps outside any vma? And the range to sparc64's flush_tlb_pgtables? It's less clear to me now that we need to do more than is done here - every PMD_SIZE ever occupied will be flushed; do we really have to flush every PGDIR_SIZE ever partially occupied? A shame to complicate it unnecessarily.

Special thanks to David Miller for time spent repairing my ceilings.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
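For reference, a minimal sketch of the new calling convention (illustrative only, not code from this patch; example_free_range is a hypothetical helper): callers bound free_pgtables by the vmas adjoining the range being torn down, passing 0 where there is no neighbour, as the unmap_region and exit_mmap hunks below do.

/*
 * Hypothetical helper, for illustration only: free the page tables
 * covering "vma" and its successors, but never below the end of the
 * preceding vma (floor) nor at/above the start of the following vma
 * (ceiling).  A 0 means no neighbour on that side, so free_pgtables
 * may reach the bottom/top of the user address space.
 */
static void example_free_range(struct mmu_gather **tlb,
		struct vm_area_struct *vma,
		struct vm_area_struct *prev,
		struct vm_area_struct *next)
{
	unsigned long floor = prev ? prev->vm_end : 0;
	unsigned long ceiling = next ? next->vm_start : 0;

	/* exit_mmap passes floor = ceiling = 0: the whole space may go */
	free_pgtables(tlb, vma, floor, ceiling);
}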
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--	mm/mmap.c	102
1 file changed, 21 insertions(+), 81 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index a95ebda27446..926d03015471 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -29,6 +29,10 @@
 #include <asm/cacheflush.h>
 #include <asm/tlb.h>
 
+static void unmap_region(struct mm_struct *mm,
+		struct vm_area_struct *vma, struct vm_area_struct *prev,
+		unsigned long start, unsigned long end);
+
 /*
  * WARNING: the debugging will use recursive algorithms so never enable this
  * unless you know what you are doing.
@@ -1129,7 +1133,8 @@ unmap_and_free_vma:
 	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
-	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
+	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+	charged = 0;
 free_vma:
 	kmem_cache_free(vm_area_cachep, vma);
 unacct_error:
@@ -1572,66 +1577,6 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 }
 #endif
 
-/*
- * Try to free as many page directory entries as we can,
- * without having to work very hard at actually scanning
- * the page tables themselves.
- *
- * Right now we try to free page tables if we have a nice
- * PGDIR-aligned area that got free'd up. We could be more
- * granular if we want to, but this is fast and simple,
- * and covers the bad cases.
- *
- * "prev", if it exists, points to a vma before the one
- * we just free'd - but there's no telling how much before.
- */
-static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
-	unsigned long start, unsigned long end)
-{
-	unsigned long first = start & PGDIR_MASK;
-	unsigned long last = end + PGDIR_SIZE - 1;
-	struct mm_struct *mm = tlb->mm;
-
-	if (last > MM_VM_SIZE(mm) || last < end)
-		last = MM_VM_SIZE(mm);
-
-	if (!prev) {
-		prev = mm->mmap;
-		if (!prev)
-			goto no_mmaps;
-		if (prev->vm_end > start) {
-			if (last > prev->vm_start)
-				last = prev->vm_start;
-			goto no_mmaps;
-		}
-	}
-	for (;;) {
-		struct vm_area_struct *next = prev->vm_next;
-
-		if (next) {
-			if (next->vm_start < start) {
-				prev = next;
-				continue;
-			}
-			if (last > next->vm_start)
-				last = next->vm_start;
-		}
-		if (prev->vm_end > first)
-			first = prev->vm_end;
-		break;
-	}
-no_mmaps:
-	if (last < first)	/* for arches with discontiguous pgd indices */
-		return;
-	if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
-		first = FIRST_USER_PGD_NR * PGDIR_SIZE;
-	/* No point trying to free anything if we're in the same pte page */
-	if ((first & PMD_MASK) < (last & PMD_MASK)) {
-		clear_page_range(tlb, first, last);
-		flush_tlb_pgtables(mm, first, last);
-	}
-}
-
 /* Normal function to fix up a mapping
  * This function is the default for when an area has no specific
  * function. This may be used as part of a more specific routine.
@@ -1674,24 +1619,22 @@ static void unmap_vma_list(struct mm_struct *mm,
  * Called with the page table lock held.
  */
 static void unmap_region(struct mm_struct *mm,
-	struct vm_area_struct *vma,
-	struct vm_area_struct *prev,
-	unsigned long start,
-	unsigned long end)
+	struct vm_area_struct *vma, struct vm_area_struct *prev,
+	unsigned long start, unsigned long end)
 {
+	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
 	struct mmu_gather *tlb;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
+	spin_lock(&mm->page_table_lock);
 	tlb = tlb_gather_mmu(mm, 0);
 	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-
-	if (is_hugepage_only_range(mm, start, end - start))
-		hugetlb_free_pgtables(tlb, prev, start, end);
-	else
-		free_pgtables(tlb, prev, start, end);
+	free_pgtables(&tlb, vma, prev? prev->vm_end: 0,
+				 next? next->vm_start: 0);
 	tlb_finish_mmu(tlb, start, end);
+	spin_unlock(&mm->page_table_lock);
 }
 
 /*
@@ -1823,9 +1766,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	 * Remove the vma's, and unmap the actual pages
 	 */
 	detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
-	spin_lock(&mm->page_table_lock);
 	unmap_region(mm, mpnt, prev, start, end);
-	spin_unlock(&mm->page_table_lock);
 
 	/* Fix up all other VM information */
 	unmap_vma_list(mm, mpnt);
@@ -1957,25 +1898,21 @@ EXPORT_SYMBOL(do_brk);
 void exit_mmap(struct mm_struct *mm)
 {
 	struct mmu_gather *tlb;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma = mm->mmap;
 	unsigned long nr_accounted = 0;
 
 	lru_add_drain();
 
 	spin_lock(&mm->page_table_lock);
 
-	tlb = tlb_gather_mmu(mm, 1);
 	flush_cache_mm(mm);
-	/* Use ~0UL here to ensure all VMAs in the mm are unmapped */
-	mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
-					~0UL, &nr_accounted, NULL);
+	tlb = tlb_gather_mmu(mm, 1);
+	/* Use -1 here to ensure all VMAs in the mm are unmapped */
+	mm->map_count -= unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
 	vm_unacct_memory(nr_accounted);
-	BUG_ON(mm->map_count);	/* This is just debugging */
-	clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE, MM_VM_SIZE(mm));
-
+	free_pgtables(&tlb, vma, 0, 0);
 	tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
 
-	vma = mm->mmap;
 	mm->mmap = mm->mmap_cache = NULL;
 	mm->mm_rb = RB_ROOT;
 	set_mm_counter(mm, rss, 0);
@@ -1993,6 +1930,9 @@ void exit_mmap(struct mm_struct *mm)
 		remove_vm_struct(vma);
 		vma = next;
 	}
+
+	BUG_ON(mm->map_count);	/* This is just debugging */
+	BUG_ON(mm->nr_ptes);	/* This is just debugging */
 }
 
 /* Insert vm structure into process list sorted by address