author		Hugh Dickins <hugh@veritas.com>	2005-10-29 21:16:40 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-10-30 00:40:42 -0400
commit		4c21e2f2441dc5fbb957b030333f5a3f2d02dea7 (patch)
tree		1f76d33bb1d76221c6424bc5fed080a4f91349a6 /mm/memory.c
parent		b38c6845b695141259019e2b7c0fe6c32a6e720d (diff)
[PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with a many-threaded application which concurrently initializes different parts of a large anonymous area.

This patch corrects that, by using a separate spinlock per page table page, to guard the page table entries in that page, instead of using the mm's single page_table_lock. (But even then, page_table_lock is still used to guard page table allocation, and anon_vma allocation.)

In this implementation, the spinlock is tucked inside the struct page of the page table page: with a BUILD_BUG_ON in case it overflows - which it would in the case of 32-bit PA-RISC with spinlock debugging enabled.

Splitting the lock is not quite for free: another cacheline access. Ideally, I suppose we would use split ptlock only for multi-threaded processes on multi-cpu machines; but deciding that dynamically would have its own costs. So for now enable it by config, at some number of cpus - since the Kconfig language doesn't support inequalities, let preprocessor compare that with NR_CPUS. But I don't think it's worth being user-configurable: for good testing of both split and unsplit configs, split now at 4 cpus, and perhaps change that to 8 later.

There is a benefit even for singly threaded processes: kswapd can be attacking one part of the mm while another part is busy faulting.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
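The mm/memory.c hunks below only show the callers; the pte_lock_init(), pte_lock_deinit() and pte_lockptr() helpers themselves live outside this file (the diffstat here is limited to mm/memory.c). As a rough, minimal sketch of the scheme described above -- assuming the config symbol is spelled CONFIG_SPLIT_PTLOCK_CPUS and the per-page spinlock is a field named ptl in struct page, both assumptions for illustration rather than quotes from the patch -- the helpers would look something like this:

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS	/* preprocessor compare, since Kconfig has no inequalities */
/*
 * Split ptlock: one spinlock per page table page, tucked into that page's
 * struct page.  A BUILD_BUG_ON elsewhere would catch the spinlock growing
 * past struct page (e.g. 32-bit PA-RISC with spinlock debugging enabled).
 */
#define pte_lockptr(mm, pmd)	({ (void)(mm); &pmd_page(*(pmd))->ptl; })
#define pte_lock_init(page)	spin_lock_init(&(page)->ptl)
#define pte_lock_deinit(page)	do { } while (0)	/* stub: the real patch may undo per-page state here */
#else
/*
 * Unsplit: every page table page in the mm shares mm->page_table_lock,
 * so the per-page init/deinit hooks compile away to nothing.
 */
#define pte_lockptr(mm, pmd)	({ (void)(pmd); &(mm)->page_table_lock; })
#define pte_lock_init(page)	do { } while (0)
#define pte_lock_deinit(page)	do { } while (0)
#endif

Either way, pte_lockptr(mm, pmd) is what fault paths such as pte_unmap_same() and do_anonymous_page() now take instead of &mm->page_table_lock, which is exactly the substitution visible in the hunks below.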
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	24
1 file changed, 14 insertions, 10 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 8461e2dd91d7..e9ef599498b5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -114,6 +114,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
 {
 	struct page *page = pmd_page(*pmd);
 	pmd_clear(pmd);
+	pte_lock_deinit(page);
 	pte_free_tlb(tlb, page);
 	dec_page_state(nr_page_table_pages);
 	tlb->mm->nr_ptes--;
@@ -294,10 +295,12 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
+	pte_lock_init(new);
 	spin_lock(&mm->page_table_lock);
-	if (pmd_present(*pmd))		/* Another has populated it */
+	if (pmd_present(*pmd)) {	/* Another has populated it */
+		pte_lock_deinit(new);
 		pte_free(new);
-	else {
+	} else {
 		mm->nr_ptes++;
 		inc_page_state(nr_page_table_pages);
 		pmd_populate(mm, pmd, new);
@@ -432,7 +435,7 @@ again:
 	if (!dst_pte)
 		return -ENOMEM;
 	src_pte = pte_offset_map_nested(src_pmd, addr);
-	src_ptl = &src_mm->page_table_lock;
+	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock(src_ptl);
 
 	do {
@@ -1194,15 +1197,16 @@ EXPORT_SYMBOL(remap_pfn_range);
  * (but do_wp_page is only called after already making such a check;
  * and do_anonymous_page and do_no_page can safely check later on).
  */
-static inline int pte_unmap_same(struct mm_struct *mm,
+static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
 				pte_t *page_table, pte_t orig_pte)
 {
 	int same = 1;
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 	if (sizeof(pte_t) > sizeof(unsigned long)) {
-		spin_lock(&mm->page_table_lock);
+		spinlock_t *ptl = pte_lockptr(mm, pmd);
+		spin_lock(ptl);
 		same = pte_same(*page_table, orig_pte);
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 	}
 #endif
 	pte_unmap(page_table);
@@ -1655,7 +1659,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t pte;
 	int ret = VM_FAULT_MINOR;
 
-	if (!pte_unmap_same(mm, page_table, orig_pte))
+	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
 		goto out;
 
 	entry = pte_to_swp_entry(orig_pte);
@@ -1773,7 +1777,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page_cache_get(page);
 	entry = mk_pte(page, vma->vm_page_prot);
 
-	ptl = &mm->page_table_lock;
+	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	if (!pte_none(*page_table))
 		goto release;
@@ -1934,7 +1938,7 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pgoff_t pgoff;
 	int err;
 
-	if (!pte_unmap_same(mm, page_table, orig_pte))
+	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
 		return VM_FAULT_MINOR;
 
 	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
@@ -1992,7 +1996,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 					pte, pmd, write_access, entry);
 	}
 
-	ptl = &mm->page_table_lock;
+	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	if (unlikely(!pte_same(*pte, entry)))
 		goto unlock;