author	Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2012-07-31 19:42:03 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-07-31 21:42:40 -0400
commit	24669e58477e2752c1fbca9c1c988e9dd0d79d15 (patch)
tree	a4fe04fc1cc2b5a2d89b67f745185b9e9640426f /mm
parent	972dc4de13f667a7df27ee32573b2e6fc6cc8434 (diff)
hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages
Use a mmu_gather instead of a temporary linked list for accumulating pages
when we unmap a hugepage range.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/hugetlb.c	59
-rw-r--r--	mm/memory.c	7
2 files changed, 40 insertions(+), 26 deletions(-)
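The patch replaces hugetlb's private page-gathering list with the generic mmu_gather batching already used by the normal unmap path. As a reading aid, here is a hypothetical, condensed sketch of that lifecycle: sketch_unmap_range() is an illustrative name, not a kernel symbol, and the pte-none/hwpoison checks plus the ref_page early exit are elided, but the tlb_* and __tlb_remove_page() calls are the real APIs exercised in the hunks below.

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <asm/tlb.h>

/*
 * Hypothetical sketch of the mmu_gather lifecycle this patch adopts;
 * not kernel code, checks for empty/poisoned PTEs are elided.
 */
static void sketch_unmap_range(struct mm_struct *mm, unsigned long start,
			       unsigned long end, unsigned long sz)
{
	struct mmu_gather tlb;
	int force_flush = 0;
	unsigned long address;
	pte_t *ptep;
	pte_t pte;
	struct page *page;

	tlb_gather_mmu(&tlb, mm, 0);	/* 0: not a full-mm teardown */
again:
	spin_lock(&mm->page_table_lock);
	for (address = start; address < end; address += sz) {
		ptep = huge_pte_offset(mm, address);	/* NULL/none checks elided */
		pte = huge_ptep_get_and_clear(mm, address, ptep);
		page = pte_page(pte);
		tlb_remove_tlb_entry(&tlb, ptep, address); /* queue TLB invalidate */
		page_remove_rmap(page);
		/* __tlb_remove_page() returns 0 once the batch is full */
		force_flush = !__tlb_remove_page(&tlb, page);
		if (force_flush)
			break;
	}
	spin_unlock(&mm->page_table_lock);
	if (force_flush) {
		/* flush TLBs and free the batch outside the PTE lock */
		force_flush = 0;
		tlb_flush_mmu(&tlb);
		if (address < end)
			goto again;
	}
	tlb_finish_mmu(&tlb, start, end);	/* final flush, frees remainder */
}

The design point: pages queue up in the gather while the page-table lock is held, and the expensive TLB invalidate and page frees happen only after the lock is dropped; when the batch fills mid-walk, the loop drops the lock, flushes, and resumes via the again: label.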
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b1e0ed1ea912..e54b695336f9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,8 +24,9 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <linux/io.h>
+#include <asm/tlb.h>
 
+#include <linux/io.h>
 #include <linux/hugetlb.h>
 #include <linux/node.h>
 #include "internal.h"
@@ -2310,30 +2311,26 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 	return 0;
 }
 
-void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end, struct page *ref_page)
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+			    unsigned long start, unsigned long end,
+			    struct page *ref_page)
 {
+	int force_flush = 0;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pte_t *ptep;
 	pte_t pte;
 	struct page *page;
-	struct page *tmp;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
 
-	/*
-	 * A page gathering list, protected by per file i_mmap_mutex. The
-	 * lock is used to avoid list corruption from multiple unmapping
-	 * of the same page since we are using page->lru.
-	 */
-	LIST_HEAD(page_list);
-
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON(start & ~huge_page_mask(h));
 	BUG_ON(end & ~huge_page_mask(h));
 
+	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, start, end);
+again:
 	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
@@ -2372,30 +2369,45 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 		}
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
+		tlb_remove_tlb_entry(tlb, ptep, address);
 		if (pte_dirty(pte))
 			set_page_dirty(page);
-		list_add(&page->lru, &page_list);
 
+		page_remove_rmap(page);
+		force_flush = !__tlb_remove_page(tlb, page);
+		if (force_flush)
+			break;
 		/* Bail out after unmapping reference page if supplied */
 		if (ref_page)
 			break;
 	}
-	flush_tlb_range(vma, start, end);
 	spin_unlock(&mm->page_table_lock);
-	mmu_notifier_invalidate_range_end(mm, start, end);
-	list_for_each_entry_safe(page, tmp, &page_list, lru) {
-		page_remove_rmap(page);
-		list_del(&page->lru);
-		put_page(page);
+	/*
+	 * mmu_gather ran out of room to batch pages, we break out of
+	 * the PTE lock to avoid doing the potential expensive TLB invalidate
+	 * and page-free while holding it.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu(tlb);
+		if (address < end && !ref_page)
+			goto again;
 	}
+	mmu_notifier_invalidate_range_end(mm, start, end);
+	tlb_end_vma(tlb, vma);
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-	__unmap_hugepage_range(vma, start, end, ref_page);
-	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+	struct mm_struct *mm;
+	struct mmu_gather tlb;
+
+	mm = vma->vm_mm;
+
+	tlb_gather_mmu(&tlb, mm, 0);
+	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
+	tlb_finish_mmu(&tlb, start, end);
 }
 
 /*
@@ -2440,9 +2452,8 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * from the time of fork. This would look like data corruption
 		 */
 		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
-			__unmap_hugepage_range(iter_vma,
-				address, address + huge_page_size(h),
-				page);
+			unmap_hugepage_range(iter_vma, address,
+				address + huge_page_size(h), page);
 	}
 	mutex_unlock(&mapping->i_mmap_mutex);
 
diff --git a/mm/memory.c b/mm/memory.c
index 91f69459d3e8..59e5bebc2e35 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1343,8 +1343,11 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 		 * Since no pte has actually been setup, it is
 		 * safe to do nothing in this case.
 		 */
-		if (vma->vm_file)
-			unmap_hugepage_range(vma, start, end, NULL);
+		if (vma->vm_file) {
+			mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+			__unmap_hugepage_range(tlb, vma, start, end, NULL);
+			mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+		}
 	} else
 		unmap_page_range(tlb, vma, start, end, details);
 }