diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2012-07-31 19:42:03 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 21:42:40 -0400 |
commit | 24669e58477e2752c1fbca9c1c988e9dd0d79d15 (patch) | |
tree | a4fe04fc1cc2b5a2d89b67f745185b9e9640426f /mm | |
parent | 972dc4de13f667a7df27ee32573b2e6fc6cc8434 (diff) |
hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages
Use an mmu_gather instead of a temporary linked list for accumulating pages
when we unmap a hugepage range.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 59 | ||||
-rw-r--r-- | mm/memory.c | 7 |
2 files changed, 40 insertions, 26 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b1e0ed1ea912..e54b695336f9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -24,8 +24,9 @@ | |||
24 | 24 | ||
25 | #include <asm/page.h> | 25 | #include <asm/page.h> |
26 | #include <asm/pgtable.h> | 26 | #include <asm/pgtable.h> |
27 | #include <linux/io.h> | 27 | #include <asm/tlb.h> |
28 | 28 | ||
29 | #include <linux/io.h> | ||
29 | #include <linux/hugetlb.h> | 30 | #include <linux/hugetlb.h> |
30 | #include <linux/node.h> | 31 | #include <linux/node.h> |
31 | #include "internal.h" | 32 | #include "internal.h" |
@@ -2310,30 +2311,26 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) | |||
2310 | return 0; | 2311 | return 0; |
2311 | } | 2312 | } |
2312 | 2313 | ||
2313 | void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | 2314 | void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, |
2314 | unsigned long end, struct page *ref_page) | 2315 | unsigned long start, unsigned long end, |
2316 | struct page *ref_page) | ||
2315 | { | 2317 | { |
2318 | int force_flush = 0; | ||
2316 | struct mm_struct *mm = vma->vm_mm; | 2319 | struct mm_struct *mm = vma->vm_mm; |
2317 | unsigned long address; | 2320 | unsigned long address; |
2318 | pte_t *ptep; | 2321 | pte_t *ptep; |
2319 | pte_t pte; | 2322 | pte_t pte; |
2320 | struct page *page; | 2323 | struct page *page; |
2321 | struct page *tmp; | ||
2322 | struct hstate *h = hstate_vma(vma); | 2324 | struct hstate *h = hstate_vma(vma); |
2323 | unsigned long sz = huge_page_size(h); | 2325 | unsigned long sz = huge_page_size(h); |
2324 | 2326 | ||
2325 | /* | ||
2326 | * A page gathering list, protected by per file i_mmap_mutex. The | ||
2327 | * lock is used to avoid list corruption from multiple unmapping | ||
2328 | * of the same page since we are using page->lru. | ||
2329 | */ | ||
2330 | LIST_HEAD(page_list); | ||
2331 | |||
2332 | WARN_ON(!is_vm_hugetlb_page(vma)); | 2327 | WARN_ON(!is_vm_hugetlb_page(vma)); |
2333 | BUG_ON(start & ~huge_page_mask(h)); | 2328 | BUG_ON(start & ~huge_page_mask(h)); |
2334 | BUG_ON(end & ~huge_page_mask(h)); | 2329 | BUG_ON(end & ~huge_page_mask(h)); |
2335 | 2330 | ||
2331 | tlb_start_vma(tlb, vma); | ||
2336 | mmu_notifier_invalidate_range_start(mm, start, end); | 2332 | mmu_notifier_invalidate_range_start(mm, start, end); |
2333 | again: | ||
2337 | spin_lock(&mm->page_table_lock); | 2334 | spin_lock(&mm->page_table_lock); |
2338 | for (address = start; address < end; address += sz) { | 2335 | for (address = start; address < end; address += sz) { |
2339 | ptep = huge_pte_offset(mm, address); | 2336 | ptep = huge_pte_offset(mm, address); |
@@ -2372,30 +2369,45 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
2372 | } | 2369 | } |
2373 | 2370 | ||
2374 | pte = huge_ptep_get_and_clear(mm, address, ptep); | 2371 | pte = huge_ptep_get_and_clear(mm, address, ptep); |
2372 | tlb_remove_tlb_entry(tlb, ptep, address); | ||
2375 | if (pte_dirty(pte)) | 2373 | if (pte_dirty(pte)) |
2376 | set_page_dirty(page); | 2374 | set_page_dirty(page); |
2377 | list_add(&page->lru, &page_list); | ||
2378 | 2375 | ||
2376 | page_remove_rmap(page); | ||
2377 | force_flush = !__tlb_remove_page(tlb, page); | ||
2378 | if (force_flush) | ||
2379 | break; | ||
2379 | /* Bail out after unmapping reference page if supplied */ | 2380 | /* Bail out after unmapping reference page if supplied */ |
2380 | if (ref_page) | 2381 | if (ref_page) |
2381 | break; | 2382 | break; |
2382 | } | 2383 | } |
2383 | flush_tlb_range(vma, start, end); | ||
2384 | spin_unlock(&mm->page_table_lock); | 2384 | spin_unlock(&mm->page_table_lock); |
2385 | mmu_notifier_invalidate_range_end(mm, start, end); | 2385 | /* |
2386 | list_for_each_entry_safe(page, tmp, &page_list, lru) { | 2386 | * mmu_gather ran out of room to batch pages, we break out of |
2387 | page_remove_rmap(page); | 2387 | * the PTE lock to avoid doing the potential expensive TLB invalidate |
2388 | list_del(&page->lru); | 2388 | * and page-free while holding it. |
2389 | put_page(page); | 2389 | */ |
2390 | if (force_flush) { | ||
2391 | force_flush = 0; | ||
2392 | tlb_flush_mmu(tlb); | ||
2393 | if (address < end && !ref_page) | ||
2394 | goto again; | ||
2390 | } | 2395 | } |
2396 | mmu_notifier_invalidate_range_end(mm, start, end); | ||
2397 | tlb_end_vma(tlb, vma); | ||
2391 | } | 2398 | } |
2392 | 2399 | ||
2393 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | 2400 | void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, |
2394 | unsigned long end, struct page *ref_page) | 2401 | unsigned long end, struct page *ref_page) |
2395 | { | 2402 | { |
2396 | mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); | 2403 | struct mm_struct *mm; |
2397 | __unmap_hugepage_range(vma, start, end, ref_page); | 2404 | struct mmu_gather tlb; |
2398 | mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); | 2405 | |
2406 | mm = vma->vm_mm; | ||
2407 | |||
2408 | tlb_gather_mmu(&tlb, mm, 0); | ||
2409 | __unmap_hugepage_range(&tlb, vma, start, end, ref_page); | ||
2410 | tlb_finish_mmu(&tlb, start, end); | ||
2399 | } | 2411 | } |
2400 | 2412 | ||
2401 | /* | 2413 | /* |
@@ -2440,9 +2452,8 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2440 | * from the time of fork. This would look like data corruption | 2452 | * from the time of fork. This would look like data corruption |
2441 | */ | 2453 | */ |
2442 | if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) | 2454 | if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) |
2443 | __unmap_hugepage_range(iter_vma, | 2455 | unmap_hugepage_range(iter_vma, address, |
2444 | address, address + huge_page_size(h), | 2456 | address + huge_page_size(h), page); |
2445 | page); | ||
2446 | } | 2457 | } |
2447 | mutex_unlock(&mapping->i_mmap_mutex); | 2458 | mutex_unlock(&mapping->i_mmap_mutex); |
2448 | 2459 | ||
diff --git a/mm/memory.c b/mm/memory.c index 91f69459d3e8..59e5bebc2e35 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1343,8 +1343,11 @@ static void unmap_single_vma(struct mmu_gather *tlb, | |||
1343 | * Since no pte has actually been setup, it is | 1343 | * Since no pte has actually been setup, it is |
1344 | * safe to do nothing in this case. | 1344 | * safe to do nothing in this case. |
1345 | */ | 1345 | */ |
1346 | if (vma->vm_file) | 1346 | if (vma->vm_file) { |
1347 | unmap_hugepage_range(vma, start, end, NULL); | 1347 | mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); |
1348 | __unmap_hugepage_range(tlb, vma, start, end, NULL); | ||
1349 | mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); | ||
1350 | } | ||
1348 | } else | 1351 | } else |
1349 | unmap_page_range(tlb, vma, start, end, details); | 1352 | unmap_page_range(tlb, vma, start, end, details); |
1350 | } | 1353 | } |