-rw-r--r--   fs/hugetlbfs/inode.c     |  4
-rw-r--r--   include/linux/hugetlb.h  | 22
-rw-r--r--   mm/hugetlb.c             | 59
-rw-r--r--   mm/memory.c              |  7

4 files changed, 59 insertions(+), 33 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e13e9bdb0bf5..8349a899912e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -416,8 +416,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
 		else
 			v_offset = 0;
 
-		__unmap_hugepage_range(vma,
-				vma->vm_start + v_offset, vma->vm_end, NULL);
+		unmap_hugepage_range(vma, vma->vm_start + v_offset,
+				     vma->vm_end, NULL);
 	}
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 217f52859fa7..0f23c1840c9b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -7,6 +7,7 @@
 
 struct ctl_table;
 struct user_struct;
+struct mmu_gather;
 
 #ifdef CONFIG_HUGETLB_PAGE
 
@@ -40,9 +41,10 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			struct page **, struct vm_area_struct **,
 			unsigned long *, int *, int, unsigned int flags);
 void unmap_hugepage_range(struct vm_area_struct *,
 			unsigned long, unsigned long, struct page *);
-void __unmap_hugepage_range(struct vm_area_struct *,
-			unsigned long, unsigned long, struct page *);
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+			unsigned long start, unsigned long end,
+			struct page *ref_page);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
@@ -98,7 +100,6 @@ static inline unsigned long hugetlb_total_pages(void)
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
 #define hugetlb_prefault(mapping, vma)	({ BUG(); 0; })
-#define unmap_hugepage_range(vma, start, end, page)	BUG()
 static inline void hugetlb_report_meminfo(struct seq_file *m)
 {
 }
@@ -112,13 +113,24 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling)	({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address)	0
-#define dequeue_hwpoisoned_huge_page(page)	0
+static inline int dequeue_hwpoisoned_huge_page(struct page *page)
+{
+	return 0;
+}
+
 static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
 
 #define hugetlb_change_protection(vma, address, end, newprot)
 
+static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
+			struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, struct page *ref_page)
+{
+	BUG();
+}
+
 #endif /* !CONFIG_HUGETLB_PAGE */
 
 #define HUGETLB_ANON_FILE "anon_hugepage"
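
Worth noting in the !CONFIG_HUGETLB_PAGE block above: both dequeue_hwpoisoned_huge_page() and the new __unmap_hugepage_range() stub become static inline functions rather than #define stubs, so their arguments are still type-checked in builds without hugetlb support. A minimal sketch of the difference, with hypothetical stub names not taken from the patch:

	struct page;

	/*
	 * A macro stub accepts any argument unchecked and evaluates
	 * nothing, so type errors and unused-variable warnings stay
	 * hidden until CONFIG_HUGETLB_PAGE is enabled:
	 */
	#define stub_macro(page)	0

	/*
	 * A static inline stub keeps full type checking in every
	 * configuration and still compiles away to nothing:
	 */
	static inline int stub_inline(struct page *page)
	{
		return 0;
	}
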
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b1e0ed1ea912..e54b695336f9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,8 +24,9 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <linux/io.h>
+#include <asm/tlb.h>
 
+#include <linux/io.h>
 #include <linux/hugetlb.h>
 #include <linux/node.h>
 #include "internal.h"
@@ -2310,30 +2311,26 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 	return 0;
 }
 
-void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end, struct page *ref_page)
+void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+			    unsigned long start, unsigned long end,
+			    struct page *ref_page)
 {
+	int force_flush = 0;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pte_t *ptep;
 	pte_t pte;
 	struct page *page;
-	struct page *tmp;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
 
-	/*
-	 * A page gathering list, protected by per file i_mmap_mutex. The
-	 * lock is used to avoid list corruption from multiple unmapping
-	 * of the same page since we are using page->lru.
-	 */
-	LIST_HEAD(page_list);
-
 	WARN_ON(!is_vm_hugetlb_page(vma));
 	BUG_ON(start & ~huge_page_mask(h));
 	BUG_ON(end & ~huge_page_mask(h));
 
+	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, start, end);
+again:
 	spin_lock(&mm->page_table_lock);
 	for (address = start; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
@@ -2372,30 +2369,45 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 		}
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
+		tlb_remove_tlb_entry(tlb, ptep, address);
 		if (pte_dirty(pte))
 			set_page_dirty(page);
-		list_add(&page->lru, &page_list);
 
+		page_remove_rmap(page);
+		force_flush = !__tlb_remove_page(tlb, page);
+		if (force_flush)
+			break;
 		/* Bail out after unmapping reference page if supplied */
 		if (ref_page)
 			break;
 	}
-	flush_tlb_range(vma, start, end);
 	spin_unlock(&mm->page_table_lock);
-	mmu_notifier_invalidate_range_end(mm, start, end);
-	list_for_each_entry_safe(page, tmp, &page_list, lru) {
-		page_remove_rmap(page);
-		list_del(&page->lru);
-		put_page(page);
+	/*
+	 * mmu_gather ran out of room to batch pages, we break out of
+	 * the PTE lock to avoid doing the potential expensive TLB invalidate
+	 * and page-free while holding it.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu(tlb);
+		if (address < end && !ref_page)
+			goto again;
 	}
+	mmu_notifier_invalidate_range_end(mm, start, end);
+	tlb_end_vma(tlb, vma);
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page)
 {
-	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
-	__unmap_hugepage_range(vma, start, end, ref_page);
-	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+	struct mm_struct *mm;
+	struct mmu_gather tlb;
+
+	mm = vma->vm_mm;
+
+	tlb_gather_mmu(&tlb, mm, 0);
+	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
+	tlb_finish_mmu(&tlb, start, end);
 }
 
 /*
@@ -2440,9 +2452,8 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * from the time of fork. This would look like data corruption
 		 */
 		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
-			__unmap_hugepage_range(iter_vma,
-				address, address + huge_page_size(h),
-				page);
+			unmap_hugepage_range(iter_vma, address,
+					     address + huge_page_size(h), page);
 	}
 	mutex_unlock(&mapping->i_mmap_mutex);
 
diff --git a/mm/memory.c b/mm/memory.c
index 91f69459d3e8..59e5bebc2e35 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1343,8 +1343,11 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 			 * Since no pte has actually been setup, it is
 			 * safe to do nothing in this case.
 			 */
-			if (vma->vm_file)
-				unmap_hugepage_range(vma, start, end, NULL);
+			if (vma->vm_file) {
+				mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+				__unmap_hugepage_range(tlb, vma, start, end, NULL);
+				mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+			}
 		} else
 			unmap_page_range(tlb, vma, start, end, details);
 	}
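
Taken together, the new calling convention is: __unmap_hugepage_range() no longer flushes the TLB or frees pages itself; it batches both into a caller-supplied struct mmu_gather, and the caller pays for the flush once in tlb_finish_mmu(). A minimal sketch of driving it, assuming a hugetlb VMA whose i_mmap_mutex is already held (the function name is hypothetical; this mirrors the rewritten unmap_hugepage_range() in mm/hugetlb.c above):

	#include <asm/tlb.h>
	#include <linux/hugetlb.h>

	static void example_unmap_huge_vma(struct vm_area_struct *vma)
	{
		struct mmu_gather tlb;

		/* 0: this is a range unmap, not a full-mm teardown */
		tlb_gather_mmu(&tlb, vma->vm_mm, 0);
		__unmap_hugepage_range(&tlb, vma, vma->vm_start,
				       vma->vm_end, NULL);
		/* frees the batched pages and invalidates the TLB once */
		tlb_finish_mmu(&tlb, vma->vm_start, vma->vm_end);
	}

In unmap_single_vma() above, the same pattern collapses into a single call because an mmu_gather is already live for the surrounding unmap walk; only the i_mmap_mutex needs to be taken around the hugetlb case.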