diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2011-01-13 18:47:17 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 20:32:47 -0500 |
commit | a664b2d8555c659127bf8fe049a58449d394a707 (patch) | |
tree | 14771f4ab93a9dda98174f21e0361a77e2aebfa6 | |
parent | 1ddd6db43a08cba56c7ee920800980862086f1c3 (diff) |
thp: madvise(MADV_NOHUGEPAGE)
Add madvise MADV_NOHUGEPAGE to mark regions that do not need to be
hugepage backed. Return -EINVAL if the vma is not of an anonymous type,
or if the feature isn't built into the kernel. Never silently return
success.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/huge_mm.h | 14 | ||||
-rw-r--r-- | include/linux/khugepaged.h | 7 | ||||
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | mm/huge_memory.c | 41 | ||||
-rw-r--r-- | mm/madvise.c | 4 |
5 files changed, 46 insertions, 21 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b48c24df260..a8b7e42d19ec 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h | |||
@@ -52,10 +52,12 @@ extern pmd_t *page_check_address_pmd(struct page *page, | |||
52 | #define HPAGE_PMD_SIZE HPAGE_SIZE | 52 | #define HPAGE_PMD_SIZE HPAGE_SIZE |
53 | 53 | ||
54 | #define transparent_hugepage_enabled(__vma) \ | 54 | #define transparent_hugepage_enabled(__vma) \ |
55 | (transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ | 55 | ((transparent_hugepage_flags & \ |
56 | (transparent_hugepage_flags & \ | 56 | (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ |
57 | (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \ | 57 | (transparent_hugepage_flags & \ |
58 | (__vma)->vm_flags & VM_HUGEPAGE)) | 58 | (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \ |
59 | ((__vma)->vm_flags & VM_HUGEPAGE))) && \ | ||
60 | !((__vma)->vm_flags & VM_NOHUGEPAGE)) | ||
59 | #define transparent_hugepage_defrag(__vma) \ | 61 | #define transparent_hugepage_defrag(__vma) \ |
60 | ((transparent_hugepage_flags & \ | 62 | ((transparent_hugepage_flags & \ |
61 | (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \ | 63 | (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \ |
@@ -103,7 +105,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); | |||
103 | #if HPAGE_PMD_ORDER > MAX_ORDER | 105 | #if HPAGE_PMD_ORDER > MAX_ORDER |
104 | #error "hugepages can't be allocated by the buddy allocator" | 106 | #error "hugepages can't be allocated by the buddy allocator" |
105 | #endif | 107 | #endif |
106 | extern int hugepage_madvise(unsigned long *vm_flags); | 108 | extern int hugepage_madvise(unsigned long *vm_flags, int advice); |
107 | extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, | 109 | extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, |
108 | unsigned long start, | 110 | unsigned long start, |
109 | unsigned long end, | 111 | unsigned long end, |
@@ -141,7 +143,7 @@ static inline int split_huge_page(struct page *page) | |||
141 | do { } while (0) | 143 | do { } while (0) |
142 | #define wait_split_huge_page(__anon_vma, __pmd) \ | 144 | #define wait_split_huge_page(__anon_vma, __pmd) \ |
143 | do { } while (0) | 145 | do { } while (0) |
144 | static inline int hugepage_madvise(unsigned long *vm_flags) | 146 | static inline int hugepage_madvise(unsigned long *vm_flags, int advice) |
145 | { | 147 | { |
146 | BUG(); | 148 | BUG(); |
147 | return 0; | 149 | return 0; |
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 552f3184756c..6b394f0b5148 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h | |||
@@ -38,9 +38,10 @@ static inline void khugepaged_exit(struct mm_struct *mm) | |||
38 | static inline int khugepaged_enter(struct vm_area_struct *vma) | 38 | static inline int khugepaged_enter(struct vm_area_struct *vma) |
39 | { | 39 | { |
40 | if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) | 40 | if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) |
41 | if (khugepaged_always() || | 41 | if ((khugepaged_always() || |
42 | (khugepaged_req_madv() && | 42 | (khugepaged_req_madv() && |
43 | vma->vm_flags & VM_HUGEPAGE)) | 43 | vma->vm_flags & VM_HUGEPAGE)) && |
44 | !(vma->vm_flags & VM_NOHUGEPAGE)) | ||
44 | if (__khugepaged_enter(vma->vm_mm)) | 45 | if (__khugepaged_enter(vma->vm_mm)) |
45 | return -ENOMEM; | 46 | return -ENOMEM; |
46 | return 0; | 47 | return 0; |
diff --git a/include/linux/mm.h b/include/linux/mm.h index ce97a2bb0b19..956a35532f47 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -83,6 +83,7 @@ extern unsigned int kobjsize(const void *objp); | |||
83 | #define VM_GROWSUP 0x00000200 | 83 | #define VM_GROWSUP 0x00000200 |
84 | #else | 84 | #else |
85 | #define VM_GROWSUP 0x00000000 | 85 | #define VM_GROWSUP 0x00000000 |
86 | #define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */ | ||
86 | #endif | 87 | #endif |
87 | #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ | 88 | #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ |
88 | #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ | 89 | #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f4f6041176a4..fce667c0281d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kthread.h> | 16 | #include <linux/kthread.h> |
17 | #include <linux/khugepaged.h> | 17 | #include <linux/khugepaged.h> |
18 | #include <linux/freezer.h> | 18 | #include <linux/freezer.h> |
19 | #include <linux/mman.h> | ||
19 | #include <asm/tlb.h> | 20 | #include <asm/tlb.h> |
20 | #include <asm/pgalloc.h> | 21 | #include <asm/pgalloc.h> |
21 | #include "internal.h" | 22 | #include "internal.h" |
@@ -1388,18 +1389,36 @@ out: | |||
1388 | return ret; | 1389 | return ret; |
1389 | } | 1390 | } |
1390 | 1391 | ||
1391 | int hugepage_madvise(unsigned long *vm_flags) | 1392 | int hugepage_madvise(unsigned long *vm_flags, int advice) |
1392 | { | 1393 | { |
1393 | /* | 1394 | switch (advice) { |
1394 | * Be somewhat over-protective like KSM for now! | 1395 | case MADV_HUGEPAGE: |
1395 | */ | 1396 | /* |
1396 | if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE | | 1397 | * Be somewhat over-protective like KSM for now! |
1397 | VM_PFNMAP | VM_IO | VM_DONTEXPAND | | 1398 | */ |
1398 | VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | 1399 | if (*vm_flags & (VM_HUGEPAGE | |
1399 | VM_MIXEDMAP | VM_SAO)) | 1400 | VM_SHARED | VM_MAYSHARE | |
1400 | return -EINVAL; | 1401 | VM_PFNMAP | VM_IO | VM_DONTEXPAND | |
1401 | 1402 | VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | |
1402 | *vm_flags |= VM_HUGEPAGE; | 1403 | VM_MIXEDMAP | VM_SAO)) |
1404 | return -EINVAL; | ||
1405 | *vm_flags &= ~VM_NOHUGEPAGE; | ||
1406 | *vm_flags |= VM_HUGEPAGE; | ||
1407 | break; | ||
1408 | case MADV_NOHUGEPAGE: | ||
1409 | /* | ||
1410 | * Be somewhat over-protective like KSM for now! | ||
1411 | */ | ||
1412 | if (*vm_flags & (VM_NOHUGEPAGE | | ||
1413 | VM_SHARED | VM_MAYSHARE | | ||
1414 | VM_PFNMAP | VM_IO | VM_DONTEXPAND | | ||
1415 | VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | ||
1416 | VM_MIXEDMAP | VM_SAO)) | ||
1417 | return -EINVAL; | ||
1418 | *vm_flags &= ~VM_HUGEPAGE; | ||
1419 | *vm_flags |= VM_NOHUGEPAGE; | ||
1420 | break; | ||
1421 | } | ||
1403 | 1422 | ||
1404 | return 0; | 1423 | return 0; |
1405 | } | 1424 | } |
diff --git a/mm/madvise.c b/mm/madvise.c index ecde40a401c1..bbac126e03ed 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -72,7 +72,8 @@ static long madvise_behavior(struct vm_area_struct * vma, | |||
72 | goto out; | 72 | goto out; |
73 | break; | 73 | break; |
74 | case MADV_HUGEPAGE: | 74 | case MADV_HUGEPAGE: |
75 | error = hugepage_madvise(&new_flags); | 75 | case MADV_NOHUGEPAGE: |
76 | error = hugepage_madvise(&new_flags, behavior); | ||
76 | if (error) | 77 | if (error) |
77 | goto out; | 78 | goto out; |
78 | break; | 79 | break; |
@@ -290,6 +291,7 @@ madvise_behavior_valid(int behavior) | |||
290 | #endif | 291 | #endif |
291 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 292 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
292 | case MADV_HUGEPAGE: | 293 | case MADV_HUGEPAGE: |
294 | case MADV_NOHUGEPAGE: | ||
293 | #endif | 295 | #endif |
294 | return 1; | 296 | return 1; |
295 | 297 | ||