diff options
-rw-r--r-- | include/linux/ksm.h | 50 | ||||
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | include/linux/sched.h | 7 | ||||
-rw-r--r-- | kernel/fork.c | 8 | ||||
-rw-r--r-- | mm/Kconfig | 11 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/ksm.c | 56 | ||||
-rw-r--r-- | mm/madvise.c | 14 |
8 files changed, 147 insertions, 1 deletion
diff --git a/include/linux/ksm.h b/include/linux/ksm.h new file mode 100644 index 000000000000..eb2a448981ee --- /dev/null +++ b/include/linux/ksm.h | |||
@@ -0,0 +1,50 @@ | |||
1 | #ifndef __LINUX_KSM_H | ||
2 | #define __LINUX_KSM_H | ||
3 | /* | ||
4 | * Memory merging support. | ||
5 | * | ||
6 | * This code enables dynamic sharing of identical pages found in different | ||
7 | * memory areas, even if they are not shared by fork(). | ||
8 | */ | ||
9 | |||
10 | #include <linux/bitops.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/sched.h> | ||
13 | |||
14 | #ifdef CONFIG_KSM | ||
15 | int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | ||
16 | unsigned long end, int advice, unsigned long *vm_flags); | ||
17 | int __ksm_enter(struct mm_struct *mm); | ||
18 | void __ksm_exit(struct mm_struct *mm); | ||
19 | |||
20 | static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) | ||
21 | { | ||
22 | if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) | ||
23 | return __ksm_enter(mm); | ||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | static inline void ksm_exit(struct mm_struct *mm) | ||
28 | { | ||
29 | if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) | ||
30 | __ksm_exit(mm); | ||
31 | } | ||
32 | #else /* !CONFIG_KSM */ | ||
33 | |||
34 | static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | ||
35 | unsigned long end, int advice, unsigned long *vm_flags) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) | ||
41 | { | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static inline void ksm_exit(struct mm_struct *mm) | ||
46 | { | ||
47 | } | ||
48 | #endif /* !CONFIG_KSM */ | ||
49 | |||
50 | #endif | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index d3c8ae7c8015..d808cf832c4d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -103,6 +103,7 @@ extern unsigned int kobjsize(const void *objp); | |||
103 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ | 103 | #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ |
104 | #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ | 104 | #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ |
105 | #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ | 105 | #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ |
106 | #define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ | ||
106 | 107 | ||
107 | #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ | 108 | #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ |
108 | #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS | 109 | #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 8fe351c3914a..8f3e63cb33a6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -434,7 +434,9 @@ extern int get_dumpable(struct mm_struct *mm); | |||
434 | /* dumpable bits */ | 434 | /* dumpable bits */ |
435 | #define MMF_DUMPABLE 0 /* core dump is permitted */ | 435 | #define MMF_DUMPABLE 0 /* core dump is permitted */ |
436 | #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ | 436 | #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ |
437 | |||
437 | #define MMF_DUMPABLE_BITS 2 | 438 | #define MMF_DUMPABLE_BITS 2 |
439 | #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) | ||
438 | 440 | ||
439 | /* coredump filter bits */ | 441 | /* coredump filter bits */ |
440 | #define MMF_DUMP_ANON_PRIVATE 2 | 442 | #define MMF_DUMP_ANON_PRIVATE 2 |
@@ -444,6 +446,7 @@ extern int get_dumpable(struct mm_struct *mm); | |||
444 | #define MMF_DUMP_ELF_HEADERS 6 | 446 | #define MMF_DUMP_ELF_HEADERS 6 |
445 | #define MMF_DUMP_HUGETLB_PRIVATE 7 | 447 | #define MMF_DUMP_HUGETLB_PRIVATE 7 |
446 | #define MMF_DUMP_HUGETLB_SHARED 8 | 448 | #define MMF_DUMP_HUGETLB_SHARED 8 |
449 | |||
447 | #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS | 450 | #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS |
448 | #define MMF_DUMP_FILTER_BITS 7 | 451 | #define MMF_DUMP_FILTER_BITS 7 |
449 | #define MMF_DUMP_FILTER_MASK \ | 452 | #define MMF_DUMP_FILTER_MASK \ |
@@ -457,6 +460,10 @@ extern int get_dumpable(struct mm_struct *mm); | |||
457 | #else | 460 | #else |
458 | # define MMF_DUMP_MASK_DEFAULT_ELF 0 | 461 | # define MMF_DUMP_MASK_DEFAULT_ELF 0 |
459 | #endif | 462 | #endif |
463 | /* leave room for more dump flags */ | ||
464 | #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ | ||
465 | |||
466 | #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) | ||
460 | 467 | ||
461 | struct sighand_struct { | 468 | struct sighand_struct { |
462 | atomic_t count; | 469 | atomic_t count; |
diff --git a/kernel/fork.c b/kernel/fork.c index d4638c8cc19e..73a442b7be6d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/ftrace.h> | 49 | #include <linux/ftrace.h> |
50 | #include <linux/profile.h> | 50 | #include <linux/profile.h> |
51 | #include <linux/rmap.h> | 51 | #include <linux/rmap.h> |
52 | #include <linux/ksm.h> | ||
52 | #include <linux/acct.h> | 53 | #include <linux/acct.h> |
53 | #include <linux/tsacct_kern.h> | 54 | #include <linux/tsacct_kern.h> |
54 | #include <linux/cn_proc.h> | 55 | #include <linux/cn_proc.h> |
@@ -299,6 +300,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
299 | rb_link = &mm->mm_rb.rb_node; | 300 | rb_link = &mm->mm_rb.rb_node; |
300 | rb_parent = NULL; | 301 | rb_parent = NULL; |
301 | pprev = &mm->mmap; | 302 | pprev = &mm->mmap; |
303 | retval = ksm_fork(mm, oldmm); | ||
304 | if (retval) | ||
305 | goto out; | ||
302 | 306 | ||
303 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { | 307 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { |
304 | struct file *file; | 308 | struct file *file; |
@@ -435,7 +439,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
435 | atomic_set(&mm->mm_count, 1); | 439 | atomic_set(&mm->mm_count, 1); |
436 | init_rwsem(&mm->mmap_sem); | 440 | init_rwsem(&mm->mmap_sem); |
437 | INIT_LIST_HEAD(&mm->mmlist); | 441 | INIT_LIST_HEAD(&mm->mmlist); |
438 | mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; | 442 | mm->flags = (current->mm) ? |
443 | (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; | ||
439 | mm->core_state = NULL; | 444 | mm->core_state = NULL; |
440 | mm->nr_ptes = 0; | 445 | mm->nr_ptes = 0; |
441 | set_mm_counter(mm, file_rss, 0); | 446 | set_mm_counter(mm, file_rss, 0); |
@@ -496,6 +501,7 @@ void mmput(struct mm_struct *mm) | |||
496 | 501 | ||
497 | if (atomic_dec_and_test(&mm->mm_users)) { | 502 | if (atomic_dec_and_test(&mm->mm_users)) { |
498 | exit_aio(mm); | 503 | exit_aio(mm); |
504 | ksm_exit(mm); | ||
499 | exit_mmap(mm); | 505 | exit_mmap(mm); |
500 | set_mm_exe_file(mm, NULL); | 506 | set_mm_exe_file(mm, NULL); |
501 | if (!list_empty(&mm->mmlist)) { | 507 | if (!list_empty(&mm->mmlist)) { |
diff --git a/mm/Kconfig b/mm/Kconfig index 3aa519f52e18..c0b6afa178a1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -214,6 +214,17 @@ config HAVE_MLOCKED_PAGE_BIT | |||
214 | config MMU_NOTIFIER | 214 | config MMU_NOTIFIER |
215 | bool | 215 | bool |
216 | 216 | ||
217 | config KSM | ||
218 | bool "Enable KSM for page merging" | ||
219 | depends on MMU | ||
220 | help | ||
221 | Enable Kernel Samepage Merging: KSM periodically scans those areas | ||
222 | of an application's address space that an app has advised may be | ||
223 | mergeable. When it finds pages of identical content, it replaces | ||
224 | the many instances by a single resident page with that content, so | ||
225 | saving memory until one or another app needs to modify the content. | ||
226 | Recommended for use with KVM, or with other duplicative applications. | ||
227 | |||
217 | config DEFAULT_MMAP_MIN_ADDR | 228 | config DEFAULT_MMAP_MIN_ADDR |
218 | int "Low address space to protect from user allocation" | 229 | int "Low address space to protect from user allocation" |
219 | default 4096 | 230 | default 4096 |
diff --git a/mm/Makefile b/mm/Makefile index ea4b18bd3960..a63bf59a0c77 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -25,6 +25,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o | |||
25 | obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o | 25 | obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o |
26 | obj-$(CONFIG_SLOB) += slob.o | 26 | obj-$(CONFIG_SLOB) += slob.o |
27 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o | 27 | obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o |
28 | obj-$(CONFIG_KSM) += ksm.o | ||
28 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o | 29 | obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o |
29 | obj-$(CONFIG_SLAB) += slab.o | 30 | obj-$(CONFIG_SLAB) += slab.o |
30 | obj-$(CONFIG_SLUB) += slub.o | 31 | obj-$(CONFIG_SLUB) += slub.o |
diff --git a/mm/ksm.c b/mm/ksm.c new file mode 100644 index 000000000000..8b76008fcd32 --- /dev/null +++ b/mm/ksm.c | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | * Initial dummy version just to illustrate KSM's interface to other files. | ||
3 | */ | ||
4 | |||
5 | #include <linux/errno.h> | ||
6 | #include <linux/mman.h> | ||
7 | #include <linux/ksm.h> | ||
8 | |||
9 | int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | ||
10 | unsigned long end, int advice, unsigned long *vm_flags) | ||
11 | { | ||
12 | struct mm_struct *mm = vma->vm_mm; | ||
13 | |||
14 | switch (advice) { | ||
15 | case MADV_MERGEABLE: | ||
16 | /* | ||
17 | * Be somewhat over-protective for now! | ||
18 | */ | ||
19 | if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | | ||
20 | VM_PFNMAP | VM_IO | VM_DONTEXPAND | | ||
21 | VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | | ||
22 | VM_MIXEDMAP | VM_SAO)) | ||
23 | return 0; /* just ignore the advice */ | ||
24 | |||
25 | if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) | ||
26 | if (__ksm_enter(mm) < 0) | ||
27 | return -EAGAIN; | ||
28 | |||
29 | *vm_flags |= VM_MERGEABLE; | ||
30 | break; | ||
31 | |||
32 | case MADV_UNMERGEABLE: | ||
33 | if (!(*vm_flags & VM_MERGEABLE)) | ||
34 | return 0; /* just ignore the advice */ | ||
35 | |||
36 | /* Unmerge any merged pages here */ | ||
37 | |||
38 | *vm_flags &= ~VM_MERGEABLE; | ||
39 | break; | ||
40 | } | ||
41 | |||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | int __ksm_enter(struct mm_struct *mm) | ||
46 | { | ||
47 | /* Allocate a structure to track mm and link it into KSM's list */ | ||
48 | set_bit(MMF_VM_MERGEABLE, &mm->flags); | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | void __ksm_exit(struct mm_struct *mm) | ||
53 | { | ||
54 | /* Unlink and free all KSM's structures which track this mm */ | ||
55 | clear_bit(MMF_VM_MERGEABLE, &mm->flags); | ||
56 | } | ||
diff --git a/mm/madvise.c b/mm/madvise.c index 66c31264f062..d9ae2067952e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/mempolicy.h> | 11 | #include <linux/mempolicy.h> |
12 | #include <linux/hugetlb.h> | 12 | #include <linux/hugetlb.h> |
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/ksm.h> | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * Any behaviour which results in changes to the vma->vm_flags needs to | 17 | * Any behaviour which results in changes to the vma->vm_flags needs to |
@@ -63,6 +64,12 @@ static long madvise_behavior(struct vm_area_struct * vma, | |||
63 | } | 64 | } |
64 | new_flags &= ~VM_DONTCOPY; | 65 | new_flags &= ~VM_DONTCOPY; |
65 | break; | 66 | break; |
67 | case MADV_MERGEABLE: | ||
68 | case MADV_UNMERGEABLE: | ||
69 | error = ksm_madvise(vma, start, end, behavior, &new_flags); | ||
70 | if (error) | ||
71 | goto out; | ||
72 | break; | ||
66 | } | 73 | } |
67 | 74 | ||
68 | if (new_flags == vma->vm_flags) { | 75 | if (new_flags == vma->vm_flags) { |
@@ -239,6 +246,10 @@ madvise_behavior_valid(int behavior) | |||
239 | case MADV_REMOVE: | 246 | case MADV_REMOVE: |
240 | case MADV_WILLNEED: | 247 | case MADV_WILLNEED: |
241 | case MADV_DONTNEED: | 248 | case MADV_DONTNEED: |
249 | #ifdef CONFIG_KSM | ||
250 | case MADV_MERGEABLE: | ||
251 | case MADV_UNMERGEABLE: | ||
252 | #endif | ||
242 | return 1; | 253 | return 1; |
243 | 254 | ||
244 | default: | 255 | default: |
@@ -273,6 +284,9 @@ madvise_behavior_valid(int behavior) | |||
273 | * MADV_DONTFORK - omit this area from child's address space when forking: | 284 | * MADV_DONTFORK - omit this area from child's address space when forking: |
274 | * typically, to avoid COWing pages pinned by get_user_pages(). | 285 | * typically, to avoid COWing pages pinned by get_user_pages(). |
275 | * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. | 286 | * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. |
287 | * MADV_MERGEABLE - the application recommends that KSM try to merge pages in | ||
288 | * this area with pages of identical content from other such areas. | ||
289 | * MADV_UNMERGEABLE - cancel MADV_MERGEABLE: no longer merge pages with others. | ||
276 | * | 290 | * |
277 | * return values: | 291 | * return values: |
278 | * zero - success | 292 | * zero - success |