aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh.dickins@tiscali.co.uk>2009-09-21 20:01:57 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-22 10:17:31 -0400
commitf8af4da3b4c14e7267c4ffb952079af3912c51c5 (patch)
tree17b0cfbd2d3d9abf9008f69e7fee5369cec7afa5
parentd19f352484467a5e518639ddff0554669c10ffab (diff)
ksm: the mm interface to ksm
This patch presents the mm interface to a dummy version of ksm.c, for better scrutiny of that interface: the real ksm.c follows later. When CONFIG_KSM is not set, madvise(2) rejects MADV_MERGEABLE and MADV_UNMERGEABLE with EINVAL, since that seems more helpful than pretending that they can be serviced. But when CONFIG_KSM=y, accept them even if KSM is not currently running, and even on areas which KSM will not touch (e.g. hugetlb or shared file or special driver mappings). Like other madvices, report ENOMEM despite success if any area in the range is unmapped, and use EAGAIN to report out of memory. Define vma flag VM_MERGEABLE to identify an area on which KSM may try merging pages: leave it to ksm_madvise() to decide whether to set it. Define mm flag MMF_VM_MERGEABLE to identify an mm which might contain VM_MERGEABLE areas, to minimize callouts when forking or exiting. Based upon earlier patches by Chris Wright and Izik Eidus. Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk> Signed-off-by: Chris Wright <chrisw@redhat.com> Signed-off-by: Izik Eidus <ieidus@redhat.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Avi Kivity <avi@redhat.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/ksm.h50
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/linux/sched.h7
-rw-r--r--kernel/fork.c8
-rw-r--r--mm/Kconfig11
-rw-r--r--mm/Makefile1
-rw-r--r--mm/ksm.c56
-rw-r--r--mm/madvise.c14
8 files changed, 147 insertions, 1 deletions
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
new file mode 100644
index 000000000000..eb2a448981ee
--- /dev/null
+++ b/include/linux/ksm.h
@@ -0,0 +1,50 @@
1#ifndef __LINUX_KSM_H
2#define __LINUX_KSM_H
3/*
4 * Memory merging support.
5 *
6 * This code enables dynamic sharing of identical pages found in different
7 * memory areas, even if they are not shared by fork().
8 */
9
10#include <linux/bitops.h>
11#include <linux/mm.h>
12#include <linux/sched.h>
13
14#ifdef CONFIG_KSM
15int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
16 unsigned long end, int advice, unsigned long *vm_flags);
17int __ksm_enter(struct mm_struct *mm);
18void __ksm_exit(struct mm_struct *mm);
19
20static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
21{
22 if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
23 return __ksm_enter(mm);
24 return 0;
25}
26
27static inline void ksm_exit(struct mm_struct *mm)
28{
29 if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
30 __ksm_exit(mm);
31}
32#else /* !CONFIG_KSM */
33
/*
 * ksm_madvise - stub used when CONFIG_KSM is not set.
 *
 * Ignores all of its arguments, leaves *vm_flags untouched and
 * always returns 0.
 */
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, int advice, unsigned long *vm_flags)
{
	return 0;
}
39
/*
 * ksm_fork - stub used when CONFIG_KSM is not set.
 *
 * Ignores both mm pointers and always returns 0.
 */
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
	return 0;
}
44
/*
 * ksm_exit - stub used when CONFIG_KSM is not set.
 *
 * Does nothing with @mm.
 */
static inline void ksm_exit(struct mm_struct *mm)
{
}
48#endif /* !CONFIG_KSM */
49
50#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d3c8ae7c8015..d808cf832c4d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -103,6 +103,7 @@ extern unsigned int kobjsize(const void *objp);
103#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ 103#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
104#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ 104#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
105#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ 105#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */
106#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */
106 107
107#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ 108#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
108#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS 109#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fe351c3914a..8f3e63cb33a6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -434,7 +434,9 @@ extern int get_dumpable(struct mm_struct *mm);
434/* dumpable bits */ 434/* dumpable bits */
435#define MMF_DUMPABLE 0 /* core dump is permitted */ 435#define MMF_DUMPABLE 0 /* core dump is permitted */
436#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ 436#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
437
437#define MMF_DUMPABLE_BITS 2 438#define MMF_DUMPABLE_BITS 2
439#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
438 440
439/* coredump filter bits */ 441/* coredump filter bits */
440#define MMF_DUMP_ANON_PRIVATE 2 442#define MMF_DUMP_ANON_PRIVATE 2
@@ -444,6 +446,7 @@ extern int get_dumpable(struct mm_struct *mm);
444#define MMF_DUMP_ELF_HEADERS 6 446#define MMF_DUMP_ELF_HEADERS 6
445#define MMF_DUMP_HUGETLB_PRIVATE 7 447#define MMF_DUMP_HUGETLB_PRIVATE 7
446#define MMF_DUMP_HUGETLB_SHARED 8 448#define MMF_DUMP_HUGETLB_SHARED 8
449
447#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS 450#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
448#define MMF_DUMP_FILTER_BITS 7 451#define MMF_DUMP_FILTER_BITS 7
449#define MMF_DUMP_FILTER_MASK \ 452#define MMF_DUMP_FILTER_MASK \
@@ -457,6 +460,10 @@ extern int get_dumpable(struct mm_struct *mm);
457#else 460#else
458# define MMF_DUMP_MASK_DEFAULT_ELF 0 461# define MMF_DUMP_MASK_DEFAULT_ELF 0
459#endif 462#endif
463 /* leave room for more dump flags */
464#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
465
466#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
460 467
461struct sighand_struct { 468struct sighand_struct {
462 atomic_t count; 469 atomic_t count;
diff --git a/kernel/fork.c b/kernel/fork.c
index d4638c8cc19e..73a442b7be6d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -49,6 +49,7 @@
49#include <linux/ftrace.h> 49#include <linux/ftrace.h>
50#include <linux/profile.h> 50#include <linux/profile.h>
51#include <linux/rmap.h> 51#include <linux/rmap.h>
52#include <linux/ksm.h>
52#include <linux/acct.h> 53#include <linux/acct.h>
53#include <linux/tsacct_kern.h> 54#include <linux/tsacct_kern.h>
54#include <linux/cn_proc.h> 55#include <linux/cn_proc.h>
@@ -299,6 +300,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
299 rb_link = &mm->mm_rb.rb_node; 300 rb_link = &mm->mm_rb.rb_node;
300 rb_parent = NULL; 301 rb_parent = NULL;
301 pprev = &mm->mmap; 302 pprev = &mm->mmap;
303 retval = ksm_fork(mm, oldmm);
304 if (retval)
305 goto out;
302 306
303 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 307 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
304 struct file *file; 308 struct file *file;
@@ -435,7 +439,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
435 atomic_set(&mm->mm_count, 1); 439 atomic_set(&mm->mm_count, 1);
436 init_rwsem(&mm->mmap_sem); 440 init_rwsem(&mm->mmap_sem);
437 INIT_LIST_HEAD(&mm->mmlist); 441 INIT_LIST_HEAD(&mm->mmlist);
438 mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; 442 mm->flags = (current->mm) ?
443 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
439 mm->core_state = NULL; 444 mm->core_state = NULL;
440 mm->nr_ptes = 0; 445 mm->nr_ptes = 0;
441 set_mm_counter(mm, file_rss, 0); 446 set_mm_counter(mm, file_rss, 0);
@@ -496,6 +501,7 @@ void mmput(struct mm_struct *mm)
496 501
497 if (atomic_dec_and_test(&mm->mm_users)) { 502 if (atomic_dec_and_test(&mm->mm_users)) {
498 exit_aio(mm); 503 exit_aio(mm);
504 ksm_exit(mm);
499 exit_mmap(mm); 505 exit_mmap(mm);
500 set_mm_exe_file(mm, NULL); 506 set_mm_exe_file(mm, NULL);
501 if (!list_empty(&mm->mmlist)) { 507 if (!list_empty(&mm->mmlist)) {
diff --git a/mm/Kconfig b/mm/Kconfig
index 3aa519f52e18..c0b6afa178a1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -214,6 +214,17 @@ config HAVE_MLOCKED_PAGE_BIT
214config MMU_NOTIFIER 214config MMU_NOTIFIER
215 bool 215 bool
216 216
217config KSM
218 bool "Enable KSM for page merging"
219 depends on MMU
220 help
221 Enable Kernel Samepage Merging: KSM periodically scans those areas
222 of an application's address space that an app has advised may be
223 mergeable. When it finds pages of identical content, it replaces
224 the many instances by a single resident page with that content, so
225 saving memory until one or another app needs to modify the content.
226 Recommended for use with KVM, or with other duplicative applications.
227
217config DEFAULT_MMAP_MIN_ADDR 228config DEFAULT_MMAP_MIN_ADDR
218 int "Low address space to protect from user allocation" 229 int "Low address space to protect from user allocation"
219 default 4096 230 default 4096
diff --git a/mm/Makefile b/mm/Makefile
index ea4b18bd3960..a63bf59a0c77 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o 25obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
26obj-$(CONFIG_SLOB) += slob.o 26obj-$(CONFIG_SLOB) += slob.o
27obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o 27obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
28obj-$(CONFIG_KSM) += ksm.o
28obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o 29obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
29obj-$(CONFIG_SLAB) += slab.o 30obj-$(CONFIG_SLAB) += slab.o
30obj-$(CONFIG_SLUB) += slub.o 31obj-$(CONFIG_SLUB) += slub.o
diff --git a/mm/ksm.c b/mm/ksm.c
new file mode 100644
index 000000000000..8b76008fcd32
--- /dev/null
+++ b/mm/ksm.c
@@ -0,0 +1,56 @@
1/*
2 * Initial dummy version just to illustrate KSM's interface to other files.
3 */
4
5#include <linux/errno.h>
6#include <linux/mman.h>
7#include <linux/ksm.h>
8
9int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
10 unsigned long end, int advice, unsigned long *vm_flags)
11{
12 struct mm_struct *mm = vma->vm_mm;
13
14 switch (advice) {
15 case MADV_MERGEABLE:
16 /*
17 * Be somewhat over-protective for now!
18 */
19 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
20 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
21 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
22 VM_MIXEDMAP | VM_SAO))
23 return 0; /* just ignore the advice */
24
25 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
26 if (__ksm_enter(mm) < 0)
27 return -EAGAIN;
28
29 *vm_flags |= VM_MERGEABLE;
30 break;
31
32 case MADV_UNMERGEABLE:
33 if (!(*vm_flags & VM_MERGEABLE))
34 return 0; /* just ignore the advice */
35
36 /* Unmerge any merged pages here */
37
38 *vm_flags &= ~VM_MERGEABLE;
39 break;
40 }
41
42 return 0;
43}
44
45int __ksm_enter(struct mm_struct *mm)
46{
47 /* Allocate a structure to track mm and link it into KSM's list */
48 set_bit(MMF_VM_MERGEABLE, &mm->flags);
49 return 0;
50}
51
52void __ksm_exit(struct mm_struct *mm)
53{
54 /* Unlink and free all KSM's structures which track this mm */
55 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
56}
diff --git a/mm/madvise.c b/mm/madvise.c
index 66c31264f062..d9ae2067952e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -11,6 +11,7 @@
11#include <linux/mempolicy.h> 11#include <linux/mempolicy.h>
12#include <linux/hugetlb.h> 12#include <linux/hugetlb.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/ksm.h>
14 15
15/* 16/*
16 * Any behaviour which results in changes to the vma->vm_flags needs to 17 * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -63,6 +64,12 @@ static long madvise_behavior(struct vm_area_struct * vma,
63 } 64 }
64 new_flags &= ~VM_DONTCOPY; 65 new_flags &= ~VM_DONTCOPY;
65 break; 66 break;
67 case MADV_MERGEABLE:
68 case MADV_UNMERGEABLE:
69 error = ksm_madvise(vma, start, end, behavior, &new_flags);
70 if (error)
71 goto out;
72 break;
66 } 73 }
67 74
68 if (new_flags == vma->vm_flags) { 75 if (new_flags == vma->vm_flags) {
@@ -239,6 +246,10 @@ madvise_behavior_valid(int behavior)
239 case MADV_REMOVE: 246 case MADV_REMOVE:
240 case MADV_WILLNEED: 247 case MADV_WILLNEED:
241 case MADV_DONTNEED: 248 case MADV_DONTNEED:
249#ifdef CONFIG_KSM
250 case MADV_MERGEABLE:
251 case MADV_UNMERGEABLE:
252#endif
242 return 1; 253 return 1;
243 254
244 default: 255 default:
@@ -273,6 +284,9 @@ madvise_behavior_valid(int behavior)
273 * MADV_DONTFORK - omit this area from child's address space when forking: 284 * MADV_DONTFORK - omit this area from child's address space when forking:
274 * typically, to avoid COWing pages pinned by get_user_pages(). 285 * typically, to avoid COWing pages pinned by get_user_pages().
275 * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking. 286 * MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
287 * MADV_MERGEABLE - the application recommends that KSM try to merge pages in
288 * this area with pages of identical content from other such areas.
289 * MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
276 * 290 *
277 * return values: 291 * return values:
278 * zero - success 292 * zero - success