diff options
-rw-r--r-- | Documentation/vm/ksm.txt | 7 | ||||
-rw-r--r-- | mm/ksm.c | 151 |
2 files changed, 139 insertions, 19 deletions
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt index b392e496f816..25cc89ba811b 100644 --- a/Documentation/vm/ksm.txt +++ b/Documentation/vm/ksm.txt | |||
@@ -58,6 +58,13 @@ sleep_millisecs - how many milliseconds ksmd should sleep before next scan | |||
58 | e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" | 58 | e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs" |
59 | Default: 20 (chosen for demonstration purposes) | 59 | Default: 20 (chosen for demonstration purposes) |
60 | 60 | ||
61 | merge_across_nodes - specifies if pages from different NUMA nodes can be merged. | ||
62 | When set to 0, ksm merges only pages which physically | ||
63 | reside in the memory area of the same NUMA node. This brings | ||
64 | lower latency when accessing shared pages. The value can be | ||
65 | changed only when there are no ksm shared pages in the system. | ||
66 | Default: 1 | ||
67 | |||
61 | run - set 0 to stop ksmd from running but keep merged pages, | 68 | run - set 0 to stop ksmd from running but keep merged pages, |
62 | set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", | 69 | set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run", |
63 | set 2 to stop ksmd and unmerge all pages currently merged, | 70 | set 2 to stop ksmd and unmerge all pages currently merged, |
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/hashtable.h> | 36 | #include <linux/hashtable.h> |
37 | #include <linux/freezer.h> | 37 | #include <linux/freezer.h> |
38 | #include <linux/oom.h> | 38 | #include <linux/oom.h> |
39 | #include <linux/numa.h> | ||
39 | 40 | ||
40 | #include <asm/tlbflush.h> | 41 | #include <asm/tlbflush.h> |
41 | #include "internal.h" | 42 | #include "internal.h" |
@@ -139,6 +140,9 @@ struct rmap_item { | |||
139 | struct mm_struct *mm; | 140 | struct mm_struct *mm; |
140 | unsigned long address; /* + low bits used for flags below */ | 141 | unsigned long address; /* + low bits used for flags below */ |
141 | unsigned int oldchecksum; /* when unstable */ | 142 | unsigned int oldchecksum; /* when unstable */ |
143 | #ifdef CONFIG_NUMA | ||
144 | unsigned int nid; | ||
145 | #endif | ||
142 | union { | 146 | union { |
143 | struct rb_node node; /* when node of unstable tree */ | 147 | struct rb_node node; /* when node of unstable tree */ |
144 | struct { /* when listed from stable tree */ | 148 | struct { /* when listed from stable tree */ |
@@ -153,8 +157,8 @@ struct rmap_item { | |||
153 | #define STABLE_FLAG 0x200 /* is listed from the stable tree */ | 157 | #define STABLE_FLAG 0x200 /* is listed from the stable tree */ |
154 | 158 | ||
155 | /* The stable and unstable tree heads */ | 159 | /* The stable and unstable tree heads */ |
156 | static struct rb_root root_stable_tree = RB_ROOT; | 160 | static struct rb_root root_unstable_tree[MAX_NUMNODES]; |
157 | static struct rb_root root_unstable_tree = RB_ROOT; | 161 | static struct rb_root root_stable_tree[MAX_NUMNODES]; |
158 | 162 | ||
159 | #define MM_SLOTS_HASH_BITS 10 | 163 | #define MM_SLOTS_HASH_BITS 10 |
160 | static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); | 164 | static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); |
@@ -188,6 +192,9 @@ static unsigned int ksm_thread_pages_to_scan = 100; | |||
188 | /* Milliseconds ksmd should sleep between batches */ | 192 | /* Milliseconds ksmd should sleep between batches */ |
189 | static unsigned int ksm_thread_sleep_millisecs = 20; | 193 | static unsigned int ksm_thread_sleep_millisecs = 20; |
190 | 194 | ||
195 | /* Zeroed when merging across nodes is not allowed */ | ||
196 | static unsigned int ksm_merge_across_nodes = 1; | ||
197 | |||
191 | #define KSM_RUN_STOP 0 | 198 | #define KSM_RUN_STOP 0 |
192 | #define KSM_RUN_MERGE 1 | 199 | #define KSM_RUN_MERGE 1 |
193 | #define KSM_RUN_UNMERGE 2 | 200 | #define KSM_RUN_UNMERGE 2 |
@@ -441,10 +448,25 @@ out: page = NULL; | |||
441 | return page; | 448 | return page; |
442 | } | 449 | } |
443 | 450 | ||
451 | /* | ||
452 | * This helper is used for getting the right index into the array of tree | ||
453 | * roots. When the merge_across_nodes knob is set to 1, there are only two | ||
454 | * rb-trees for stable and unstable pages from all nodes, with roots at | ||
455 | * index 0. Otherwise, every node has its own stable and unstable tree. | ||
456 | */ | ||
457 | static inline int get_kpfn_nid(unsigned long kpfn) | ||
458 | { | ||
459 | if (ksm_merge_across_nodes) | ||
460 | return 0; | ||
461 | else | ||
462 | return pfn_to_nid(kpfn); | ||
463 | } | ||
464 | |||
444 | static void remove_node_from_stable_tree(struct stable_node *stable_node) | 465 | static void remove_node_from_stable_tree(struct stable_node *stable_node) |
445 | { | 466 | { |
446 | struct rmap_item *rmap_item; | 467 | struct rmap_item *rmap_item; |
447 | struct hlist_node *hlist; | 468 | struct hlist_node *hlist; |
469 | int nid; | ||
448 | 470 | ||
449 | hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { | 471 | hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) { |
450 | if (rmap_item->hlist.next) | 472 | if (rmap_item->hlist.next) |
@@ -456,7 +478,9 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node) | |||
456 | cond_resched(); | 478 | cond_resched(); |
457 | } | 479 | } |
458 | 480 | ||
459 | rb_erase(&stable_node->node, &root_stable_tree); | 481 | nid = get_kpfn_nid(stable_node->kpfn); |
482 | |||
483 | rb_erase(&stable_node->node, &root_stable_tree[nid]); | ||
460 | free_stable_node(stable_node); | 484 | free_stable_node(stable_node); |
461 | } | 485 | } |
462 | 486 | ||
@@ -554,7 +578,12 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) | |||
554 | age = (unsigned char)(ksm_scan.seqnr - rmap_item->address); | 578 | age = (unsigned char)(ksm_scan.seqnr - rmap_item->address); |
555 | BUG_ON(age > 1); | 579 | BUG_ON(age > 1); |
556 | if (!age) | 580 | if (!age) |
557 | rb_erase(&rmap_item->node, &root_unstable_tree); | 581 | #ifdef CONFIG_NUMA |
582 | rb_erase(&rmap_item->node, | ||
583 | &root_unstable_tree[rmap_item->nid]); | ||
584 | #else | ||
585 | rb_erase(&rmap_item->node, &root_unstable_tree[0]); | ||
586 | #endif | ||
558 | 587 | ||
559 | ksm_pages_unshared--; | 588 | ksm_pages_unshared--; |
560 | rmap_item->address &= PAGE_MASK; | 589 | rmap_item->address &= PAGE_MASK; |
@@ -990,8 +1019,9 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item, | |||
990 | */ | 1019 | */ |
991 | static struct page *stable_tree_search(struct page *page) | 1020 | static struct page *stable_tree_search(struct page *page) |
992 | { | 1021 | { |
993 | struct rb_node *node = root_stable_tree.rb_node; | 1022 | struct rb_node *node; |
994 | struct stable_node *stable_node; | 1023 | struct stable_node *stable_node; |
1024 | int nid; | ||
995 | 1025 | ||
996 | stable_node = page_stable_node(page); | 1026 | stable_node = page_stable_node(page); |
997 | if (stable_node) { /* ksm page forked */ | 1027 | if (stable_node) { /* ksm page forked */ |
@@ -999,6 +1029,9 @@ static struct page *stable_tree_search(struct page *page) | |||
999 | return page; | 1029 | return page; |
1000 | } | 1030 | } |
1001 | 1031 | ||
1032 | nid = get_kpfn_nid(page_to_pfn(page)); | ||
1033 | node = root_stable_tree[nid].rb_node; | ||
1034 | |||
1002 | while (node) { | 1035 | while (node) { |
1003 | struct page *tree_page; | 1036 | struct page *tree_page; |
1004 | int ret; | 1037 | int ret; |
@@ -1033,10 +1066,16 @@ static struct page *stable_tree_search(struct page *page) | |||
1033 | */ | 1066 | */ |
1034 | static struct stable_node *stable_tree_insert(struct page *kpage) | 1067 | static struct stable_node *stable_tree_insert(struct page *kpage) |
1035 | { | 1068 | { |
1036 | struct rb_node **new = &root_stable_tree.rb_node; | 1069 | int nid; |
1070 | unsigned long kpfn; | ||
1071 | struct rb_node **new; | ||
1037 | struct rb_node *parent = NULL; | 1072 | struct rb_node *parent = NULL; |
1038 | struct stable_node *stable_node; | 1073 | struct stable_node *stable_node; |
1039 | 1074 | ||
1075 | kpfn = page_to_pfn(kpage); | ||
1076 | nid = get_kpfn_nid(kpfn); | ||
1077 | new = &root_stable_tree[nid].rb_node; | ||
1078 | |||
1040 | while (*new) { | 1079 | while (*new) { |
1041 | struct page *tree_page; | 1080 | struct page *tree_page; |
1042 | int ret; | 1081 | int ret; |
@@ -1070,11 +1109,11 @@ static struct stable_node *stable_tree_insert(struct page *kpage) | |||
1070 | return NULL; | 1109 | return NULL; |
1071 | 1110 | ||
1072 | rb_link_node(&stable_node->node, parent, new); | 1111 | rb_link_node(&stable_node->node, parent, new); |
1073 | rb_insert_color(&stable_node->node, &root_stable_tree); | 1112 | rb_insert_color(&stable_node->node, &root_stable_tree[nid]); |
1074 | 1113 | ||
1075 | INIT_HLIST_HEAD(&stable_node->hlist); | 1114 | INIT_HLIST_HEAD(&stable_node->hlist); |
1076 | 1115 | ||
1077 | stable_node->kpfn = page_to_pfn(kpage); | 1116 | stable_node->kpfn = kpfn; |
1078 | set_page_stable_node(kpage, stable_node); | 1117 | set_page_stable_node(kpage, stable_node); |
1079 | 1118 | ||
1080 | return stable_node; | 1119 | return stable_node; |
@@ -1098,10 +1137,15 @@ static | |||
1098 | struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, | 1137 | struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, |
1099 | struct page *page, | 1138 | struct page *page, |
1100 | struct page **tree_pagep) | 1139 | struct page **tree_pagep) |
1101 | |||
1102 | { | 1140 | { |
1103 | struct rb_node **new = &root_unstable_tree.rb_node; | 1141 | struct rb_node **new; |
1142 | struct rb_root *root; | ||
1104 | struct rb_node *parent = NULL; | 1143 | struct rb_node *parent = NULL; |
1144 | int nid; | ||
1145 | |||
1146 | nid = get_kpfn_nid(page_to_pfn(page)); | ||
1147 | root = &root_unstable_tree[nid]; | ||
1148 | new = &root->rb_node; | ||
1105 | 1149 | ||
1106 | while (*new) { | 1150 | while (*new) { |
1107 | struct rmap_item *tree_rmap_item; | 1151 | struct rmap_item *tree_rmap_item; |
@@ -1122,6 +1166,18 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, | |||
1122 | return NULL; | 1166 | return NULL; |
1123 | } | 1167 | } |
1124 | 1168 | ||
1169 | /* | ||
1170 | * If tree_page has been migrated to another NUMA node, it | ||
1171 | * will be flushed out and put into the right unstable tree | ||
1172 | * next time: only merge with it if merge_across_nodes. | ||
1173 | * Note that we don't have a similar problem for PageKsm | ||
1174 | * pages because their migration is currently disabled. (62b61f611e) | ||
1175 | */ | ||
1176 | if (!ksm_merge_across_nodes && page_to_nid(tree_page) != nid) { | ||
1177 | put_page(tree_page); | ||
1178 | return NULL; | ||
1179 | } | ||
1180 | |||
1125 | ret = memcmp_pages(page, tree_page); | 1181 | ret = memcmp_pages(page, tree_page); |
1126 | 1182 | ||
1127 | parent = *new; | 1183 | parent = *new; |
@@ -1139,8 +1195,11 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, | |||
1139 | 1195 | ||
1140 | rmap_item->address |= UNSTABLE_FLAG; | 1196 | rmap_item->address |= UNSTABLE_FLAG; |
1141 | rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK); | 1197 | rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK); |
1198 | #ifdef CONFIG_NUMA | ||
1199 | rmap_item->nid = nid; | ||
1200 | #endif | ||
1142 | rb_link_node(&rmap_item->node, parent, new); | 1201 | rb_link_node(&rmap_item->node, parent, new); |
1143 | rb_insert_color(&rmap_item->node, &root_unstable_tree); | 1202 | rb_insert_color(&rmap_item->node, root); |
1144 | 1203 | ||
1145 | ksm_pages_unshared++; | 1204 | ksm_pages_unshared++; |
1146 | return NULL; | 1205 | return NULL; |
@@ -1154,6 +1213,13 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, | |||
1154 | static void stable_tree_append(struct rmap_item *rmap_item, | 1213 | static void stable_tree_append(struct rmap_item *rmap_item, |
1155 | struct stable_node *stable_node) | 1214 | struct stable_node *stable_node) |
1156 | { | 1215 | { |
1216 | #ifdef CONFIG_NUMA | ||
1217 | /* | ||
1218 | * Usually rmap_item->nid is already set correctly, | ||
1219 | * but it may be wrong after switching merge_across_nodes. | ||
1220 | */ | ||
1221 | rmap_item->nid = get_kpfn_nid(stable_node->kpfn); | ||
1222 | #endif | ||
1157 | rmap_item->head = stable_node; | 1223 | rmap_item->head = stable_node; |
1158 | rmap_item->address |= STABLE_FLAG; | 1224 | rmap_item->address |= STABLE_FLAG; |
1159 | hlist_add_head(&rmap_item->hlist, &stable_node->hlist); | 1225 | hlist_add_head(&rmap_item->hlist, &stable_node->hlist); |
@@ -1283,6 +1349,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) | |||
1283 | struct mm_slot *slot; | 1349 | struct mm_slot *slot; |
1284 | struct vm_area_struct *vma; | 1350 | struct vm_area_struct *vma; |
1285 | struct rmap_item *rmap_item; | 1351 | struct rmap_item *rmap_item; |
1352 | int nid; | ||
1286 | 1353 | ||
1287 | if (list_empty(&ksm_mm_head.mm_list)) | 1354 | if (list_empty(&ksm_mm_head.mm_list)) |
1288 | return NULL; | 1355 | return NULL; |
@@ -1301,7 +1368,8 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) | |||
1301 | */ | 1368 | */ |
1302 | lru_add_drain_all(); | 1369 | lru_add_drain_all(); |
1303 | 1370 | ||
1304 | root_unstable_tree = RB_ROOT; | 1371 | for (nid = 0; nid < nr_node_ids; nid++) |
1372 | root_unstable_tree[nid] = RB_ROOT; | ||
1305 | 1373 | ||
1306 | spin_lock(&ksm_mmlist_lock); | 1374 | spin_lock(&ksm_mmlist_lock); |
1307 | slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list); | 1375 | slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list); |
@@ -1770,15 +1838,19 @@ static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn, | |||
1770 | unsigned long end_pfn) | 1838 | unsigned long end_pfn) |
1771 | { | 1839 | { |
1772 | struct rb_node *node; | 1840 | struct rb_node *node; |
1841 | int nid; | ||
1773 | 1842 | ||
1774 | for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) { | 1843 | for (nid = 0; nid < nr_node_ids; nid++) |
1775 | struct stable_node *stable_node; | 1844 | for (node = rb_first(&root_stable_tree[nid]); node; |
1845 | node = rb_next(node)) { | ||
1846 | struct stable_node *stable_node; | ||
1847 | |||
1848 | stable_node = rb_entry(node, struct stable_node, node); | ||
1849 | if (stable_node->kpfn >= start_pfn && | ||
1850 | stable_node->kpfn < end_pfn) | ||
1851 | return stable_node; | ||
1852 | } | ||
1776 | 1853 | ||
1777 | stable_node = rb_entry(node, struct stable_node, node); | ||
1778 | if (stable_node->kpfn >= start_pfn && | ||
1779 | stable_node->kpfn < end_pfn) | ||
1780 | return stable_node; | ||
1781 | } | ||
1782 | return NULL; | 1854 | return NULL; |
1783 | } | 1855 | } |
1784 | 1856 | ||
@@ -1925,6 +1997,40 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
1925 | } | 1997 | } |
1926 | KSM_ATTR(run); | 1998 | KSM_ATTR(run); |
1927 | 1999 | ||
2000 | #ifdef CONFIG_NUMA | ||
2001 | static ssize_t merge_across_nodes_show(struct kobject *kobj, | ||
2002 | struct kobj_attribute *attr, char *buf) | ||
2003 | { | ||
2004 | return sprintf(buf, "%u\n", ksm_merge_across_nodes); | ||
2005 | } | ||
2006 | |||
2007 | static ssize_t merge_across_nodes_store(struct kobject *kobj, | ||
2008 | struct kobj_attribute *attr, | ||
2009 | const char *buf, size_t count) | ||
2010 | { | ||
2011 | int err; | ||
2012 | unsigned long knob; | ||
2013 | |||
2014 | err = kstrtoul(buf, 10, &knob); | ||
2015 | if (err) | ||
2016 | return err; | ||
2017 | if (knob > 1) | ||
2018 | return -EINVAL; | ||
2019 | |||
2020 | mutex_lock(&ksm_thread_mutex); | ||
2021 | if (ksm_merge_across_nodes != knob) { | ||
2022 | if (ksm_pages_shared) | ||
2023 | err = -EBUSY; | ||
2024 | else | ||
2025 | ksm_merge_across_nodes = knob; | ||
2026 | } | ||
2027 | mutex_unlock(&ksm_thread_mutex); | ||
2028 | |||
2029 | return err ? err : count; | ||
2030 | } | ||
2031 | KSM_ATTR(merge_across_nodes); | ||
2032 | #endif | ||
2033 | |||
1928 | static ssize_t pages_shared_show(struct kobject *kobj, | 2034 | static ssize_t pages_shared_show(struct kobject *kobj, |
1929 | struct kobj_attribute *attr, char *buf) | 2035 | struct kobj_attribute *attr, char *buf) |
1930 | { | 2036 | { |
@@ -1979,6 +2085,9 @@ static struct attribute *ksm_attrs[] = { | |||
1979 | &pages_unshared_attr.attr, | 2085 | &pages_unshared_attr.attr, |
1980 | &pages_volatile_attr.attr, | 2086 | &pages_volatile_attr.attr, |
1981 | &full_scans_attr.attr, | 2087 | &full_scans_attr.attr, |
2088 | #ifdef CONFIG_NUMA | ||
2089 | &merge_across_nodes_attr.attr, | ||
2090 | #endif | ||
1982 | NULL, | 2091 | NULL, |
1983 | }; | 2092 | }; |
1984 | 2093 | ||
@@ -1992,11 +2101,15 @@ static int __init ksm_init(void) | |||
1992 | { | 2101 | { |
1993 | struct task_struct *ksm_thread; | 2102 | struct task_struct *ksm_thread; |
1994 | int err; | 2103 | int err; |
2104 | int nid; | ||
1995 | 2105 | ||
1996 | err = ksm_slab_init(); | 2106 | err = ksm_slab_init(); |
1997 | if (err) | 2107 | if (err) |
1998 | goto out; | 2108 | goto out; |
1999 | 2109 | ||
2110 | for (nid = 0; nid < nr_node_ids; nid++) | ||
2111 | root_stable_tree[nid] = RB_ROOT; | ||
2112 | |||
2000 | ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd"); | 2113 | ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd"); |
2001 | if (IS_ERR(ksm_thread)) { | 2114 | if (IS_ERR(ksm_thread)) { |
2002 | printk(KERN_ERR "ksm: creating kthread failed\n"); | 2115 | printk(KERN_ERR "ksm: creating kthread failed\n"); |