diff options
-rw-r--r-- | mm/ksm.c | 180 |
1 files changed, 101 insertions, 79 deletions
@@ -106,34 +106,44 @@ struct ksm_scan { | |||
106 | }; | 106 | }; |
107 | 107 | ||
108 | /** | 108 | /** |
109 | * struct stable_node - node of the stable rbtree | ||
110 | * @node: rb node of this ksm page in the stable tree | ||
111 | * @hlist: hlist head of rmap_items using this ksm page | ||
112 | */ | ||
113 | struct stable_node { | ||
114 | struct rb_node node; | ||
115 | struct hlist_head hlist; | ||
116 | }; | ||
117 | |||
118 | /** | ||
109 | * struct rmap_item - reverse mapping item for virtual addresses | 119 | * struct rmap_item - reverse mapping item for virtual addresses |
110 | * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list | 120 | * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list |
111 | * @filler: unused space we're making available in this patch | 121 | * @filler: unused space we're making available in this patch |
112 | * @mm: the memory structure this rmap_item is pointing into | 122 | * @mm: the memory structure this rmap_item is pointing into |
113 | * @address: the virtual address this rmap_item tracks (+ flags in low bits) | 123 | * @address: the virtual address this rmap_item tracks (+ flags in low bits) |
114 | * @oldchecksum: previous checksum of the page at that virtual address | 124 | * @oldchecksum: previous checksum of the page at that virtual address |
115 | * @node: rb_node of this rmap_item in either unstable or stable tree | 125 | * @node: rb node of this rmap_item in the unstable tree |
116 | * @next: next rmap_item hanging off the same node of the stable tree | 126 | * @head: pointer to stable_node heading this list in the stable tree |
117 | * @prev: previous rmap_item hanging off the same node of the stable tree | 127 | * @hlist: link into hlist of rmap_items hanging off that stable_node |
118 | */ | 128 | */ |
119 | struct rmap_item { | 129 | struct rmap_item { |
120 | struct rmap_item *rmap_list; | 130 | struct rmap_item *rmap_list; |
121 | unsigned long filler; | 131 | unsigned long filler; |
122 | struct mm_struct *mm; | 132 | struct mm_struct *mm; |
123 | unsigned long address; /* + low bits used for flags below */ | 133 | unsigned long address; /* + low bits used for flags below */ |
134 | unsigned int oldchecksum; /* when unstable */ | ||
124 | union { | 135 | union { |
125 | unsigned int oldchecksum; /* when unstable */ | 136 | struct rb_node node; /* when node of unstable tree */ |
126 | struct rmap_item *next; /* when stable */ | 137 | struct { /* when listed from stable tree */ |
127 | }; | 138 | struct stable_node *head; |
128 | union { | 139 | struct hlist_node hlist; |
129 | struct rb_node node; /* when tree node */ | 140 | }; |
130 | struct rmap_item *prev; /* in stable list */ | ||
131 | }; | 141 | }; |
132 | }; | 142 | }; |
133 | 143 | ||
134 | #define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */ | 144 | #define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */ |
135 | #define NODE_FLAG 0x100 /* is a node of unstable or stable tree */ | 145 | #define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */ |
136 | #define STABLE_FLAG 0x200 /* is a node or list item of stable tree */ | 146 | #define STABLE_FLAG 0x200 /* is listed from the stable tree */ |
137 | 147 | ||
138 | /* The stable and unstable tree heads */ | 148 | /* The stable and unstable tree heads */ |
139 | static struct rb_root root_stable_tree = RB_ROOT; | 149 | static struct rb_root root_stable_tree = RB_ROOT; |
@@ -150,6 +160,7 @@ static struct ksm_scan ksm_scan = { | |||
150 | }; | 160 | }; |
151 | 161 | ||
152 | static struct kmem_cache *rmap_item_cache; | 162 | static struct kmem_cache *rmap_item_cache; |
163 | static struct kmem_cache *stable_node_cache; | ||
153 | static struct kmem_cache *mm_slot_cache; | 164 | static struct kmem_cache *mm_slot_cache; |
154 | 165 | ||
155 | /* The number of nodes in the stable tree */ | 166 | /* The number of nodes in the stable tree */ |
@@ -192,13 +203,19 @@ static int __init ksm_slab_init(void) | |||
192 | if (!rmap_item_cache) | 203 | if (!rmap_item_cache) |
193 | goto out; | 204 | goto out; |
194 | 205 | ||
206 | stable_node_cache = KSM_KMEM_CACHE(stable_node, 0); | ||
207 | if (!stable_node_cache) | ||
208 | goto out_free1; | ||
209 | |||
195 | mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0); | 210 | mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0); |
196 | if (!mm_slot_cache) | 211 | if (!mm_slot_cache) |
197 | goto out_free; | 212 | goto out_free2; |
198 | 213 | ||
199 | return 0; | 214 | return 0; |
200 | 215 | ||
201 | out_free: | 216 | out_free2: |
217 | kmem_cache_destroy(stable_node_cache); | ||
218 | out_free1: | ||
202 | kmem_cache_destroy(rmap_item_cache); | 219 | kmem_cache_destroy(rmap_item_cache); |
203 | out: | 220 | out: |
204 | return -ENOMEM; | 221 | return -ENOMEM; |
@@ -207,6 +224,7 @@ out: | |||
207 | static void __init ksm_slab_free(void) | 224 | static void __init ksm_slab_free(void) |
208 | { | 225 | { |
209 | kmem_cache_destroy(mm_slot_cache); | 226 | kmem_cache_destroy(mm_slot_cache); |
227 | kmem_cache_destroy(stable_node_cache); | ||
210 | kmem_cache_destroy(rmap_item_cache); | 228 | kmem_cache_destroy(rmap_item_cache); |
211 | mm_slot_cache = NULL; | 229 | mm_slot_cache = NULL; |
212 | } | 230 | } |
@@ -228,6 +246,16 @@ static inline void free_rmap_item(struct rmap_item *rmap_item) | |||
228 | kmem_cache_free(rmap_item_cache, rmap_item); | 246 | kmem_cache_free(rmap_item_cache, rmap_item); |
229 | } | 247 | } |
230 | 248 | ||
249 | static inline struct stable_node *alloc_stable_node(void) | ||
250 | { | ||
251 | return kmem_cache_alloc(stable_node_cache, GFP_KERNEL); | ||
252 | } | ||
253 | |||
254 | static inline void free_stable_node(struct stable_node *stable_node) | ||
255 | { | ||
256 | kmem_cache_free(stable_node_cache, stable_node); | ||
257 | } | ||
258 | |||
231 | static inline struct mm_slot *alloc_mm_slot(void) | 259 | static inline struct mm_slot *alloc_mm_slot(void) |
232 | { | 260 | { |
233 | if (!mm_slot_cache) /* initialization failed */ | 261 | if (!mm_slot_cache) /* initialization failed */ |
@@ -429,36 +457,22 @@ static struct page *get_ksm_page(struct rmap_item *rmap_item) | |||
429 | */ | 457 | */ |
430 | static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) | 458 | static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) |
431 | { | 459 | { |
432 | if (in_stable_tree(rmap_item)) { | 460 | if (rmap_item->address & STABLE_FLAG) { |
433 | struct rmap_item *next_item = rmap_item->next; | 461 | struct stable_node *stable_node; |
434 | |||
435 | if (rmap_item->address & NODE_FLAG) { | ||
436 | if (next_item) { | ||
437 | rb_replace_node(&rmap_item->node, | ||
438 | &next_item->node, | ||
439 | &root_stable_tree); | ||
440 | next_item->address |= NODE_FLAG; | ||
441 | ksm_pages_sharing--; | ||
442 | } else { | ||
443 | rb_erase(&rmap_item->node, &root_stable_tree); | ||
444 | ksm_pages_shared--; | ||
445 | } | ||
446 | } else { | ||
447 | struct rmap_item *prev_item = rmap_item->prev; | ||
448 | 462 | ||
449 | BUG_ON(prev_item->next != rmap_item); | 463 | stable_node = rmap_item->head; |
450 | prev_item->next = next_item; | 464 | hlist_del(&rmap_item->hlist); |
451 | if (next_item) { | 465 | if (stable_node->hlist.first) |
452 | BUG_ON(next_item->prev != rmap_item); | ||
453 | next_item->prev = rmap_item->prev; | ||
454 | } | ||
455 | ksm_pages_sharing--; | 466 | ksm_pages_sharing--; |
467 | else { | ||
468 | rb_erase(&stable_node->node, &root_stable_tree); | ||
469 | free_stable_node(stable_node); | ||
470 | ksm_pages_shared--; | ||
456 | } | 471 | } |
457 | 472 | ||
458 | rmap_item->next = NULL; | ||
459 | rmap_item->address &= PAGE_MASK; | 473 | rmap_item->address &= PAGE_MASK; |
460 | 474 | ||
461 | } else if (rmap_item->address & NODE_FLAG) { | 475 | } else if (rmap_item->address & UNSTABLE_FLAG) { |
462 | unsigned char age; | 476 | unsigned char age; |
463 | /* | 477 | /* |
464 | * Usually ksmd can and must skip the rb_erase, because | 478 | * Usually ksmd can and must skip the rb_erase, because |
@@ -859,31 +873,32 @@ up: | |||
859 | * This function checks if there is a page inside the stable tree | 873 | * This function checks if there is a page inside the stable tree |
860 | * with identical content to the page that we are scanning right now. | 874 | * with identical content to the page that we are scanning right now. |
861 | * | 875 | * |
862 | * This function return rmap_item pointer to the identical item if found, | 876 | * This function returns the stable tree node of identical content if found, |
863 | * NULL otherwise. | 877 | * NULL otherwise. |
864 | */ | 878 | */ |
865 | static struct rmap_item *stable_tree_search(struct page *page, | 879 | static struct stable_node *stable_tree_search(struct page *page, |
866 | struct page **tree_pagep) | 880 | struct page **tree_pagep) |
867 | { | 881 | { |
868 | struct rb_node *node = root_stable_tree.rb_node; | 882 | struct rb_node *node = root_stable_tree.rb_node; |
883 | struct stable_node *stable_node; | ||
869 | 884 | ||
870 | while (node) { | 885 | while (node) { |
871 | struct rmap_item *tree_rmap_item, *next_rmap_item; | 886 | struct hlist_node *hlist, *hnext; |
887 | struct rmap_item *tree_rmap_item; | ||
872 | struct page *tree_page; | 888 | struct page *tree_page; |
873 | int ret; | 889 | int ret; |
874 | 890 | ||
875 | tree_rmap_item = rb_entry(node, struct rmap_item, node); | 891 | stable_node = rb_entry(node, struct stable_node, node); |
876 | while (tree_rmap_item) { | 892 | hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext, |
893 | &stable_node->hlist, hlist) { | ||
877 | BUG_ON(!in_stable_tree(tree_rmap_item)); | 894 | BUG_ON(!in_stable_tree(tree_rmap_item)); |
878 | cond_resched(); | 895 | cond_resched(); |
879 | tree_page = get_ksm_page(tree_rmap_item); | 896 | tree_page = get_ksm_page(tree_rmap_item); |
880 | if (tree_page) | 897 | if (tree_page) |
881 | break; | 898 | break; |
882 | next_rmap_item = tree_rmap_item->next; | ||
883 | remove_rmap_item_from_tree(tree_rmap_item); | 899 | remove_rmap_item_from_tree(tree_rmap_item); |
884 | tree_rmap_item = next_rmap_item; | ||
885 | } | 900 | } |
886 | if (!tree_rmap_item) | 901 | if (!hlist) |
887 | return NULL; | 902 | return NULL; |
888 | 903 | ||
889 | ret = memcmp_pages(page, tree_page); | 904 | ret = memcmp_pages(page, tree_page); |
@@ -896,7 +911,7 @@ static struct rmap_item *stable_tree_search(struct page *page, | |||
896 | node = node->rb_right; | 911 | node = node->rb_right; |
897 | } else { | 912 | } else { |
898 | *tree_pagep = tree_page; | 913 | *tree_pagep = tree_page; |
899 | return tree_rmap_item; | 914 | return stable_node; |
900 | } | 915 | } |
901 | } | 916 | } |
902 | 917 | ||
@@ -907,31 +922,32 @@ static struct rmap_item *stable_tree_search(struct page *page, | |||
907 | * stable_tree_insert - insert rmap_item pointing to new ksm page | 922 | * stable_tree_insert - insert rmap_item pointing to new ksm page |
908 | * into the stable tree. | 923 | * into the stable tree. |
909 | * | 924 | * |
910 | * This function returns rmap_item if success, NULL otherwise. | 925 | * This function returns the stable tree node just allocated on success, |
926 | * NULL otherwise. | ||
911 | */ | 927 | */ |
912 | static struct rmap_item *stable_tree_insert(struct page *kpage, | 928 | static struct stable_node *stable_tree_insert(struct page *kpage) |
913 | struct rmap_item *rmap_item) | ||
914 | { | 929 | { |
915 | struct rb_node **new = &root_stable_tree.rb_node; | 930 | struct rb_node **new = &root_stable_tree.rb_node; |
916 | struct rb_node *parent = NULL; | 931 | struct rb_node *parent = NULL; |
932 | struct stable_node *stable_node; | ||
917 | 933 | ||
918 | while (*new) { | 934 | while (*new) { |
919 | struct rmap_item *tree_rmap_item, *next_rmap_item; | 935 | struct hlist_node *hlist, *hnext; |
936 | struct rmap_item *tree_rmap_item; | ||
920 | struct page *tree_page; | 937 | struct page *tree_page; |
921 | int ret; | 938 | int ret; |
922 | 939 | ||
923 | tree_rmap_item = rb_entry(*new, struct rmap_item, node); | 940 | stable_node = rb_entry(*new, struct stable_node, node); |
924 | while (tree_rmap_item) { | 941 | hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext, |
942 | &stable_node->hlist, hlist) { | ||
925 | BUG_ON(!in_stable_tree(tree_rmap_item)); | 943 | BUG_ON(!in_stable_tree(tree_rmap_item)); |
926 | cond_resched(); | 944 | cond_resched(); |
927 | tree_page = get_ksm_page(tree_rmap_item); | 945 | tree_page = get_ksm_page(tree_rmap_item); |
928 | if (tree_page) | 946 | if (tree_page) |
929 | break; | 947 | break; |
930 | next_rmap_item = tree_rmap_item->next; | ||
931 | remove_rmap_item_from_tree(tree_rmap_item); | 948 | remove_rmap_item_from_tree(tree_rmap_item); |
932 | tree_rmap_item = next_rmap_item; | ||
933 | } | 949 | } |
934 | if (!tree_rmap_item) | 950 | if (!hlist) |
935 | return NULL; | 951 | return NULL; |
936 | 952 | ||
937 | ret = memcmp_pages(kpage, tree_page); | 953 | ret = memcmp_pages(kpage, tree_page); |
@@ -952,13 +968,16 @@ static struct rmap_item *stable_tree_insert(struct page *kpage, | |||
952 | } | 968 | } |
953 | } | 969 | } |
954 | 970 | ||
955 | rmap_item->address |= NODE_FLAG | STABLE_FLAG; | 971 | stable_node = alloc_stable_node(); |
956 | rmap_item->next = NULL; | 972 | if (!stable_node) |
957 | rb_link_node(&rmap_item->node, parent, new); | 973 | return NULL; |
958 | rb_insert_color(&rmap_item->node, &root_stable_tree); | ||
959 | 974 | ||
960 | ksm_pages_shared++; | 975 | rb_link_node(&stable_node->node, parent, new); |
961 | return rmap_item; | 976 | rb_insert_color(&stable_node->node, &root_stable_tree); |
977 | |||
978 | INIT_HLIST_HEAD(&stable_node->hlist); | ||
979 | |||
980 | return stable_node; | ||
962 | } | 981 | } |
963 | 982 | ||
964 | /* | 983 | /* |
@@ -1018,7 +1037,7 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, | |||
1018 | } | 1037 | } |
1019 | } | 1038 | } |
1020 | 1039 | ||
1021 | rmap_item->address |= NODE_FLAG; | 1040 | rmap_item->address |= UNSTABLE_FLAG; |
1022 | rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK); | 1041 | rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK); |
1023 | rb_link_node(&rmap_item->node, parent, new); | 1042 | rb_link_node(&rmap_item->node, parent, new); |
1024 | rb_insert_color(&rmap_item->node, &root_unstable_tree); | 1043 | rb_insert_color(&rmap_item->node, &root_unstable_tree); |
@@ -1033,18 +1052,16 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, | |||
1033 | * the same ksm page. | 1052 | * the same ksm page. |
1034 | */ | 1053 | */ |
1035 | static void stable_tree_append(struct rmap_item *rmap_item, | 1054 | static void stable_tree_append(struct rmap_item *rmap_item, |
1036 | struct rmap_item *tree_rmap_item) | 1055 | struct stable_node *stable_node) |
1037 | { | 1056 | { |
1038 | rmap_item->next = tree_rmap_item->next; | 1057 | rmap_item->head = stable_node; |
1039 | rmap_item->prev = tree_rmap_item; | ||
1040 | |||
1041 | if (tree_rmap_item->next) | ||
1042 | tree_rmap_item->next->prev = rmap_item; | ||
1043 | |||
1044 | tree_rmap_item->next = rmap_item; | ||
1045 | rmap_item->address |= STABLE_FLAG; | 1058 | rmap_item->address |= STABLE_FLAG; |
1059 | hlist_add_head(&rmap_item->hlist, &stable_node->hlist); | ||
1046 | 1060 | ||
1047 | ksm_pages_sharing++; | 1061 | if (rmap_item->hlist.next) |
1062 | ksm_pages_sharing++; | ||
1063 | else | ||
1064 | ksm_pages_shared++; | ||
1048 | } | 1065 | } |
1049 | 1066 | ||
1050 | /* | 1067 | /* |
@@ -1060,6 +1077,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
1060 | { | 1077 | { |
1061 | struct rmap_item *tree_rmap_item; | 1078 | struct rmap_item *tree_rmap_item; |
1062 | struct page *tree_page = NULL; | 1079 | struct page *tree_page = NULL; |
1080 | struct stable_node *stable_node; | ||
1063 | struct page *kpage; | 1081 | struct page *kpage; |
1064 | unsigned int checksum; | 1082 | unsigned int checksum; |
1065 | int err; | 1083 | int err; |
@@ -1067,8 +1085,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
1067 | remove_rmap_item_from_tree(rmap_item); | 1085 | remove_rmap_item_from_tree(rmap_item); |
1068 | 1086 | ||
1069 | /* We first start with searching the page inside the stable tree */ | 1087 | /* We first start with searching the page inside the stable tree */ |
1070 | tree_rmap_item = stable_tree_search(page, &tree_page); | 1088 | stable_node = stable_tree_search(page, &tree_page); |
1071 | if (tree_rmap_item) { | 1089 | if (stable_node) { |
1072 | kpage = tree_page; | 1090 | kpage = tree_page; |
1073 | if (page == kpage) /* forked */ | 1091 | if (page == kpage) /* forked */ |
1074 | err = 0; | 1092 | err = 0; |
@@ -1080,7 +1098,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
1080 | * The page was successfully merged: | 1098 | * The page was successfully merged: |
1081 | * add its rmap_item to the stable tree. | 1099 | * add its rmap_item to the stable tree. |
1082 | */ | 1100 | */ |
1083 | stable_tree_append(rmap_item, tree_rmap_item); | 1101 | stable_tree_append(rmap_item, stable_node); |
1084 | } | 1102 | } |
1085 | put_page(kpage); | 1103 | put_page(kpage); |
1086 | return; | 1104 | return; |
@@ -1121,19 +1139,23 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
1121 | if (kpage) { | 1139 | if (kpage) { |
1122 | remove_rmap_item_from_tree(tree_rmap_item); | 1140 | remove_rmap_item_from_tree(tree_rmap_item); |
1123 | 1141 | ||
1142 | stable_node = stable_tree_insert(kpage); | ||
1143 | if (stable_node) { | ||
1144 | stable_tree_append(tree_rmap_item, stable_node); | ||
1145 | stable_tree_append(rmap_item, stable_node); | ||
1146 | } | ||
1147 | put_page(kpage); | ||
1148 | |||
1124 | /* | 1149 | /* |
1125 | * If we fail to insert the page into the stable tree, | 1150 | * If we fail to insert the page into the stable tree, |
1126 | * we will have 2 virtual addresses that are pointing | 1151 | * we will have 2 virtual addresses that are pointing |
1127 | * to a ksm page left outside the stable tree, | 1152 | * to a ksm page left outside the stable tree, |
1128 | * in which case we need to break_cow on both. | 1153 | * in which case we need to break_cow on both. |
1129 | */ | 1154 | */ |
1130 | if (stable_tree_insert(kpage, tree_rmap_item)) | 1155 | if (!stable_node) { |
1131 | stable_tree_append(rmap_item, tree_rmap_item); | ||
1132 | else { | ||
1133 | break_cow(tree_rmap_item); | 1156 | break_cow(tree_rmap_item); |
1134 | break_cow(rmap_item); | 1157 | break_cow(rmap_item); |
1135 | } | 1158 | } |
1136 | put_page(kpage); | ||
1137 | } | 1159 | } |
1138 | } | 1160 | } |
1139 | } | 1161 | } |