aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2008-12-01 16:13:48 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-01 22:55:24 -0500
commitdc19f9db38295f811d9041bd89b113beccbd763a (patch)
tree6f1ce3a71df84981b4b5b70fd03f0d1fe20b196e
parentb29acbdcf877009af3f1fc0750bcac314c51e055 (diff)
memcg: memory hotplug fix for notifier callback
Fixes for memcg/memory hotplug. While memory hotplug allocates/frees the memmap, page_cgroup is not freed at OFFLINE when it was allocated via bootmem. (Because freeing bootmem requires special care.) Then, if page_cgroup was allocated by bootmem and the memmap is freed/re-allocated by memory hotplug, page_cgroup->page == page is no longer true. But the current MEM_ONLINE handler doesn't check this, and doesn't update page_cgroup->page when it is not necessary to allocate page_cgroup. (This was not found because the memmap is not freed if SPARSEMEM_VMEMMAP is y.) I also noticed that MEM_ONLINE can be called against "part of a section". So, freeing page_cgroup at CANCEL_ONLINE will cause trouble (it frees a page_cgroup that is still in use). Don't roll back at CANCEL. One more thing: the current memory hotplug notifier chain is stopped by slub because it sets NOTIFY_STOP_MASK in its return value. So, page_cgroup's callback is never called. (It has lower priority than slub now.) I think this behavior of slub is not intentional (a BUG), and this patch fixes it. Another way to be considered for page_cgroup allocation: - free page_cgroup at OFFLINE even if it's from bootmem, and remove the special handler. But that requires more changes. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=12041 Signed-off-by: KAMEZAWA Hiruyoki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Pavel Emelyanov <xemul@openvz.org> Tested-by: Badari Pulavarty <pbadari@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/page_cgroup.c43
-rw-r--r--mm/slub.c6
2 files changed, 33 insertions, 16 deletions
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 436c00229e70..0b3cbf090a67 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -107,19 +107,29 @@ int __init_refok init_section_page_cgroup(unsigned long pfn)
107 107
108 section = __pfn_to_section(pfn); 108 section = __pfn_to_section(pfn);
109 109
110 if (section->page_cgroup) 110 if (!section->page_cgroup) {
111 return 0; 111 nid = page_to_nid(pfn_to_page(pfn));
112 112 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
113 nid = page_to_nid(pfn_to_page(pfn)); 113 if (slab_is_available()) {
114 114 base = kmalloc_node(table_size, GFP_KERNEL, nid);
115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 115 if (!base)
116 if (slab_is_available()) { 116 base = vmalloc_node(table_size, nid);
117 base = kmalloc_node(table_size, GFP_KERNEL, nid); 117 } else {
118 if (!base) 118 base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
119 base = vmalloc_node(table_size, nid); 119 table_size,
120 } else {
121 base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size,
122 PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); 120 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
121 }
122 } else {
123 /*
124 * We don't have to allocate page_cgroup again, but
125 * address of memmap may be changed. So, we have to initialize
126 * again.
127 */
128 base = section->page_cgroup + pfn;
129 table_size = 0;
130 /* check address of memmap is changed or not. */
131 if (base->page == pfn_to_page(pfn))
132 return 0;
123 } 133 }
124 134
125 if (!base) { 135 if (!base) {
@@ -208,18 +218,23 @@ static int __meminit page_cgroup_callback(struct notifier_block *self,
208 ret = online_page_cgroup(mn->start_pfn, 218 ret = online_page_cgroup(mn->start_pfn,
209 mn->nr_pages, mn->status_change_nid); 219 mn->nr_pages, mn->status_change_nid);
210 break; 220 break;
211 case MEM_CANCEL_ONLINE:
212 case MEM_OFFLINE: 221 case MEM_OFFLINE:
213 offline_page_cgroup(mn->start_pfn, 222 offline_page_cgroup(mn->start_pfn,
214 mn->nr_pages, mn->status_change_nid); 223 mn->nr_pages, mn->status_change_nid);
215 break; 224 break;
225 case MEM_CANCEL_ONLINE:
216 case MEM_GOING_OFFLINE: 226 case MEM_GOING_OFFLINE:
217 break; 227 break;
218 case MEM_ONLINE: 228 case MEM_ONLINE:
219 case MEM_CANCEL_OFFLINE: 229 case MEM_CANCEL_OFFLINE:
220 break; 230 break;
221 } 231 }
222 ret = notifier_from_errno(ret); 232
233 if (ret)
234 ret = notifier_from_errno(ret);
235 else
236 ret = NOTIFY_OK;
237
223 return ret; 238 return ret;
224} 239}
225 240
diff --git a/mm/slub.c b/mm/slub.c
index 7ad489af9561..749588a50a5a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2931,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self,
2931 case MEM_CANCEL_OFFLINE: 2931 case MEM_CANCEL_OFFLINE:
2932 break; 2932 break;
2933 } 2933 }
2934 2934 if (ret)
2935 ret = notifier_from_errno(ret); 2935 ret = notifier_from_errno(ret);
2936 else
2937 ret = NOTIFY_OK;
2936 return ret; 2938 return ret;
2937} 2939}
2938 2940