Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig             3
-rw-r--r--  mm/filemap.c          31
-rw-r--r--  mm/filemap.h           4
-rw-r--r--  mm/filemap_xip.c       2
-rw-r--r--  mm/memory_hotplug.c  126
-rw-r--r--  mm/page-writeback.c    4
-rw-r--r--  mm/page_alloc.c        8
-rw-r--r--  mm/shmem.c             4
-rw-r--r--  mm/slab.c             21
-rw-r--r--  mm/sparse.c            2
-rw-r--r--  mm/swap_state.c        2
-rw-r--r--  mm/vmscan.c           39
12 files changed, 192 insertions(+), 54 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index e3644b0062b1..8f5b45615f7b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -115,7 +115,8 @@ config SPARSEMEM_EXTREME
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
-	depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+	depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
+	depends on (IA64 || X86 || PPC64)
 
 comment "Memory hotplug is currently incompatible with Software Suspend"
 	depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
diff --git a/mm/filemap.c b/mm/filemap.c
index 9c7334bafda8..648f2c0c8e18 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2069,7 +2069,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	struct address_space_operations *a_ops = mapping->a_ops;
+	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode *inode = mapping->host;
 	long status = 0;
 	struct page *page;
@@ -2095,14 +2095,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	do {
 		unsigned long index;
 		unsigned long offset;
-		unsigned long maxlen;
 		size_t copied;
 
 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 		index = pos >> PAGE_CACHE_SHIFT;
 		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
+
+		/* Limit the size of the copy to the caller's write size */
+		bytes = min(bytes, count);
+
+		/*
+		 * Limit the size of the copy to that of the current segment,
+		 * because fault_in_pages_readable() doesn't know how to walk
+		 * segments.
+		 */
+		bytes = min(bytes, cur_iov->iov_len - iov_base);
 
 		/*
 		 * Bring in the user page that we will copy from _first_.
@@ -2110,10 +2117,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
 		 */
-		maxlen = cur_iov->iov_len - iov_base;
-		if (maxlen > bytes)
-			maxlen = bytes;
-		fault_in_pages_readable(buf, maxlen);
+		fault_in_pages_readable(buf, bytes);
 
 		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
 		if (!page) {
@@ -2121,6 +2125,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			break;
 		}
 
+		if (unlikely(bytes == 0)) {
+			status = 0;
+			copied = 0;
+			goto zero_length_segment;
+		}
+
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 		if (unlikely(status)) {
 			loff_t isize = i_size_read(inode);
@@ -2150,7 +2160,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			page_cache_release(page);
 			continue;
 		}
-		if (likely(copied > 0)) {
+zero_length_segment:
+		if (likely(copied >= 0)) {
 			if (!status)
 				status = copied;
 
@@ -2215,7 +2226,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t *ppos)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space * mapping = file->f_mapping;
+	const struct address_space * mapping = file->f_mapping;
 	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	struct inode *inode = mapping->host;
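
The two min() clamps introduced above replace the old maxlen logic: the per-iteration copy is bounded by the remainder of the current page, by the caller's remaining count, and by the remainder of the current iovec segment, which may now legitimately be zero. For illustration, a minimal stand-alone sketch of the same arithmetic; the function name is hypothetical and PAGE_CACHE_SIZE is assumed to be the usual 4096:

	#include <stddef.h>

	#define PAGE_CACHE_SIZE 4096UL	/* assumption for this sketch */

	/*
	 * Bytes copyable in one pass: no more than the rest of the current
	 * page, no more than the caller asked for, and no more than what is
	 * left of the current segment, since fault_in_pages_readable()
	 * cannot walk across segment boundaries.
	 */
	static size_t copy_quantum(unsigned long long pos, size_t count,
				   size_t iov_len, size_t iov_base)
	{
		size_t bytes = PAGE_CACHE_SIZE - (pos & (PAGE_CACHE_SIZE - 1));

		if (bytes > count)
			bytes = count;
		if (bytes > iov_len - iov_base)
			bytes = iov_len - iov_base;	/* may be 0 */
		return bytes;
	}
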
diff --git a/mm/filemap.h b/mm/filemap.h
index 536979fb4ba7..3f2a343c6015 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -88,7 +88,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
 	const struct iovec *iov = *iovp;
 	size_t base = *basep;
 
-	while (bytes) {
+	do {
 		int copy = min(bytes, iov->iov_len - base);
 
 		bytes -= copy;
@@ -97,7 +97,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
 			iov++;
 			base = 0;
 		}
-	}
+	} while (bytes);
 	*iovp = iov;
 	*basep = base;
 }
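
Turning the while into a do-while matters for the zero-length-segment path added in mm/filemap.c above: a call with bytes == 0 must still run the body once so the cursor steps over an exhausted or empty segment. A user-space model of the behaviour (names and the test harness are illustrative, not kernel API):

	#include <stdio.h>
	#include <sys/uio.h>

	/*
	 * Simplified model of filemap_set_next_iovec(): with the do-while
	 * form, a call with bytes == 0 still executes the body once and
	 * advances past a fully consumed (e.g. zero-length) segment.
	 */
	static void set_next_iovec(const struct iovec **iovp, size_t *basep,
				   size_t bytes)
	{
		const struct iovec *iov = *iovp;
		size_t base = *basep;

		do {
			size_t copy = bytes < iov->iov_len - base ?
				      bytes : iov->iov_len - base;

			bytes -= copy;
			base += copy;
			if (iov->iov_len == base) {
				iov++;
				base = 0;
			}
		} while (bytes);
		*iovp = iov;
		*basep = base;
	}

	int main(void)
	{
		struct iovec v[2] = { { NULL, 0 }, { NULL, 8 } };
		const struct iovec *cur = v;
		size_t base = 0;

		set_next_iovec(&cur, &base, 0);	/* skips the empty segment */
		printf("now at segment %ld\n", (long)(cur - v));	/* -> 1 */
		return 0;
	}
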
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index b960ac8e5918..b4fd0d7c9bfb 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -273,7 +273,7 @@ __xip_file_write(struct file *filp, const char __user *buf,
 		size_t count, loff_t pos, loff_t *ppos)
 {
 	struct address_space * mapping = filp->f_mapping;
-	struct address_space_operations *a_ops = mapping->a_ops;
+	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode *inode = mapping->host;
 	long status = 0;
 	struct page *page;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 841a077d5aeb..ea4038838b0a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -21,6 +21,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
+#include <linux/ioport.h>
 
 #include <asm/tlbflush.h>
 
@@ -126,6 +127,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	unsigned long i;
 	unsigned long flags;
 	unsigned long onlined_pages = 0;
+	struct resource res;
+	u64 section_end;
+	unsigned long start_pfn;
 	struct zone *zone;
 	int need_zonelists_rebuild = 0;
 
@@ -148,10 +152,27 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	if (!populated_zone(zone))
 		need_zonelists_rebuild = 1;
 
-	for (i = 0; i < nr_pages; i++) {
-		struct page *page = pfn_to_page(pfn + i);
-		online_page(page);
-		onlined_pages++;
+	res.start = (u64)pfn << PAGE_SHIFT;
+	res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
+	res.flags = IORESOURCE_MEM; /* we just need system ram */
+	section_end = res.end;
+
+	while (find_next_system_ram(&res) >= 0) {
+		start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
+		nr_pages = (unsigned long)
+			((res.end + 1 - res.start) >> PAGE_SHIFT);
+
+		if (PageReserved(pfn_to_page(start_pfn))) {
+			/* this region's page is not onlined now */
+			for (i = 0; i < nr_pages; i++) {
+				struct page *page = pfn_to_page(start_pfn + i);
+				online_page(page);
+				onlined_pages++;
+			}
+		}
+
+		res.start = res.end + 1;
+		res.end = section_end;
 	}
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
@@ -163,3 +184,100 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	vm_total_pages = nr_free_pagecache_pages();
 	return 0;
 }
+
+static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
+{
+	struct pglist_data *pgdat;
+	unsigned long zones_size[MAX_NR_ZONES] = {0};
+	unsigned long zholes_size[MAX_NR_ZONES] = {0};
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+
+	pgdat = arch_alloc_nodedata(nid);
+	if (!pgdat)
+		return NULL;
+
+	arch_refresh_nodedata(nid, pgdat);
+
+	/* we can use NODE_DATA(nid) from here */
+
+	/* init node's zones as empty zones, we don't have any present pages.*/
+	free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
+
+	return pgdat;
+}
+
+static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
+{
+	arch_refresh_nodedata(nid, NULL);
+	arch_free_nodedata(pgdat);
+	return;
+}
+
+/* add this memory to iomem resource */
+static void register_memory_resource(u64 start, u64 size)
+{
+	struct resource *res;
+
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(!res);
+
+	res->name = "System RAM";
+	res->start = start;
+	res->end = start + size - 1;
+	res->flags = IORESOURCE_MEM;
+	if (request_resource(&iomem_resource, res) < 0) {
+		printk("System RAM resource %llx - %llx cannot be added\n",
+			(unsigned long long)res->start, (unsigned long long)res->end);
+		kfree(res);
+	}
+}
+
+
+
+int add_memory(int nid, u64 start, u64 size)
+{
+	pg_data_t *pgdat = NULL;
+	int new_pgdat = 0;
+	int ret;
+
+	if (!node_online(nid)) {
+		pgdat = hotadd_new_pgdat(nid, start);
+		if (!pgdat)
+			return -ENOMEM;
+		new_pgdat = 1;
+		ret = kswapd_run(nid);
+		if (ret)
+			goto error;
+	}
+
+	/* call arch's memory hotadd */
+	ret = arch_add_memory(nid, start, size);
+
+	if (ret < 0)
+		goto error;
+
+	/* we online node here. we can't roll back from here. */
+	node_set_online(nid);
+
+	if (new_pgdat) {
+		ret = register_one_node(nid);
+		/*
+		 * If sysfs file of new node can't create, cpu on the node
+		 * can't be hot-added. There is no rollback way now.
+		 * So, check by BUG_ON() to catch it reluctantly..
+		 */
+		BUG_ON(ret);
+	}
+
+	/* register this memory as resource */
+	register_memory_resource(start, size);
+
+	return ret;
+error:
+	/* rollback pgdat allocation and others */
+	if (new_pgdat)
+		rollback_node_hotadd(nid, pgdat);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory);
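
add_memory() is the new entry point for node and memory hot-add: allocate and initialise a pgdat if the node is new, start kswapd on it, let the architecture map the range, mark the node online, register it with sysfs, and claim the range in the iomem tree. A hypothetical caller, for illustration only (the ACPI memory hot-add driver is the intended user; this probe function, its parameters, and the nid handling are not part of this patch):

	/* kernel-style sketch; a real caller derives nid from firmware info */
	static int example_probe_memory(int nid, u64 phys_addr, u64 size)
	{
		int ret;

		/* add_memory() leaves the new pages offline and reserved */
		ret = add_memory(nid, phys_addr, size);
		if (ret)
			return ret;

		/* bring the pages online; often driven from sysfs instead */
		return online_pages(phys_addr >> PAGE_SHIFT,
				    size >> PAGE_SHIFT);
	}
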
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8ccf6f1b1473..4ec7026c7bab 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -516,14 +516,14 @@ static void set_ratelimit(void)
 	ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
 }
 
-static int
+static int __cpuinit
 ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
 {
 	set_ratelimit();
 	return 0;
 }
 
-static struct notifier_block ratelimit_nb = {
+static struct notifier_block __cpuinitdata ratelimit_nb = {
 	.notifier_call	= ratelimit_handler,
 	.next		= NULL,
 };
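
The same annotation pattern recurs in page_alloc.c, slab.c and vmscan.c below: marking the callback __cpuinit and the notifier_block __cpuinitdata lets both be discarded after boot when CPU hotplug support is compiled out. The bare shape of the idiom, with illustrative names:

	#include <linux/init.h>
	#include <linux/notifier.h>

	static int __cpuinit example_cpu_callback(struct notifier_block *nb,
						  unsigned long action, void *hcpu)
	{
		/* react to CPU_UP_PREPARE / CPU_ONLINE / ... as required */
		return NOTIFY_OK;
	}

	static struct notifier_block __cpuinitdata example_cpu_nb = {
		.notifier_call = example_cpu_callback,
	};
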
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9f86191bb632..084a2de7e52a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -446,8 +446,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 
 	arch_free_page(page, order);
 	if (!PageHighMem(page))
-		mutex_debug_check_no_locks_freed(page_address(page),
-						 PAGE_SIZE<<order);
+		debug_check_no_locks_freed(page_address(page),
+					   PAGE_SIZE<<order);
 
 	for (i = 0 ; i < (1 << order) ; ++i)
 		reserved += free_pages_check(page + i);
@@ -2009,7 +2009,7 @@ static inline void free_zone_pagesets(int cpu)
 	}
 }
 
-static int pageset_cpuup_callback(struct notifier_block *nfb,
+static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
 		unsigned long action,
 		void *hcpu)
 {
@@ -2031,7 +2031,7 @@ static int pageset_cpuup_callback(struct notifier_block *nfb,
 	return ret;
 }
 
-static struct notifier_block pageset_notifier =
+static struct notifier_block __cpuinitdata pageset_notifier =
 	{ &pageset_cpuup_callback, NULL, 0 };
 
 void __init setup_per_cpu_pageset(void)
diff --git a/mm/shmem.c b/mm/shmem.c
index 38bc3334f263..ea64c07cbe72 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -174,7 +174,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 }
 
 static struct super_operations shmem_ops;
-static struct address_space_operations shmem_aops;
+static const struct address_space_operations shmem_aops;
 static struct file_operations shmem_file_operations;
 static struct inode_operations shmem_inode_operations;
 static struct inode_operations shmem_dir_inode_operations;
@@ -2162,7 +2162,7 @@ static void destroy_inodecache(void)
 		printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
 }
 
-static struct address_space_operations shmem_aops = {
+static const struct address_space_operations shmem_aops = {
 	.writepage	= shmem_writepage,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 #ifdef CONFIG_TMPFS
diff --git a/mm/slab.c b/mm/slab.c
index 98ac20bc0de9..233e39d14caf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,6 +89,7 @@
 #include <linux/config.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/poison.h>
 #include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
@@ -106,6 +107,7 @@
 #include <linux/nodemask.h>
 #include <linux/mempolicy.h>
 #include <linux/mutex.h>
+#include <linux/rtmutex.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -492,17 +494,6 @@ struct kmem_cache {
 #endif
 
 #if DEBUG
-/*
- * Magic nums for obj red zoning.
- * Placed in the first word before and the first word after an obj.
- */
-#define	RED_INACTIVE	0x5A2CF071UL	/* when obj is inactive */
-#define	RED_ACTIVE	0x170FC2A5UL	/* when obj is active */
-
-/* ...and for poisoning */
-#define	POISON_INUSE	0x5a	/* for use-uninitialised poisoning */
-#define	POISON_FREE	0x6b	/* for use-after-free poisoning */
-#define	POISON_END	0xa5	/* end-byte of poisoning */
 
 /*
  * memory layout of objects:
@@ -1083,7 +1074,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 
 #endif
 
-static int cpuup_callback(struct notifier_block *nfb,
+static int __devinit cpuup_callback(struct notifier_block *nfb,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1265,7 +1256,9 @@ bad:
 	return NOTIFY_BAD;
 }
 
-static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata cpucache_notifier = {
+	&cpuup_callback, NULL, 0
+};
 
 /*
  * swap the static kmem_list3 with kmalloced memory
@@ -3405,7 +3398,7 @@ void kfree(const void *objp)
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
-	mutex_debug_check_no_locks_freed(objp, obj_size(c));
+	debug_check_no_locks_freed(objp, obj_size(c));
 	__cache_free(c, (void *)objp);
 	local_irq_restore(flags);
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index e0a3fe48aa37..c7a2b3a0e46b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -45,7 +45,7 @@ static struct mem_section *sparse_index_alloc(int nid)
 
 static int sparse_index_init(unsigned long section_nr, int nid)
 {
-	static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED;
+	static DEFINE_SPINLOCK(index_init_lock);
 	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
 	struct mem_section *section;
 	int ret = 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e0e1583f32c2..7535211bb495 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,7 +24,7 @@
  * vmscan's shrink_list, to make sync_page look nicer, and to allow
  * future use of radix_tree tags in the swap cache.
  */
-static struct address_space_operations swap_aops = {
+static const struct address_space_operations swap_aops = {
 	.writepage	= swap_writepage,
 	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 72babac71dea..eeacb0d695c3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -34,6 +34,7 @@
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1223,7 +1224,6 @@ static int kswapd(void *p)
 	};
 	cpumask_t cpumask;
 
-	daemonize("kswapd%d", pgdat->node_id);
 	cpumask = node_to_cpumask(pgdat->node_id);
 	if (!cpus_empty(cpumask))
 		set_cpus_allowed(tsk, cpumask);
@@ -1450,7 +1450,7 @@ out:
    not required for correctness.  So if the last cpu in a node goes
    away, we get changed to run anywhere: as the first one comes back,
    restore their cpu bindings. */
-static int cpu_callback(struct notifier_block *nfb,
+static int __devinit cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
 	pg_data_t *pgdat;
@@ -1468,20 +1468,35 @@ static int cpu_callback(struct notifier_block *nfb,
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+/*
+ * This kswapd start function will be called by init and node-hot-add.
+ * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
+ */
+int kswapd_run(int nid)
+{
+	pg_data_t *pgdat = NODE_DATA(nid);
+	int ret = 0;
+
+	if (pgdat->kswapd)
+		return 0;
+
+	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
+	if (IS_ERR(pgdat->kswapd)) {
+		/* failure at boot is fatal */
+		BUG_ON(system_state == SYSTEM_BOOTING);
+		printk("Failed to start kswapd on node %d\n",nid);
+		ret = -1;
+	}
+	return ret;
+}
+
 static int __init kswapd_init(void)
 {
-	pg_data_t *pgdat;
+	int nid;
 
 	swap_setup();
-	for_each_online_pgdat(pgdat) {
-		pid_t pid;
-
-		pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
-		BUG_ON(pid < 0);
-		read_lock(&tasklist_lock);
-		pgdat->kswapd = find_task_by_pid(pid);
-		read_unlock(&tasklist_lock);
-	}
+	for_each_online_node(nid)
+		kswapd_run(nid);
 	hotcpu_notifier(cpu_callback, 0);
 	return 0;
 }
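
The vmscan.c conversion replaces the boot-time kernel_thread() + find_task_by_pid() dance with kthread_run(), which hands back the task_struct directly and reports failure through IS_ERR() rather than a negative pid; that is what lets kswapd_run() also serve nodes added after boot. The bare idiom, with illustrative names:

	#include <linux/kthread.h>
	#include <linux/err.h>

	static int example_thread_fn(void *data)
	{
		/* per-node work loop, as kswapd() runs for its pgdat */
		return 0;
	}

	static struct task_struct *start_example_thread(int nid)
	{
		struct task_struct *tsk;

		/* created and woken in one step, named "example<nid>" */
		tsk = kthread_run(example_thread_fn, NULL, "example%d", nid);
		if (IS_ERR(tsk))
			return NULL;	/* no pid / tasklist_lock juggling */
		return tsk;
	}
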