Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig             9
-rw-r--r--   mm/filemap.c          31
-rw-r--r--   mm/filemap.h           4
-rw-r--r--   mm/filemap_xip.c       2
-rw-r--r--   mm/memory_hotplug.c  126
-rw-r--r--   mm/page-writeback.c    4
-rw-r--r--   mm/page_alloc.c       10
-rw-r--r--   mm/readahead.c         4
-rw-r--r--   mm/shmem.c             4
-rw-r--r--   mm/slab.c             21
-rw-r--r--   mm/sparse.c            2
-rw-r--r--   mm/swap_state.c        2
-rw-r--r--   mm/vmscan.c           39
13 files changed, 201 insertions(+), 57 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 66e65ab3942..8f5b45615f7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -115,7 +115,8 @@ config SPARSEMEM_EXTREME
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
-	depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+	depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
+	depends on (IA64 || X86 || PPC64)
 
 comment "Memory hotplug is currently incompatible with Software Suspend"
 	depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
@@ -145,3 +146,9 @@ config MIGRATION
 	  while the virtual addresses are not changed. This is useful for
 	  example on NUMA systems to put pages nearer to the processors accessing
 	  the page.
+
+config RESOURCES_64BIT
+	bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL)
+	default 64BIT
+	help
+	  This option allows memory and IO resources to be 64 bit.
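(Note: the consumer of this option lives outside the mm-only diffstat above. A minimal sketch of how such a config symbol is typically keyed into a typedef, assuming CONFIG_RESOURCES_64BIT is the generated symbol and that the consumer sits in a header such as include/linux/types.h; neither detail is part of this mm/ diff:)

/* sketch only: resource width keyed off the new option (assumption) */
#ifdef CONFIG_RESOURCES_64BIT
typedef u64 resource_size_t;	/* 64 bit memory and IO resources */
#else
typedef u32 resource_size_t;	/* 32 bit resources when !64BIT */
#endif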
diff --git a/mm/filemap.c b/mm/filemap.c
index 9c7334bafda..648f2c0c8e1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2069,7 +2069,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	struct address_space_operations *a_ops = mapping->a_ops;
+	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode 	*inode = mapping->host;
 	long		status = 0;
 	struct page	*page;
@@ -2095,14 +2095,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	do {
 		unsigned long index;
 		unsigned long offset;
-		unsigned long maxlen;
 		size_t copied;
 
 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 		index = pos >> PAGE_CACHE_SHIFT;
 		bytes = PAGE_CACHE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
+
+		/* Limit the size of the copy to the caller's write size */
+		bytes = min(bytes, count);
+
+		/*
+		 * Limit the size of the copy to that of the current segment,
+		 * because fault_in_pages_readable() doesn't know how to walk
+		 * segments.
+		 */
+		bytes = min(bytes, cur_iov->iov_len - iov_base);
 
 		/*
 		 * Bring in the user page that we will copy from _first_.
@@ -2110,10 +2117,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
 		 */
-		maxlen = cur_iov->iov_len - iov_base;
-		if (maxlen > bytes)
-			maxlen = bytes;
-		fault_in_pages_readable(buf, maxlen);
+		fault_in_pages_readable(buf, bytes);
 
 		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
 		if (!page) {
@@ -2121,6 +2125,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			break;
 		}
 
+		if (unlikely(bytes == 0)) {
+			status = 0;
+			copied = 0;
+			goto zero_length_segment;
+		}
+
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 		if (unlikely(status)) {
 			loff_t isize = i_size_read(inode);
@@ -2150,7 +2160,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			page_cache_release(page);
 			continue;
 		}
-		if (likely(copied > 0)) {
+zero_length_segment:
+		if (likely(copied >= 0)) {
 			if (!status)
 				status = copied;
 
@@ -2215,7 +2226,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t *ppos)
 {
 	struct file *file = iocb->ki_filp;
-	struct address_space * mapping = file->f_mapping;
+	const struct address_space * mapping = file->f_mapping;
 	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	struct inode 	*inode = mapping->host;
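(The two min() clamps plus the zero-length-segment bail-out in the hunks above can be read in isolation. A minimal stand-alone sketch of the same ordering, with illustrative parameter names standing in for the kernel variables; this is not kernel code:)

/* sketch: clamp one copy the way generic_file_buffered_write() now does */
size_t clamp_copy(size_t page_remaining,	/* PAGE_CACHE_SIZE - offset */
		  size_t write_remaining,	/* caller's remaining count */
		  size_t seg_remaining)		/* cur_iov->iov_len - iov_base */
{
	size_t bytes = page_remaining;

	if (bytes > write_remaining)	/* limit to the caller's write size */
		bytes = write_remaining;
	if (bytes > seg_remaining)	/* limit to the current iovec segment,
					   since fault_in_pages_readable()
					   cannot walk segments */
		bytes = seg_remaining;

	/* bytes == 0 here corresponds to the zero_length_segment path:
	   the segment is empty, so prepare_write/commit_write are skipped */
	return bytes;
}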
diff --git a/mm/filemap.h b/mm/filemap.h
index 536979fb4ba..3f2a343c601 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -88,7 +88,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
 	const struct iovec *iov = *iovp;
 	size_t base = *basep;
 
-	while (bytes) {
+	do {
 		int copy = min(bytes, iov->iov_len - base);
 
 		bytes -= copy;
@@ -97,7 +97,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
 			iov++;
 			base = 0;
 		}
-	}
+	} while (bytes);
 	*iovp = iov;
 	*basep = base;
 }
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index b960ac8e591..b4fd0d7c9bf 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -273,7 +273,7 @@ __xip_file_write(struct file *filp, const char __user *buf,
 		size_t count, loff_t pos, loff_t *ppos)
 {
 	struct address_space * mapping = filp->f_mapping;
-	struct address_space_operations *a_ops = mapping->a_ops;
+	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode 	*inode = mapping->host;
 	long		status = 0;
 	struct page	*page;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 841a077d5ae..ea4038838b0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -21,6 +21,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
+#include <linux/ioport.h>
 
 #include <asm/tlbflush.h>
 
@@ -126,6 +127,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	unsigned long i;
 	unsigned long flags;
 	unsigned long onlined_pages = 0;
+	struct resource res;
+	u64 section_end;
+	unsigned long start_pfn;
 	struct zone *zone;
 	int need_zonelists_rebuild = 0;
 
@@ -148,10 +152,27 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	if (!populated_zone(zone))
 		need_zonelists_rebuild = 1;
 
-	for (i = 0; i < nr_pages; i++) {
-		struct page *page = pfn_to_page(pfn + i);
-		online_page(page);
-		onlined_pages++;
+	res.start = (u64)pfn << PAGE_SHIFT;
+	res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
+	res.flags = IORESOURCE_MEM; /* we just need system ram */
+	section_end = res.end;
+
+	while (find_next_system_ram(&res) >= 0) {
+		start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
+		nr_pages = (unsigned long)
+			   ((res.end + 1 - res.start) >> PAGE_SHIFT);
+
+		if (PageReserved(pfn_to_page(start_pfn))) {
+			/* this region's page is not onlined now */
+			for (i = 0; i < nr_pages; i++) {
+				struct page *page = pfn_to_page(start_pfn + i);
+				online_page(page);
+				onlined_pages++;
+			}
+		}
+
+		res.start = res.end + 1;
+		res.end = section_end;
 	}
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
@@ -163,3 +184,100 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	vm_total_pages = nr_free_pagecache_pages();
 	return 0;
 }
+
+static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
+{
+	struct pglist_data *pgdat;
+	unsigned long zones_size[MAX_NR_ZONES] = {0};
+	unsigned long zholes_size[MAX_NR_ZONES] = {0};
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+
+	pgdat = arch_alloc_nodedata(nid);
+	if (!pgdat)
+		return NULL;
+
+	arch_refresh_nodedata(nid, pgdat);
+
+	/* we can use NODE_DATA(nid) from here */
+
+	/* init node's zones as empty zones, we don't have any present pages.*/
+	free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
+
+	return pgdat;
+}
+
+static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
+{
+	arch_refresh_nodedata(nid, NULL);
+	arch_free_nodedata(pgdat);
+	return;
+}
+
+/* add this memory to iomem resource */
+static void register_memory_resource(u64 start, u64 size)
+{
+	struct resource *res;
+
+	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	BUG_ON(!res);
+
+	res->name = "System RAM";
+	res->start = start;
+	res->end = start + size - 1;
+	res->flags = IORESOURCE_MEM;
+	if (request_resource(&iomem_resource, res) < 0) {
+		printk("System RAM resource %llx - %llx cannot be added\n",
+		       (unsigned long long)res->start, (unsigned long long)res->end);
+		kfree(res);
+	}
+}
+
+
+
+int add_memory(int nid, u64 start, u64 size)
+{
+	pg_data_t *pgdat = NULL;
+	int new_pgdat = 0;
+	int ret;
+
+	if (!node_online(nid)) {
+		pgdat = hotadd_new_pgdat(nid, start);
+		if (!pgdat)
+			return -ENOMEM;
+		new_pgdat = 1;
+		ret = kswapd_run(nid);
+		if (ret)
+			goto error;
+	}
+
+	/* call arch's memory hotadd */
+	ret = arch_add_memory(nid, start, size);
+
+	if (ret < 0)
+		goto error;
+
+	/* we online node here. we can't roll back from here. */
+	node_set_online(nid);
+
+	if (new_pgdat) {
+		ret = register_one_node(nid);
+		/*
+		 * If sysfs file of new node can't create, cpu on the node
+		 * can't be hot-added. There is no rollback way now.
+		 * So, check by BUG_ON() to catch it reluctantly..
+		 */
+		BUG_ON(ret);
+	}
+
+	/* register this memory as resource */
+	register_memory_resource(start, size);
+
+	return ret;
+error:
+	/* rollback pgdat allocation and others */
+	if (new_pgdat)
+		rollback_node_hotadd(nid, pgdat);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory);
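(add_memory() above is the entry point a platform memory hot-add path would call. A minimal sketch of a caller, assuming an ACPI-style flow in which firmware supplies the start address and size; probe_node() and the handler name are illustrative placeholders, not part of this patch:)

/* sketch: hypothetical hot-add handler driving the new add_memory() */
static int hypothetical_hotadd_handler(u64 start, u64 size)
{
	int nid = probe_node(start);	/* assumed platform-specific lookup */
	int ret;

	/* allocates the pgdat if the node was offline, starts kswapd on it,
	   calls arch_add_memory() and registers the "System RAM" resource */
	ret = add_memory(nid, start, size);
	if (ret)
		return ret;

	/* the new pages stay reserved; onlining is a separate step that ends
	   up in online_pages(), e.g. via the memory sysfs "state" attribute */
	return 0;
}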
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8ccf6f1b147..4ec7026c7ba 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -516,14 +516,14 @@ static void set_ratelimit(void)
 	ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
 }
 
-static int
+static int __cpuinit
 ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
 {
 	set_ratelimit();
 	return 0;
 }
 
-static struct notifier_block ratelimit_nb = {
+static struct notifier_block __cpuinitdata ratelimit_nb = {
 	.notifier_call	= ratelimit_handler,
 	.next		= NULL,
 };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6c1174fcf52..084a2de7e52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -266,7 +266,7 @@ static inline void rmv_page_order(struct page *page)
  * satisfies the following equation:
  *     P = B & ~(1 << O)
  *
- * Assumption: *_mem_map is contigious at least up to MAX_ORDER
+ * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
  */
 static inline struct page *
 __page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
@@ -446,8 +446,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 
 	arch_free_page(page, order);
 	if (!PageHighMem(page))
-		mutex_debug_check_no_locks_freed(page_address(page),
+		debug_check_no_locks_freed(page_address(page),
 					   PAGE_SIZE<<order);
 
 	for (i = 0 ; i < (1 << order) ; ++i)
 		reserved += free_pages_check(page + i);
@@ -2009,7 +2009,7 @@ static inline void free_zone_pagesets(int cpu)
 	}
 }
 
-static int pageset_cpuup_callback(struct notifier_block *nfb,
+static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
 		unsigned long action,
 		void *hcpu)
 {
@@ -2031,7 +2031,7 @@ static int pageset_cpuup_callback(struct notifier_block *nfb,
 	return ret;
 }
 
-static struct notifier_block pageset_notifier =
+static struct notifier_block __cpuinitdata pageset_notifier =
 	{ &pageset_cpuup_callback, NULL, 0 };
 
 void __init setup_per_cpu_pageset(void)
diff --git a/mm/readahead.c b/mm/readahead.c
index e39e416860d..aa7ec424656 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -390,8 +390,8 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
  * Read 'nr_to_read' pages starting at page 'offset'. If the flag 'block'
  * is set wait till the read completes. Otherwise attempt to read without
  * blocking.
- * Returns 1 meaning 'success' if read is succesfull without switching off
- * readhaead mode. Otherwise return failure.
+ * Returns 1 meaning 'success' if read is successful without switching off
+ * readahead mode. Otherwise return failure.
  */
 static int
 blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
diff --git a/mm/shmem.c b/mm/shmem.c
index 355904712a8..b14ff817d16 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -173,7 +173,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
 }
 
 static struct super_operations shmem_ops;
-static struct address_space_operations shmem_aops;
+static const struct address_space_operations shmem_aops;
 static struct file_operations shmem_file_operations;
 static struct inode_operations shmem_inode_operations;
 static struct inode_operations shmem_dir_inode_operations;
@@ -2161,7 +2161,7 @@ static void destroy_inodecache(void)
 		printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
 }
 
-static struct address_space_operations shmem_aops = {
+static const struct address_space_operations shmem_aops = {
 	.writepage	= shmem_writepage,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 #ifdef CONFIG_TMPFS
diff --git a/mm/slab.c b/mm/slab.c
index 98ac20bc0de..233e39d14ca 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,6 +89,7 @@
 #include <linux/config.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/poison.h>
 #include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
@@ -106,6 +107,7 @@
 #include <linux/nodemask.h>
 #include <linux/mempolicy.h>
 #include <linux/mutex.h>
+#include <linux/rtmutex.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -492,17 +494,6 @@ struct kmem_cache {
 #endif
 
 #if DEBUG
-/*
- * Magic nums for obj red zoning.
- * Placed in the first word before and the first word after an obj.
- */
-#define	RED_INACTIVE	0x5A2CF071UL	/* when obj is inactive */
-#define	RED_ACTIVE	0x170FC2A5UL	/* when obj is active */
-
-/* ...and for poisoning */
-#define	POISON_INUSE	0x5a	/* for use-uninitialised poisoning */
-#define POISON_FREE	0x6b	/* for use-after-free poisoning */
-#define	POISON_END	0xa5	/* end-byte of poisoning */
 
 /*
  * memory layout of objects:
@@ -1083,7 +1074,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 
 #endif
 
-static int cpuup_callback(struct notifier_block *nfb,
+static int __devinit cpuup_callback(struct notifier_block *nfb,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1265,7 +1256,9 @@ bad:
 	return NOTIFY_BAD;
 }
 
-static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata cpucache_notifier = {
+	&cpuup_callback, NULL, 0
+};
 
 /*
  * swap the static kmem_list3 with kmalloced memory
@@ -3405,7 +3398,7 @@ void kfree(const void *objp)
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
-	mutex_debug_check_no_locks_freed(objp, obj_size(c));
+	debug_check_no_locks_freed(objp, obj_size(c));
 	__cache_free(c, (void *)objp);
 	local_irq_restore(flags);
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index e0a3fe48aa3..c7a2b3a0e46 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -45,7 +45,7 @@ static struct mem_section *sparse_index_alloc(int nid)
 
 static int sparse_index_init(unsigned long section_nr, int nid)
 {
-	static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED;
+	static DEFINE_SPINLOCK(index_init_lock);
 	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
 	struct mem_section *section;
 	int ret = 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e0e1583f32c..7535211bb49 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,7 +24,7 @@
  * vmscan's shrink_list, to make sync_page look nicer, and to allow
  * future use of radix_tree tags in the swap cache.
  */
-static struct address_space_operations swap_aops = {
+static const struct address_space_operations swap_aops = {
 	.writepage	= swap_writepage,
 	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 72babac71de..eeacb0d695c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -34,6 +34,7 @@
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/delay.h>
+#include <linux/kthread.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1223,7 +1224,6 @@ static int kswapd(void *p)
 	};
 	cpumask_t cpumask;
 
-	daemonize("kswapd%d", pgdat->node_id);
 	cpumask = node_to_cpumask(pgdat->node_id);
 	if (!cpus_empty(cpumask))
 		set_cpus_allowed(tsk, cpumask);
@@ -1450,7 +1450,7 @@ out:
    not required for correctness.  So if the last cpu in a node goes
    away, we get changed to run anywhere: as the first one comes back,
    restore their cpu bindings. */
-static int cpu_callback(struct notifier_block *nfb,
+static int __devinit cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
 	pg_data_t *pgdat;
@@ -1468,20 +1468,35 @@ static int cpu_callback(struct notifier_block *nfb,
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+/*
+ * This kswapd start function will be called by init and node-hot-add.
+ * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
+ */
+int kswapd_run(int nid)
+{
+	pg_data_t *pgdat = NODE_DATA(nid);
+	int ret = 0;
+
+	if (pgdat->kswapd)
+		return 0;
+
+	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
+	if (IS_ERR(pgdat->kswapd)) {
+		/* failure at boot is fatal */
+		BUG_ON(system_state == SYSTEM_BOOTING);
+		printk("Failed to start kswapd on node %d\n",nid);
+		ret = -1;
+	}
+	return ret;
+}
+
 static int __init kswapd_init(void)
 {
-	pg_data_t *pgdat;
+	int nid;
 
 	swap_setup();
-	for_each_online_pgdat(pgdat) {
-		pid_t pid;
-
-		pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
-		BUG_ON(pid < 0);
-		read_lock(&tasklist_lock);
-		pgdat->kswapd = find_task_by_pid(pid);
-		read_unlock(&tasklist_lock);
-	}
+	for_each_online_node(nid)
+		kswapd_run(nid);
 	hotcpu_notifier(cpu_callback, 0);
 	return 0;
 }