path: root/mm
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig             3
-rw-r--r--  mm/Makefile            2
-rw-r--r--  mm/backing-dev.c      18
-rw-r--r--  mm/bootmem.c          24
-rw-r--r--  mm/highmem.c           7
-rw-r--r--  mm/memblock.c        541
-rw-r--r--  mm/memcontrol.c        4
-rw-r--r--  mm/memory-failure.c   33
-rw-r--r--  mm/memory.c           16
-rw-r--r--  mm/mempolicy.c         9
-rw-r--r--  mm/mmap.c              6
-rw-r--r--  mm/page-writeback.c   38
-rw-r--r--  mm/page_alloc.c        8
-rw-r--r--  mm/page_cgroup.c       7
-rw-r--r--  mm/percpu.c          121
-rw-r--r--  mm/slab.c              3
-rw-r--r--  mm/slob.c             14
-rw-r--r--  mm/slub.c             87
-rw-r--r--  mm/vmalloc.c           2
-rw-r--r--  mm/vmscan.c           10
20 files changed, 815 insertions, 138 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 527136b22384..f4e516e9c37c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -128,6 +128,9 @@ config SPARSEMEM_VMEMMAP
128 pfn_to_page and page_to_pfn operations. This is the most 128 pfn_to_page and page_to_pfn operations. This is the most
129 efficient option when sufficient kernel resources are available. 129 efficient option when sufficient kernel resources are available.
130 130
131config HAVE_MEMBLOCK
132 boolean
133
131# eventually, we can have this option just 'select SPARSEMEM' 134# eventually, we can have this option just 'select SPARSEMEM'
132config MEMORY_HOTPLUG 135config MEMORY_HOTPLUG
133 bool "Allow for memory hot-add" 136 bool "Allow for memory hot-add"
diff --git a/mm/Makefile b/mm/Makefile
index 8982504bd03b..34b2546a9e37 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,8 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
15 $(mmu-y) 15 $(mmu-y)
16obj-y += init-mm.o 16obj-y += init-mm.o
17 17
18obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
19
18obj-$(CONFIG_BOUNCE) += bounce.o 20obj-$(CONFIG_BOUNCE) += bounce.o
19obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 21obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
20obj-$(CONFIG_HAS_DMA) += dmapool.o 22obj-$(CONFIG_HAS_DMA) += dmapool.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a22511..f9fd3dd3916b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -104,15 +104,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
104 "b_more_io: %8lu\n" 104 "b_more_io: %8lu\n"
105 "bdi_list: %8u\n" 105 "bdi_list: %8u\n"
106 "state: %8lx\n" 106 "state: %8lx\n"
107 "wb_mask: %8lx\n" 107 "wb_list: %8u\n",
108 "wb_list: %8u\n"
109 "wb_cnt: %8u\n",
110 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), 108 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
111 (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), 109 (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
112 K(bdi_thresh), K(dirty_thresh), 110 K(bdi_thresh), K(dirty_thresh),
113 K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io, 111 K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
114 !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask, 112 !list_empty(&bdi->bdi_list), bdi->state,
115 !list_empty(&bdi->wb_list), bdi->wb_cnt); 113 !list_empty(&bdi->wb_list));
116#undef K 114#undef K
117 115
118 return 0; 116 return 0;
@@ -340,14 +338,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
340static void bdi_flush_io(struct backing_dev_info *bdi) 338static void bdi_flush_io(struct backing_dev_info *bdi)
341{ 339{
342 struct writeback_control wbc = { 340 struct writeback_control wbc = {
343 .bdi = bdi,
344 .sync_mode = WB_SYNC_NONE, 341 .sync_mode = WB_SYNC_NONE,
345 .older_than_this = NULL, 342 .older_than_this = NULL,
346 .range_cyclic = 1, 343 .range_cyclic = 1,
347 .nr_to_write = 1024, 344 .nr_to_write = 1024,
348 }; 345 };
349 346
350 writeback_inodes_wbc(&wbc); 347 writeback_inodes_wb(&bdi->wb, &wbc);
351} 348}
352 349
353/* 350/*
@@ -668,19 +665,12 @@ int bdi_init(struct backing_dev_info *bdi)
668 bdi->max_ratio = 100; 665 bdi->max_ratio = 100;
669 bdi->max_prop_frac = PROP_FRAC_BASE; 666 bdi->max_prop_frac = PROP_FRAC_BASE;
670 spin_lock_init(&bdi->wb_lock); 667 spin_lock_init(&bdi->wb_lock);
671 INIT_RCU_HEAD(&bdi->rcu_head);
672 INIT_LIST_HEAD(&bdi->bdi_list); 668 INIT_LIST_HEAD(&bdi->bdi_list);
673 INIT_LIST_HEAD(&bdi->wb_list); 669 INIT_LIST_HEAD(&bdi->wb_list);
674 INIT_LIST_HEAD(&bdi->work_list); 670 INIT_LIST_HEAD(&bdi->work_list);
675 671
676 bdi_wb_init(&bdi->wb, bdi); 672 bdi_wb_init(&bdi->wb, bdi);
677 673
678 /*
679 * Just one thread support for now, hard code mask and count
680 */
681 bdi->wb_mask = 1;
682 bdi->wb_cnt = 1;
683
684 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { 674 for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
685 err = percpu_counter_init(&bdi->bdi_stat[i], 0); 675 err = percpu_counter_init(&bdi->bdi_stat[i], 0);
686 if (err) 676 if (err)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 58c66cc5056a..142c84a54993 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
833void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, 833void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
834 unsigned long align, unsigned long goal) 834 unsigned long align, unsigned long goal)
835{ 835{
836 void *ptr;
837
836 if (WARN_ON_ONCE(slab_is_available())) 838 if (WARN_ON_ONCE(slab_is_available()))
837 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 839 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
838 840
839#ifdef CONFIG_NO_BOOTMEM 841#ifdef CONFIG_NO_BOOTMEM
840 return __alloc_memory_core_early(pgdat->node_id, size, align, 842 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
843 goal, -1ULL);
844 if (ptr)
845 return ptr;
846
847 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
841 goal, -1ULL); 848 goal, -1ULL);
842#else 849#else
843 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); 850 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
844#endif 851#endif
852
853 return ptr;
845} 854}
846 855
847void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, 856void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
977void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 986void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
978 unsigned long align, unsigned long goal) 987 unsigned long align, unsigned long goal)
979{ 988{
989 void *ptr;
990
980 if (WARN_ON_ONCE(slab_is_available())) 991 if (WARN_ON_ONCE(slab_is_available()))
981 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 992 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
982 993
983#ifdef CONFIG_NO_BOOTMEM 994#ifdef CONFIG_NO_BOOTMEM
984 return __alloc_memory_core_early(pgdat->node_id, size, align, 995 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
996 goal, ARCH_LOW_ADDRESS_LIMIT);
997 if (ptr)
998 return ptr;
999 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
985 goal, ARCH_LOW_ADDRESS_LIMIT); 1000 goal, ARCH_LOW_ADDRESS_LIMIT);
986#else 1001#else
987 return ___alloc_bootmem_node(pgdat->bdata, size, align, 1002 ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
988 goal, ARCH_LOW_ADDRESS_LIMIT); 1003 goal, ARCH_LOW_ADDRESS_LIMIT);
989#endif 1004#endif
1005 return ptr;
990} 1006}
diff --git a/mm/highmem.c b/mm/highmem.c
index 66baa20f78f5..7a0aa1be4993 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -26,6 +26,7 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/hash.h> 27#include <linux/hash.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/kgdb.h>
29#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
30 31
31/* 32/*
@@ -470,6 +471,12 @@ void debug_kmap_atomic(enum km_type type)
470 warn_count--; 471 warn_count--;
471 } 472 }
472 } 473 }
474#ifdef CONFIG_KGDB_KDB
475 if (unlikely(type == KM_KDB && atomic_read(&kgdb_active) == -1)) {
476 WARN_ON(1);
477 warn_count--;
478 }
479#endif /* CONFIG_KGDB_KDB */
473} 480}
474 481
475#endif 482#endif
diff --git a/mm/memblock.c b/mm/memblock.c
new file mode 100644
index 000000000000..3024eb30fc27
--- /dev/null
+++ b/mm/memblock.c
@@ -0,0 +1,541 @@
1/*
2 * Procedures for maintaining information about logical memory blocks.
3 *
4 * Peter Bergner, IBM Corp. June 2001.
5 * Copyright (C) 2001 Peter Bergner.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/bitops.h>
16#include <linux/memblock.h>
17
18#define MEMBLOCK_ALLOC_ANYWHERE 0
19
20struct memblock memblock;
21
22static int memblock_debug;
23
24static int __init early_memblock(char *p)
25{
26 if (p && strstr(p, "debug"))
27 memblock_debug = 1;
28 return 0;
29}
30early_param("memblock", early_memblock);
31
32static void memblock_dump(struct memblock_region *region, char *name)
33{
34 unsigned long long base, size;
35 int i;
36
37 pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
38
39 for (i = 0; i < region->cnt; i++) {
40 base = region->region[i].base;
41 size = region->region[i].size;
42
43 pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
44 name, i, base, base + size - 1, size);
45 }
46}
47
48void memblock_dump_all(void)
49{
50 if (!memblock_debug)
51 return;
52
53 pr_info("MEMBLOCK configuration:\n");
54 pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size);
55 pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size);
56
57 memblock_dump(&memblock.memory, "memory");
58 memblock_dump(&memblock.reserved, "reserved");
59}
60
61static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2,
62 u64 size2)
63{
64 return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
65}
66
67static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
68{
69 if (base2 == base1 + size1)
70 return 1;
71 else if (base1 == base2 + size2)
72 return -1;
73
74 return 0;
75}
76
77static long memblock_regions_adjacent(struct memblock_region *rgn,
78 unsigned long r1, unsigned long r2)
79{
80 u64 base1 = rgn->region[r1].base;
81 u64 size1 = rgn->region[r1].size;
82 u64 base2 = rgn->region[r2].base;
83 u64 size2 = rgn->region[r2].size;
84
85 return memblock_addrs_adjacent(base1, size1, base2, size2);
86}
87
88static void memblock_remove_region(struct memblock_region *rgn, unsigned long r)
89{
90 unsigned long i;
91
92 for (i = r; i < rgn->cnt - 1; i++) {
93 rgn->region[i].base = rgn->region[i + 1].base;
94 rgn->region[i].size = rgn->region[i + 1].size;
95 }
96 rgn->cnt--;
97}
98
99/* Assumption: base addr of region 1 < base addr of region 2 */
100static void memblock_coalesce_regions(struct memblock_region *rgn,
101 unsigned long r1, unsigned long r2)
102{
103 rgn->region[r1].size += rgn->region[r2].size;
104 memblock_remove_region(rgn, r2);
105}
106
107void __init memblock_init(void)
108{
109 /* Create a dummy zero size MEMBLOCK which will get coalesced away later.
110 * This simplifies the memblock_add() code below...
111 */
112 memblock.memory.region[0].base = 0;
113 memblock.memory.region[0].size = 0;
114 memblock.memory.cnt = 1;
115
116 /* Ditto. */
117 memblock.reserved.region[0].base = 0;
118 memblock.reserved.region[0].size = 0;
119 memblock.reserved.cnt = 1;
120}
121
122void __init memblock_analyze(void)
123{
124 int i;
125
126 memblock.memory.size = 0;
127
128 for (i = 0; i < memblock.memory.cnt; i++)
129 memblock.memory.size += memblock.memory.region[i].size;
130}
131
132static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size)
133{
134 unsigned long coalesced = 0;
135 long adjacent, i;
136
137 if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
138 rgn->region[0].base = base;
139 rgn->region[0].size = size;
140 return 0;
141 }
142
143 /* First try and coalesce this MEMBLOCK with another. */
144 for (i = 0; i < rgn->cnt; i++) {
145 u64 rgnbase = rgn->region[i].base;
146 u64 rgnsize = rgn->region[i].size;
147
148 if ((rgnbase == base) && (rgnsize == size))
149 /* Already have this region, so we're done */
150 return 0;
151
152 adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
153 if (adjacent > 0) {
154 rgn->region[i].base -= size;
155 rgn->region[i].size += size;
156 coalesced++;
157 break;
158 } else if (adjacent < 0) {
159 rgn->region[i].size += size;
160 coalesced++;
161 break;
162 }
163 }
164
165 if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) {
166 memblock_coalesce_regions(rgn, i, i+1);
167 coalesced++;
168 }
169
170 if (coalesced)
171 return coalesced;
172 if (rgn->cnt >= MAX_MEMBLOCK_REGIONS)
173 return -1;
174
175 /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
176 for (i = rgn->cnt - 1; i >= 0; i--) {
177 if (base < rgn->region[i].base) {
178 rgn->region[i+1].base = rgn->region[i].base;
179 rgn->region[i+1].size = rgn->region[i].size;
180 } else {
181 rgn->region[i+1].base = base;
182 rgn->region[i+1].size = size;
183 break;
184 }
185 }
186
187 if (base < rgn->region[0].base) {
188 rgn->region[0].base = base;
189 rgn->region[0].size = size;
190 }
191 rgn->cnt++;
192
193 return 0;
194}
195
196long memblock_add(u64 base, u64 size)
197{
198 struct memblock_region *_rgn = &memblock.memory;
199
200 /* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */
201 if (base == 0)
202 memblock.rmo_size = size;
203
204 return memblock_add_region(_rgn, base, size);
205
206}
207
208static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size)
209{
210 u64 rgnbegin, rgnend;
211 u64 end = base + size;
212 int i;
213
214 rgnbegin = rgnend = 0; /* supress gcc warnings */
215
216 /* Find the region where (base, size) belongs to */
217 for (i=0; i < rgn->cnt; i++) {
218 rgnbegin = rgn->region[i].base;
219 rgnend = rgnbegin + rgn->region[i].size;
220
221 if ((rgnbegin <= base) && (end <= rgnend))
222 break;
223 }
224
225 /* Didn't find the region */
226 if (i == rgn->cnt)
227 return -1;
228
229 /* Check to see if we are removing entire region */
230 if ((rgnbegin == base) && (rgnend == end)) {
231 memblock_remove_region(rgn, i);
232 return 0;
233 }
234
235 /* Check to see if region is matching at the front */
236 if (rgnbegin == base) {
237 rgn->region[i].base = end;
238 rgn->region[i].size -= size;
239 return 0;
240 }
241
242 /* Check to see if the region is matching at the end */
243 if (rgnend == end) {
244 rgn->region[i].size -= size;
245 return 0;
246 }
247
248 /*
249 * We need to split the entry - adjust the current one to the
250 * beginging of the hole and add the region after hole.
251 */
252 rgn->region[i].size = base - rgn->region[i].base;
253 return memblock_add_region(rgn, end, rgnend - end);
254}
255
256long memblock_remove(u64 base, u64 size)
257{
258 return __memblock_remove(&memblock.memory, base, size);
259}
260
261long __init memblock_free(u64 base, u64 size)
262{
263 return __memblock_remove(&memblock.reserved, base, size);
264}
265
266long __init memblock_reserve(u64 base, u64 size)
267{
268 struct memblock_region *_rgn = &memblock.reserved;
269
270 BUG_ON(0 == size);
271
272 return memblock_add_region(_rgn, base, size);
273}
274
275long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size)
276{
277 unsigned long i;
278
279 for (i = 0; i < rgn->cnt; i++) {
280 u64 rgnbase = rgn->region[i].base;
281 u64 rgnsize = rgn->region[i].size;
282 if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
283 break;
284 }
285
286 return (i < rgn->cnt) ? i : -1;
287}
288
289static u64 memblock_align_down(u64 addr, u64 size)
290{
291 return addr & ~(size - 1);
292}
293
294static u64 memblock_align_up(u64 addr, u64 size)
295{
296 return (addr + (size - 1)) & ~(size - 1);
297}
298
299static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end,
300 u64 size, u64 align)
301{
302 u64 base, res_base;
303 long j;
304
305 base = memblock_align_down((end - size), align);
306 while (start <= base) {
307 j = memblock_overlaps_region(&memblock.reserved, base, size);
308 if (j < 0) {
309 /* this area isn't reserved, take it */
310 if (memblock_add_region(&memblock.reserved, base, size) < 0)
311 base = ~(u64)0;
312 return base;
313 }
314 res_base = memblock.reserved.region[j].base;
315 if (res_base < size)
316 break;
317 base = memblock_align_down(res_base - size, align);
318 }
319
320 return ~(u64)0;
321}
322
323static u64 __init memblock_alloc_nid_region(struct memblock_property *mp,
324 u64 (*nid_range)(u64, u64, int *),
325 u64 size, u64 align, int nid)
326{
327 u64 start, end;
328
329 start = mp->base;
330 end = start + mp->size;
331
332 start = memblock_align_up(start, align);
333 while (start < end) {
334 u64 this_end;
335 int this_nid;
336
337 this_end = nid_range(start, end, &this_nid);
338 if (this_nid == nid) {
339 u64 ret = memblock_alloc_nid_unreserved(start, this_end,
340 size, align);
341 if (ret != ~(u64)0)
342 return ret;
343 }
344 start = this_end;
345 }
346
347 return ~(u64)0;
348}
349
350u64 __init memblock_alloc_nid(u64 size, u64 align, int nid,
351 u64 (*nid_range)(u64 start, u64 end, int *nid))
352{
353 struct memblock_region *mem = &memblock.memory;
354 int i;
355
356 BUG_ON(0 == size);
357
358 size = memblock_align_up(size, align);
359
360 for (i = 0; i < mem->cnt; i++) {
361 u64 ret = memblock_alloc_nid_region(&mem->region[i],
362 nid_range,
363 size, align, nid);
364 if (ret != ~(u64)0)
365 return ret;
366 }
367
368 return memblock_alloc(size, align);
369}
370
371u64 __init memblock_alloc(u64 size, u64 align)
372{
373 return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
374}
375
376u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr)
377{
378 u64 alloc;
379
380 alloc = __memblock_alloc_base(size, align, max_addr);
381
382 if (alloc == 0)
383 panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
384 (unsigned long long) size, (unsigned long long) max_addr);
385
386 return alloc;
387}
388
389u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr)
390{
391 long i, j;
392 u64 base = 0;
393 u64 res_base;
394
395 BUG_ON(0 == size);
396
397 size = memblock_align_up(size, align);
398
399 /* On some platforms, make sure we allocate lowmem */
400 /* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */
401 if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
402 max_addr = MEMBLOCK_REAL_LIMIT;
403
404 for (i = memblock.memory.cnt - 1; i >= 0; i--) {
405 u64 memblockbase = memblock.memory.region[i].base;
406 u64 memblocksize = memblock.memory.region[i].size;
407
408 if (memblocksize < size)
409 continue;
410 if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
411 base = memblock_align_down(memblockbase + memblocksize - size, align);
412 else if (memblockbase < max_addr) {
413 base = min(memblockbase + memblocksize, max_addr);
414 base = memblock_align_down(base - size, align);
415 } else
416 continue;
417
418 while (base && memblockbase <= base) {
419 j = memblock_overlaps_region(&memblock.reserved, base, size);
420 if (j < 0) {
421 /* this area isn't reserved, take it */
422 if (memblock_add_region(&memblock.reserved, base, size) < 0)
423 return 0;
424 return base;
425 }
426 res_base = memblock.reserved.region[j].base;
427 if (res_base < size)
428 break;
429 base = memblock_align_down(res_base - size, align);
430 }
431 }
432 return 0;
433}
434
435/* You must call memblock_analyze() before this. */
436u64 __init memblock_phys_mem_size(void)
437{
438 return memblock.memory.size;
439}
440
441u64 memblock_end_of_DRAM(void)
442{
443 int idx = memblock.memory.cnt - 1;
444
445 return (memblock.memory.region[idx].base + memblock.memory.region[idx].size);
446}
447
448/* You must call memblock_analyze() after this. */
449void __init memblock_enforce_memory_limit(u64 memory_limit)
450{
451 unsigned long i;
452 u64 limit;
453 struct memblock_property *p;
454
455 if (!memory_limit)
456 return;
457
458 /* Truncate the memblock regions to satisfy the memory limit. */
459 limit = memory_limit;
460 for (i = 0; i < memblock.memory.cnt; i++) {
461 if (limit > memblock.memory.region[i].size) {
462 limit -= memblock.memory.region[i].size;
463 continue;
464 }
465
466 memblock.memory.region[i].size = limit;
467 memblock.memory.cnt = i + 1;
468 break;
469 }
470
471 if (memblock.memory.region[0].size < memblock.rmo_size)
472 memblock.rmo_size = memblock.memory.region[0].size;
473
474 memory_limit = memblock_end_of_DRAM();
475
476 /* And truncate any reserves above the limit also. */
477 for (i = 0; i < memblock.reserved.cnt; i++) {
478 p = &memblock.reserved.region[i];
479
480 if (p->base > memory_limit)
481 p->size = 0;
482 else if ((p->base + p->size) > memory_limit)
483 p->size = memory_limit - p->base;
484
485 if (p->size == 0) {
486 memblock_remove_region(&memblock.reserved, i);
487 i--;
488 }
489 }
490}
491
492int __init memblock_is_reserved(u64 addr)
493{
494 int i;
495
496 for (i = 0; i < memblock.reserved.cnt; i++) {
497 u64 upper = memblock.reserved.region[i].base +
498 memblock.reserved.region[i].size - 1;
499 if ((addr >= memblock.reserved.region[i].base) && (addr <= upper))
500 return 1;
501 }
502 return 0;
503}
504
505int memblock_is_region_reserved(u64 base, u64 size)
506{
507 return memblock_overlaps_region(&memblock.reserved, base, size);
508}
509
510/*
511 * Given a <base, len>, find which memory regions belong to this range.
512 * Adjust the request and return a contiguous chunk.
513 */
514int memblock_find(struct memblock_property *res)
515{
516 int i;
517 u64 rstart, rend;
518
519 rstart = res->base;
520 rend = rstart + res->size - 1;
521
522 for (i = 0; i < memblock.memory.cnt; i++) {
523 u64 start = memblock.memory.region[i].base;
524 u64 end = start + memblock.memory.region[i].size - 1;
525
526 if (start > rend)
527 return -1;
528
529 if ((end >= rstart) && (start < rend)) {
530 /* adjust the request */
531 if (rstart < start)
532 rstart = start;
533 if (rend > end)
534 rend = end;
535 res->base = rstart;
536 res->size = rend - rstart + 1;
537 return 0;
538 }
539 }
540 return -1;
541}
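
For context, a minimal sketch of how architecture setup code might drive the interface introduced in mm/memblock.c above. It is not part of this patch: the function name example_memblock_setup() and all region addresses and sizes are invented for illustration; only the memblock_*() calls come from the file above.

/* Illustrative sketch only -- not part of this patch. */
static void __init example_memblock_setup(void)
{
	u64 buf;

	memblock_init();			/* start with the dummy zero-size regions */

	/* Describe the RAM reported by firmware (example values). */
	memblock_add(0x00000000ULL, 0x40000000ULL);	/* 1 GiB of RAM */

	/* Keep the early allocator away from the kernel image (example range). */
	memblock_reserve(0x00100000ULL, 0x00800000ULL);

	memblock_analyze();			/* recompute memblock.memory.size */

	/* Grab an early, page-aligned 1 MiB scratch area; panics on failure. */
	buf = memblock_alloc(0x100000ULL, PAGE_SIZE);

	pr_info("example: %llu bytes of RAM, DRAM ends at 0x%llx, buf at 0x%llx\n",
		(unsigned long long)memblock_phys_mem_size(),
		(unsigned long long)memblock_end_of_DRAM(),
		(unsigned long long)buf);

	memblock_dump_all();			/* verbose only with memblock=debug */
}
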
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c6ece0a57595..20a8193a7af8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1370,7 +1370,7 @@ static void memcg_wakeup_oom(struct mem_cgroup *mem)
1370 1370
1371static void memcg_oom_recover(struct mem_cgroup *mem) 1371static void memcg_oom_recover(struct mem_cgroup *mem)
1372{ 1372{
1373 if (mem->oom_kill_disable && atomic_read(&mem->oom_lock)) 1373 if (atomic_read(&mem->oom_lock))
1374 memcg_wakeup_oom(mem); 1374 memcg_wakeup_oom(mem);
1375} 1375}
1376 1376
@@ -3781,6 +3781,8 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
3781 return -EINVAL; 3781 return -EINVAL;
3782 } 3782 }
3783 mem->oom_kill_disable = val; 3783 mem->oom_kill_disable = val;
3784 if (!val)
3785 memcg_oom_recover(mem);
3784 cgroup_unlock(); 3786 cgroup_unlock();
3785 return 0; 3787 return 0;
3786} 3788}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 620b0b461593..6b44e52cacaa 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -45,6 +45,7 @@
45#include <linux/page-isolation.h> 45#include <linux/page-isolation.h>
46#include <linux/suspend.h> 46#include <linux/suspend.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/swapops.h>
48#include "internal.h" 49#include "internal.h"
49 50
50int sysctl_memory_failure_early_kill __read_mostly = 0; 51int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1296,3 +1297,35 @@ done:
1296 /* keep elevated page count for bad page */ 1297 /* keep elevated page count for bad page */
1297 return ret; 1298 return ret;
1298} 1299}
1300
1301/*
1302 * The caller must hold current->mm->mmap_sem in read mode.
1303 */
1304int is_hwpoison_address(unsigned long addr)
1305{
1306 pgd_t *pgdp;
1307 pud_t pud, *pudp;
1308 pmd_t pmd, *pmdp;
1309 pte_t pte, *ptep;
1310 swp_entry_t entry;
1311
1312 pgdp = pgd_offset(current->mm, addr);
1313 if (!pgd_present(*pgdp))
1314 return 0;
1315 pudp = pud_offset(pgdp, addr);
1316 pud = *pudp;
1317 if (!pud_present(pud) || pud_large(pud))
1318 return 0;
1319 pmdp = pmd_offset(pudp, addr);
1320 pmd = *pmdp;
1321 if (!pmd_present(pmd) || pmd_large(pmd))
1322 return 0;
1323 ptep = pte_offset_map(pmdp, addr);
1324 pte = *ptep;
1325 pte_unmap(ptep);
1326 if (!is_swap_pte(pte))
1327 return 0;
1328 entry = pte_to_swp_entry(pte);
1329 return is_hwpoison_entry(entry);
1330}
1331EXPORT_SYMBOL_GPL(is_hwpoison_address);
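
The helper added above documents that current->mm->mmap_sem must be held for read across the call. A hypothetical caller might look like the sketch below; the wrapper name is invented, and only is_hwpoison_address() comes from this patch.

/* Illustrative sketch only -- not part of this patch. */
static int example_addr_is_poisoned(unsigned long addr)
{
	int poisoned;

	down_read(&current->mm->mmap_sem);	/* required by is_hwpoison_address() */
	poisoned = is_hwpoison_address(addr);
	up_read(&current->mm->mmap_sem);

	return poisoned;
}
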
diff --git a/mm/memory.c b/mm/memory.c
index 119b7ccdf39b..bde42c6d3633 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1394,10 +1394,20 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1394 return i ? : -EFAULT; 1394 return i ? : -EFAULT;
1395 } 1395 }
1396 if (pages) { 1396 if (pages) {
1397 struct page *page = vm_normal_page(gate_vma, start, *pte); 1397 struct page *page;
1398
1399 page = vm_normal_page(gate_vma, start, *pte);
1400 if (!page) {
1401 if (!(gup_flags & FOLL_DUMP) &&
1402 is_zero_pfn(pte_pfn(*pte)))
1403 page = pte_page(*pte);
1404 else {
1405 pte_unmap(pte);
1406 return i ? : -EFAULT;
1407 }
1408 }
1398 pages[i] = page; 1409 pages[i] = page;
1399 if (page) 1410 get_page(page);
1400 get_page(page);
1401 } 1411 }
1402 pte_unmap(pte); 1412 pte_unmap(pte);
1403 if (vmas) 1413 if (vmas)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5d6fb339de03..5bc0a96beb51 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2094,7 +2094,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
2094 NODEMASK_SCRATCH(scratch); 2094 NODEMASK_SCRATCH(scratch);
2095 2095
2096 if (!scratch) 2096 if (!scratch)
2097 return; 2097 goto put_mpol;
2098 /* contextualize the tmpfs mount point mempolicy */ 2098 /* contextualize the tmpfs mount point mempolicy */
2099 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); 2099 new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
2100 if (IS_ERR(new)) 2100 if (IS_ERR(new))
@@ -2103,19 +2103,20 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
2103 task_lock(current); 2103 task_lock(current);
2104 ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch); 2104 ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
2105 task_unlock(current); 2105 task_unlock(current);
2106 mpol_put(mpol); /* drop our ref on sb mpol */
2107 if (ret) 2106 if (ret)
2108 goto put_free; 2107 goto put_new;
2109 2108
2110 /* Create pseudo-vma that contains just the policy */ 2109 /* Create pseudo-vma that contains just the policy */
2111 memset(&pvma, 0, sizeof(struct vm_area_struct)); 2110 memset(&pvma, 0, sizeof(struct vm_area_struct));
2112 pvma.vm_end = TASK_SIZE; /* policy covers entire file */ 2111 pvma.vm_end = TASK_SIZE; /* policy covers entire file */
2113 mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ 2112 mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
2114 2113
2115put_free: 2114put_new:
2116 mpol_put(new); /* drop initial ref */ 2115 mpol_put(new); /* drop initial ref */
2117free_scratch: 2116free_scratch:
2118 NODEMASK_SCRATCH_FREE(scratch); 2117 NODEMASK_SCRATCH_FREE(scratch);
2118put_mpol:
2119 mpol_put(mpol); /* drop our incoming ref on sb mpol */
2119 } 2120 }
2120} 2121}
2121 2122
diff --git a/mm/mmap.c b/mm/mmap.c
index 456ec6f27889..e38e910cb756 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1734,8 +1734,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1734 grow = (address - vma->vm_end) >> PAGE_SHIFT; 1734 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1735 1735
1736 error = acct_stack_growth(vma, size, grow); 1736 error = acct_stack_growth(vma, size, grow);
1737 if (!error) 1737 if (!error) {
1738 vma->vm_end = address; 1738 vma->vm_end = address;
1739 perf_event_mmap(vma);
1740 }
1739 } 1741 }
1740 anon_vma_unlock(vma); 1742 anon_vma_unlock(vma);
1741 return error; 1743 return error;
@@ -1781,6 +1783,7 @@ static int expand_downwards(struct vm_area_struct *vma,
1781 if (!error) { 1783 if (!error) {
1782 vma->vm_start = address; 1784 vma->vm_start = address;
1783 vma->vm_pgoff -= grow; 1785 vma->vm_pgoff -= grow;
1786 perf_event_mmap(vma);
1784 } 1787 }
1785 } 1788 }
1786 anon_vma_unlock(vma); 1789 anon_vma_unlock(vma);
@@ -2208,6 +2211,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2208 vma->vm_page_prot = vm_get_page_prot(flags); 2211 vma->vm_page_prot = vm_get_page_prot(flags);
2209 vma_link(mm, vma, prev, rb_link, rb_parent); 2212 vma_link(mm, vma, prev, rb_link, rb_parent);
2210out: 2213out:
2214 perf_event_mmap(vma);
2211 mm->total_vm += len >> PAGE_SHIFT; 2215 mm->total_vm += len >> PAGE_SHIFT;
2212 if (flags & VM_LOCKED) { 2216 if (flags & VM_LOCKED) {
2213 if (!mlock_vma_pages_range(vma, addr, addr + len)) 2217 if (!mlock_vma_pages_range(vma, addr, addr + len))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5fa63bdf52e4..37498ef61548 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping,
495 495
496 for (;;) { 496 for (;;) {
497 struct writeback_control wbc = { 497 struct writeback_control wbc = {
498 .bdi = bdi,
499 .sync_mode = WB_SYNC_NONE, 498 .sync_mode = WB_SYNC_NONE,
500 .older_than_this = NULL, 499 .older_than_this = NULL,
501 .nr_to_write = write_chunk, 500 .nr_to_write = write_chunk,
@@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping,
537 * up. 536 * up.
538 */ 537 */
539 if (bdi_nr_reclaimable > bdi_thresh) { 538 if (bdi_nr_reclaimable > bdi_thresh) {
540 writeback_inodes_wbc(&wbc); 539 writeback_inodes_wb(&bdi->wb, &wbc);
541 pages_written += write_chunk - wbc.nr_to_write; 540 pages_written += write_chunk - wbc.nr_to_write;
542 get_dirty_limits(&background_thresh, &dirty_thresh, 541 get_dirty_limits(&background_thresh, &dirty_thresh,
543 &bdi_thresh, bdi); 542 &bdi_thresh, bdi);
@@ -597,7 +596,7 @@ static void balance_dirty_pages(struct address_space *mapping,
597 (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) 596 (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
598 + global_page_state(NR_UNSTABLE_NFS)) 597 + global_page_state(NR_UNSTABLE_NFS))
599 > background_thresh))) 598 > background_thresh)))
600 bdi_start_writeback(bdi, NULL, 0); 599 bdi_start_background_writeback(bdi);
601} 600}
602 601
603void set_page_dirty_balance(struct page *page, int page_mkwrite) 602void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -705,9 +704,8 @@ void laptop_mode_timer_fn(unsigned long data)
705 * We want to write everything out, not just down to the dirty 704 * We want to write everything out, not just down to the dirty
706 * threshold 705 * threshold
707 */ 706 */
708
709 if (bdi_has_dirty_io(&q->backing_dev_info)) 707 if (bdi_has_dirty_io(&q->backing_dev_info))
710 bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); 708 bdi_start_writeback(&q->backing_dev_info, nr_pages);
711} 709}
712 710
713/* 711/*
@@ -835,7 +833,6 @@ int write_cache_pages(struct address_space *mapping,
835 pgoff_t done_index; 833 pgoff_t done_index;
836 int cycled; 834 int cycled;
837 int range_whole = 0; 835 int range_whole = 0;
838 long nr_to_write = wbc->nr_to_write;
839 836
840 pagevec_init(&pvec, 0); 837 pagevec_init(&pvec, 0);
841 if (wbc->range_cyclic) { 838 if (wbc->range_cyclic) {
@@ -852,7 +849,22 @@ int write_cache_pages(struct address_space *mapping,
852 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 849 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
853 range_whole = 1; 850 range_whole = 1;
854 cycled = 1; /* ignore range_cyclic tests */ 851 cycled = 1; /* ignore range_cyclic tests */
852
853 /*
854 * If this is a data integrity sync, cap the writeback to the
855 * current end of file. Any extension to the file that occurs
856 * after this is a new write and we don't need to write those
857 * pages out to fulfil our data integrity requirements. If we
858 * try to write them out, we can get stuck in this scan until
859 * the concurrent writer stops adding dirty pages and extending
860 * EOF.
861 */
862 if (wbc->sync_mode == WB_SYNC_ALL &&
863 wbc->range_end == LLONG_MAX) {
864 end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT;
865 }
855 } 866 }
867
856retry: 868retry:
857 done_index = index; 869 done_index = index;
858 while (!done && (index <= end)) { 870 while (!done && (index <= end)) {
@@ -935,11 +947,10 @@ continue_unlock:
935 done = 1; 947 done = 1;
936 break; 948 break;
937 } 949 }
938 } 950 }
939 951
940 if (nr_to_write > 0) { 952 if (wbc->nr_to_write > 0) {
941 nr_to_write--; 953 if (--wbc->nr_to_write == 0 &&
942 if (nr_to_write == 0 &&
943 wbc->sync_mode == WB_SYNC_NONE) { 954 wbc->sync_mode == WB_SYNC_NONE) {
944 /* 955 /*
945 * We stop writing back only if we are 956 * We stop writing back only if we are
@@ -970,11 +981,8 @@ continue_unlock:
970 end = writeback_index - 1; 981 end = writeback_index - 1;
971 goto retry; 982 goto retry;
972 } 983 }
973 if (!wbc->no_nrwrite_index_update) { 984 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
974 if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) 985 mapping->writeback_index = done_index;
975 mapping->writeback_index = done_index;
976 wbc->nr_to_write = nr_to_write;
977 }
978 986
979 return ret; 987 return ret;
980} 988}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 431214b941ac..9bd339eb04c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3634 int i; 3634 int i;
3635 void *ptr; 3635 void *ptr;
3636 3636
3637 if (limit > get_max_mapped())
3638 limit = get_max_mapped();
3639
3637 /* need to go over early_node_map to find out good range for node */ 3640 /* need to go over early_node_map to find out good range for node */
3638 for_each_active_range_index_in_nid(i, nid) { 3641 for_each_active_range_index_in_nid(i, nid) {
3639 u64 addr; 3642 u64 addr;
@@ -3659,6 +3662,11 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3659 ptr = phys_to_virt(addr); 3662 ptr = phys_to_virt(addr);
3660 memset(ptr, 0, size); 3663 memset(ptr, 0, size);
3661 reserve_early_without_check(addr, addr + size, "BOOTMEM"); 3664 reserve_early_without_check(addr, addr + size, "BOOTMEM");
3665 /*
3666 * The min_count is set to 0 so that bootmem allocated blocks
3667 * are never reported as leaks.
3668 */
3669 kmemleak_alloc(ptr, size, 0, 0);
3662 return ptr; 3670 return ptr;
3663 } 3671 }
3664 3672
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6c0081441a32..5bffada7cde1 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -9,6 +9,7 @@
9#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
10#include <linux/cgroup.h> 10#include <linux/cgroup.h>
11#include <linux/swapops.h> 11#include <linux/swapops.h>
12#include <linux/kmemleak.h>
12 13
13static void __meminit 14static void __meminit
14__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) 15__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -126,6 +127,12 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
126 if (!base) 127 if (!base)
127 base = vmalloc(table_size); 128 base = vmalloc(table_size);
128 } 129 }
130 /*
131 * The value stored in section->page_cgroup is (base - pfn)
132 * and it does not point to the memory block allocated above,
133 * causing kmemleak false positives.
134 */
135 kmemleak_not_leak(base);
129 } else { 136 } else {
130 /* 137 /*
131 * We don't have to allocate page_cgroup again, but 138 * We don't have to allocate page_cgroup again, but
diff --git a/mm/percpu.c b/mm/percpu.c
index 39f7dfd59585..e61dc2cc5873 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -229,8 +229,8 @@ static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
229 return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; 229 return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
230} 230}
231 231
232static unsigned long __maybe_unused pcpu_chunk_addr(struct pcpu_chunk *chunk, 232static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
233 unsigned int cpu, int page_idx) 233 unsigned int cpu, int page_idx)
234{ 234{
235 return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] + 235 return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
236 (page_idx << PAGE_SHIFT); 236 (page_idx << PAGE_SHIFT);
@@ -282,6 +282,9 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
282 */ 282 */
283static void *pcpu_mem_alloc(size_t size) 283static void *pcpu_mem_alloc(size_t size)
284{ 284{
285 if (WARN_ON_ONCE(!slab_is_available()))
286 return NULL;
287
285 if (size <= PAGE_SIZE) 288 if (size <= PAGE_SIZE)
286 return kzalloc(size, GFP_KERNEL); 289 return kzalloc(size, GFP_KERNEL);
287 else { 290 else {
@@ -392,13 +395,6 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
392 old_size = chunk->map_alloc * sizeof(chunk->map[0]); 395 old_size = chunk->map_alloc * sizeof(chunk->map[0]);
393 memcpy(new, chunk->map, old_size); 396 memcpy(new, chunk->map, old_size);
394 397
395 /*
396 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
397 * one of the first chunks and still using static map.
398 */
399 if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
400 old = chunk->map;
401
402 chunk->map_alloc = new_alloc; 398 chunk->map_alloc = new_alloc;
403 chunk->map = new; 399 chunk->map = new;
404 new = NULL; 400 new = NULL;
@@ -604,7 +600,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
604{ 600{
605 struct pcpu_chunk *chunk; 601 struct pcpu_chunk *chunk;
606 602
607 chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL); 603 chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
608 if (!chunk) 604 if (!chunk)
609 return NULL; 605 return NULL;
610 606
@@ -978,7 +974,32 @@ bool is_kernel_percpu_address(unsigned long addr)
978 */ 974 */
979phys_addr_t per_cpu_ptr_to_phys(void *addr) 975phys_addr_t per_cpu_ptr_to_phys(void *addr)
980{ 976{
981 if (pcpu_addr_in_first_chunk(addr)) { 977 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
978 bool in_first_chunk = false;
979 unsigned long first_start, first_end;
980 unsigned int cpu;
981
982 /*
983 * The following test on first_start/end isn't strictly
984 * necessary but will speed up lookups of addresses which
985 * aren't in the first chunk.
986 */
987 first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
988 first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
989 pcpu_unit_pages);
990 if ((unsigned long)addr >= first_start &&
991 (unsigned long)addr < first_end) {
992 for_each_possible_cpu(cpu) {
993 void *start = per_cpu_ptr(base, cpu);
994
995 if (addr >= start && addr < start + pcpu_unit_size) {
996 in_first_chunk = true;
997 break;
998 }
999 }
1000 }
1001
1002 if (in_first_chunk) {
982 if ((unsigned long)addr < VMALLOC_START || 1003 if ((unsigned long)addr < VMALLOC_START ||
983 (unsigned long)addr >= VMALLOC_END) 1004 (unsigned long)addr >= VMALLOC_END)
984 return __pa(addr); 1005 return __pa(addr);
@@ -988,20 +1009,6 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
988 return page_to_phys(pcpu_addr_to_page(addr)); 1009 return page_to_phys(pcpu_addr_to_page(addr));
989} 1010}
990 1011
991static inline size_t pcpu_calc_fc_sizes(size_t static_size,
992 size_t reserved_size,
993 ssize_t *dyn_sizep)
994{
995 size_t size_sum;
996
997 size_sum = PFN_ALIGN(static_size + reserved_size +
998 (*dyn_sizep >= 0 ? *dyn_sizep : 0));
999 if (*dyn_sizep != 0)
1000 *dyn_sizep = size_sum - static_size - reserved_size;
1001
1002 return size_sum;
1003}
1004
1005/** 1012/**
1006 * pcpu_alloc_alloc_info - allocate percpu allocation info 1013 * pcpu_alloc_alloc_info - allocate percpu allocation info
1007 * @nr_groups: the number of groups 1014 * @nr_groups: the number of groups
@@ -1060,7 +1067,7 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
1060/** 1067/**
1061 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs 1068 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
1062 * @reserved_size: the size of reserved percpu area in bytes 1069 * @reserved_size: the size of reserved percpu area in bytes
1063 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto 1070 * @dyn_size: minimum free size for dynamic allocation in bytes
1064 * @atom_size: allocation atom size 1071 * @atom_size: allocation atom size
1065 * @cpu_distance_fn: callback to determine distance between cpus, optional 1072 * @cpu_distance_fn: callback to determine distance between cpus, optional
1066 * 1073 *
@@ -1078,15 +1085,15 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
1078 * On success, pointer to the new allocation_info is returned. On 1085 * On success, pointer to the new allocation_info is returned. On
1079 * failure, ERR_PTR value is returned. 1086 * failure, ERR_PTR value is returned.
1080 */ 1087 */
1081struct pcpu_alloc_info * __init pcpu_build_alloc_info( 1088static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1082 size_t reserved_size, ssize_t dyn_size, 1089 size_t reserved_size, size_t dyn_size,
1083 size_t atom_size, 1090 size_t atom_size,
1084 pcpu_fc_cpu_distance_fn_t cpu_distance_fn) 1091 pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
1085{ 1092{
1086 static int group_map[NR_CPUS] __initdata; 1093 static int group_map[NR_CPUS] __initdata;
1087 static int group_cnt[NR_CPUS] __initdata; 1094 static int group_cnt[NR_CPUS] __initdata;
1088 const size_t static_size = __per_cpu_end - __per_cpu_start; 1095 const size_t static_size = __per_cpu_end - __per_cpu_start;
1089 int group_cnt_max = 0, nr_groups = 1, nr_units = 0; 1096 int nr_groups = 1, nr_units = 0;
1090 size_t size_sum, min_unit_size, alloc_size; 1097 size_t size_sum, min_unit_size, alloc_size;
1091 int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ 1098 int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
1092 int last_allocs, group, unit; 1099 int last_allocs, group, unit;
@@ -1096,7 +1103,12 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1096 1103
1097 /* this function may be called multiple times */ 1104 /* this function may be called multiple times */
1098 memset(group_map, 0, sizeof(group_map)); 1105 memset(group_map, 0, sizeof(group_map));
1099 memset(group_cnt, 0, sizeof(group_map)); 1106 memset(group_cnt, 0, sizeof(group_cnt));
1107
1108 /* calculate size_sum and ensure dyn_size is enough for early alloc */
1109 size_sum = PFN_ALIGN(static_size + reserved_size +
1110 max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
1111 dyn_size = size_sum - static_size - reserved_size;
1100 1112
1101 /* 1113 /*
1102 * Determine min_unit_size, alloc_size and max_upa such that 1114 * Determine min_unit_size, alloc_size and max_upa such that
@@ -1104,7 +1116,6 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1104 * which can accomodate 4k aligned segments which are equal to 1116 * which can accomodate 4k aligned segments which are equal to
1105 * or larger than min_unit_size. 1117 * or larger than min_unit_size.
1106 */ 1118 */
1107 size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
1108 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); 1119 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
1109 1120
1110 alloc_size = roundup(min_unit_size, atom_size); 1121 alloc_size = roundup(min_unit_size, atom_size);
@@ -1130,7 +1141,6 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1130 } 1141 }
1131 group_map[cpu] = group; 1142 group_map[cpu] = group;
1132 group_cnt[group]++; 1143 group_cnt[group]++;
1133 group_cnt_max = max(group_cnt_max, group_cnt[group]);
1134 } 1144 }
1135 1145
1136 /* 1146 /*
@@ -1326,7 +1336,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1326 void *base_addr) 1336 void *base_addr)
1327{ 1337{
1328 static char cpus_buf[4096] __initdata; 1338 static char cpus_buf[4096] __initdata;
1329 static int smap[2], dmap[2]; 1339 static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
1340 static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
1330 size_t dyn_size = ai->dyn_size; 1341 size_t dyn_size = ai->dyn_size;
1331 size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; 1342 size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
1332 struct pcpu_chunk *schunk, *dchunk = NULL; 1343 struct pcpu_chunk *schunk, *dchunk = NULL;
@@ -1349,14 +1360,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1349} while (0) 1360} while (0)
1350 1361
1351 /* sanity checks */ 1362 /* sanity checks */
1352 BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
1353 ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
1354 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); 1363 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
1355 PCPU_SETUP_BUG_ON(!ai->static_size); 1364 PCPU_SETUP_BUG_ON(!ai->static_size);
1356 PCPU_SETUP_BUG_ON(!base_addr); 1365 PCPU_SETUP_BUG_ON(!base_addr);
1357 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); 1366 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
1358 PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); 1367 PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
1359 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); 1368 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
1369 PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
1360 PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); 1370 PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
1361 1371
1362 /* process group information and build config tables accordingly */ 1372 /* process group information and build config tables accordingly */
@@ -1508,7 +1518,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
1508/** 1518/**
1509 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem 1519 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
1510 * @reserved_size: the size of reserved percpu area in bytes 1520 * @reserved_size: the size of reserved percpu area in bytes
1511 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto 1521 * @dyn_size: minimum free size for dynamic allocation in bytes
1512 * @atom_size: allocation atom size 1522 * @atom_size: allocation atom size
1513 * @cpu_distance_fn: callback to determine distance between cpus, optional 1523 * @cpu_distance_fn: callback to determine distance between cpus, optional
1514 * @alloc_fn: function to allocate percpu page 1524 * @alloc_fn: function to allocate percpu page
@@ -1529,10 +1539,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
1529 * vmalloc space is not orders of magnitude larger than distances 1539 * vmalloc space is not orders of magnitude larger than distances
1530 * between node memory addresses (ie. 32bit NUMA machines). 1540 * between node memory addresses (ie. 32bit NUMA machines).
1531 * 1541 *
1532 * When @dyn_size is positive, dynamic area might be larger than 1542 * @dyn_size specifies the minimum dynamic area size.
1533 * specified to fill page alignment. When @dyn_size is auto,
1534 * @dyn_size is just big enough to fill page alignment after static
1535 * and reserved areas.
1536 * 1543 *
1537 * If the needed size is smaller than the minimum or specified unit 1544 * If the needed size is smaller than the minimum or specified unit
1538 * size, the leftover is returned using @free_fn. 1545 * size, the leftover is returned using @free_fn.
@@ -1540,7 +1547,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
1540 * RETURNS: 1547 * RETURNS:
1541 * 0 on success, -errno on failure. 1548 * 0 on success, -errno on failure.
1542 */ 1549 */
1543int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, 1550int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
1544 size_t atom_size, 1551 size_t atom_size,
1545 pcpu_fc_cpu_distance_fn_t cpu_distance_fn, 1552 pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
1546 pcpu_fc_alloc_fn_t alloc_fn, 1553 pcpu_fc_alloc_fn_t alloc_fn,
@@ -1671,7 +1678,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
1671 1678
1672 snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); 1679 snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
1673 1680
1674 ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); 1681 ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
1675 if (IS_ERR(ai)) 1682 if (IS_ERR(ai))
1676 return PTR_ERR(ai); 1683 return PTR_ERR(ai);
1677 BUG_ON(ai->nr_groups != 1); 1684 BUG_ON(ai->nr_groups != 1);
@@ -1797,3 +1804,33 @@ void __init setup_per_cpu_areas(void)
1797 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; 1804 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
1798} 1805}
1799#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ 1806#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
1807
1808/*
1809 * First and reserved chunks are initialized with temporary allocation
1810 * map in initdata so that they can be used before slab is online.
1811 * This function is called after slab is brought up and replaces those
1812 * with properly allocated maps.
1813 */
1814void __init percpu_init_late(void)
1815{
1816 struct pcpu_chunk *target_chunks[] =
1817 { pcpu_first_chunk, pcpu_reserved_chunk, NULL };
1818 struct pcpu_chunk *chunk;
1819 unsigned long flags;
1820 int i;
1821
1822 for (i = 0; (chunk = target_chunks[i]); i++) {
1823 int *map;
1824 const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
1825
1826 BUILD_BUG_ON(size > PAGE_SIZE);
1827
1828 map = pcpu_mem_alloc(size);
1829 BUG_ON(!map);
1830
1831 spin_lock_irqsave(&pcpu_lock, flags);
1832 memcpy(map, chunk->map, size);
1833 chunk->map = map;
1834 spin_unlock_irqrestore(&pcpu_lock, flags);
1835 }
1836}
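
The new percpu_init_late() above replaces the initdata allocation maps of the first and reserved chunks with slab-backed ones, so it must run once the slab allocator is usable. The actual generic-init call site is outside this mm/ diff; the helper below is only a hypothetical placement sketch.

/* Illustrative sketch only -- the real call site lives in generic init code. */
void __init example_post_slab_setup(void)
{
	/*
	 * Safe point: kmalloc()/kzalloc() work here, so pcpu_mem_alloc()
	 * can hand out slab memory for the replacement maps.
	 */
	percpu_init_late();
}
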
diff --git a/mm/slab.c b/mm/slab.c
index e49f8f46f46d..736e497733d6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,6 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <linux/kmemtrace.h>
106#include <linux/rcupdate.h> 105#include <linux/rcupdate.h>
107#include <linux/string.h> 106#include <linux/string.h>
108#include <linux/uaccess.h> 107#include <linux/uaccess.h>
@@ -861,7 +860,7 @@ static void __cpuinit start_cpu_timer(int cpu)
861 */ 860 */
862 if (keventd_up() && reap_work->work.func == NULL) { 861 if (keventd_up() && reap_work->work.func == NULL) {
863 init_reap_node(cpu); 862 init_reap_node(cpu);
864 INIT_DELAYED_WORK(reap_work, cache_reap); 863 INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
865 schedule_delayed_work_on(cpu, reap_work, 864 schedule_delayed_work_on(cpu, reap_work,
866 __round_jiffies_relative(HZ, cpu)); 865 __round_jiffies_relative(HZ, cpu));
867 } 866 }
diff --git a/mm/slob.c b/mm/slob.c
index 23631e2bb57a..d582171c8101 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,8 +66,10 @@
66#include <linux/module.h> 66#include <linux/module.h>
67#include <linux/rcupdate.h> 67#include <linux/rcupdate.h>
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/kmemtrace.h>
70#include <linux/kmemleak.h> 69#include <linux/kmemleak.h>
70
71#include <trace/events/kmem.h>
72
71#include <asm/atomic.h> 73#include <asm/atomic.h>
72 74
73/* 75/*
@@ -394,6 +396,7 @@ static void slob_free(void *block, int size)
394 slob_t *prev, *next, *b = (slob_t *)block; 396 slob_t *prev, *next, *b = (slob_t *)block;
395 slobidx_t units; 397 slobidx_t units;
396 unsigned long flags; 398 unsigned long flags;
399 struct list_head *slob_list;
397 400
398 if (unlikely(ZERO_OR_NULL_PTR(block))) 401 if (unlikely(ZERO_OR_NULL_PTR(block)))
399 return; 402 return;
@@ -422,7 +425,13 @@ static void slob_free(void *block, int size)
422 set_slob(b, units, 425 set_slob(b, units,
423 (void *)((unsigned long)(b + 426 (void *)((unsigned long)(b +
424 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK)); 427 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
425 set_slob_page_free(sp, &free_slob_small); 428 if (size < SLOB_BREAK1)
429 slob_list = &free_slob_small;
430 else if (size < SLOB_BREAK2)
431 slob_list = &free_slob_medium;
432 else
433 slob_list = &free_slob_large;
434 set_slob_page_free(sp, slob_list);
426 goto out; 435 goto out;
427 } 436 }
428 437
@@ -639,7 +648,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
639 if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { 648 if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) {
640 struct slob_rcu *slob_rcu; 649 struct slob_rcu *slob_rcu;
641 slob_rcu = b + (c->size - sizeof(struct slob_rcu)); 650 slob_rcu = b + (c->size - sizeof(struct slob_rcu));
642 INIT_RCU_HEAD(&slob_rcu->head);
643 slob_rcu->size = c->size; 651 slob_rcu->size = c->size;
644 call_rcu(&slob_rcu->head, kmem_rcu_free); 652 call_rcu(&slob_rcu->head, kmem_rcu_free);
645 } else { 653 } else {
diff --git a/mm/slub.c b/mm/slub.c
index 578f68f3c51f..13fffe1f0f3d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/kmemtrace.h>
21#include <linux/kmemcheck.h> 20#include <linux/kmemcheck.h>
22#include <linux/cpu.h> 21#include <linux/cpu.h>
23#include <linux/cpuset.h> 22#include <linux/cpuset.h>
@@ -107,11 +106,17 @@
107 * the fast path and disables lockless freelists. 106 * the fast path and disables lockless freelists.
108 */ 107 */
109 108
109#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
110 SLAB_TRACE | SLAB_DEBUG_FREE)
111
112static inline int kmem_cache_debug(struct kmem_cache *s)
113{
110#ifdef CONFIG_SLUB_DEBUG 114#ifdef CONFIG_SLUB_DEBUG
111#define SLABDEBUG 1 115 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
112#else 116#else
113#define SLABDEBUG 0 117 return 0;
114#endif 118#endif
119}
115 120
116/* 121/*
117 * Issues still to be resolved: 122 * Issues still to be resolved:
@@ -162,8 +167,8 @@
162#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */ 167#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
163 168
164/* Internal SLUB flags */ 169/* Internal SLUB flags */
165#define __OBJECT_POISON 0x80000000 /* Poison object */ 170#define __OBJECT_POISON 0x80000000UL /* Poison object */
166#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ 171#define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */
167 172
168static int kmem_size = sizeof(struct kmem_cache); 173static int kmem_size = sizeof(struct kmem_cache);
169 174
@@ -1073,7 +1078,7 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
1073 1078
1074 flags |= __GFP_NOTRACK; 1079 flags |= __GFP_NOTRACK;
1075 1080
1076 if (node == -1) 1081 if (node == NUMA_NO_NODE)
1077 return alloc_pages(flags, order); 1082 return alloc_pages(flags, order);
1078 else 1083 else
1079 return alloc_pages_exact_node(node, flags, order); 1084 return alloc_pages_exact_node(node, flags, order);
@@ -1157,9 +1162,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1157 inc_slabs_node(s, page_to_nid(page), page->objects); 1162 inc_slabs_node(s, page_to_nid(page), page->objects);
1158 page->slab = s; 1163 page->slab = s;
1159 page->flags |= 1 << PG_slab; 1164 page->flags |= 1 << PG_slab;
1160 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
1161 SLAB_STORE_USER | SLAB_TRACE))
1162 __SetPageSlubDebug(page);
1163 1165
1164 start = page_address(page); 1166 start = page_address(page);
1165 1167
@@ -1186,14 +1188,13 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1186 int order = compound_order(page); 1188 int order = compound_order(page);
1187 int pages = 1 << order; 1189 int pages = 1 << order;
1188 1190
1189 if (unlikely(SLABDEBUG && PageSlubDebug(page))) { 1191 if (kmem_cache_debug(s)) {
1190 void *p; 1192 void *p;
1191 1193
1192 slab_pad_check(s, page); 1194 slab_pad_check(s, page);
1193 for_each_object(p, s, page_address(page), 1195 for_each_object(p, s, page_address(page),
1194 page->objects) 1196 page->objects)
1195 check_object(s, page, p, 0); 1197 check_object(s, page, p, 0);
1196 __ClearPageSlubDebug(page);
1197 } 1198 }
1198 1199
1199 kmemcheck_free_shadow(page, compound_order(page)); 1200 kmemcheck_free_shadow(page, compound_order(page));
@@ -1387,10 +1388,10 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1387static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) 1388static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1388{ 1389{
1389 struct page *page; 1390 struct page *page;
1390 int searchnode = (node == -1) ? numa_node_id() : node; 1391 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1391 1392
1392 page = get_partial_node(get_node(s, searchnode)); 1393 page = get_partial_node(get_node(s, searchnode));
1393 if (page || (flags & __GFP_THISNODE)) 1394 if (page || node != -1)
1394 return page; 1395 return page;
1395 1396
1396 return get_any_partial(s, flags); 1397 return get_any_partial(s, flags);
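
After this hunk the cross-node fallback in get_partial() runs only when the caller passed no node at all; previously it was suppressed only by __GFP_THISNODE. The standalone sketch below mirrors that search order with stand-in names (and uses the named constant throughout): try the requested, or local, node first, and roam to other nodes only when no node was requested.

/* Sketch of the partial-slab search order: look on the requested (or
 * local) node first, and fall back to other nodes only when the caller
 * expressed no node preference at all.  Names are illustrative. */
#include <stdio.h>

#define NO_NODE (-1)

static const char *get_partial_node(int node)
{
        return (node == 0) ? "partial slab from node 0" : NULL;
}

static const char *get_any_partial(void)
{
        return "partial slab from some other node";
}

static const char *get_partial(int node, int local_node)
{
        int searchnode = (node == NO_NODE) ? local_node : node;
        const char *page = get_partial_node(searchnode);

        if (page || node != NO_NODE)    /* explicit request: do not roam */
                return page;
        return get_any_partial();
}

int main(void)
{
        const char *p;

        p = get_partial(NO_NODE, 1);    /* no preference, node 1 is local */
        printf("%s\n", p ? p : "none");

        p = get_partial(2, 1);          /* node 2 requested but empty     */
        printf("%s\n", p ? p : "none (stays on requested node)");
        return 0;
}
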
@@ -1415,8 +1416,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1415 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1416 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1416 } else { 1417 } else {
1417 stat(s, DEACTIVATE_FULL); 1418 stat(s, DEACTIVATE_FULL);
1418 if (SLABDEBUG && PageSlubDebug(page) && 1419 if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
1419 (s->flags & SLAB_STORE_USER))
1420 add_full(n, page); 1420 add_full(n, page);
1421 } 1421 }
1422 slab_unlock(page); 1422 slab_unlock(page);
@@ -1515,7 +1515,7 @@ static void flush_all(struct kmem_cache *s)
1515static inline int node_match(struct kmem_cache_cpu *c, int node) 1515static inline int node_match(struct kmem_cache_cpu *c, int node)
1516{ 1516{
1517#ifdef CONFIG_NUMA 1517#ifdef CONFIG_NUMA
1518 if (node != -1 && c->node != node) 1518 if (node != NUMA_NO_NODE && c->node != node)
1519 return 0; 1519 return 0;
1520#endif 1520#endif
1521 return 1; 1521 return 1;
@@ -1624,7 +1624,7 @@ load_freelist:
1624 object = c->page->freelist; 1624 object = c->page->freelist;
1625 if (unlikely(!object)) 1625 if (unlikely(!object))
1626 goto another_slab; 1626 goto another_slab;
1627 if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) 1627 if (kmem_cache_debug(s))
1628 goto debug; 1628 goto debug;
1629 1629
1630 c->freelist = get_freepointer(s, object); 1630 c->freelist = get_freepointer(s, object);
@@ -1727,7 +1727,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1727 1727
1728void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1728void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1729{ 1729{
1730 void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_); 1730 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
1731 1731
1732 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); 1732 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
1733 1733
@@ -1738,7 +1738,7 @@ EXPORT_SYMBOL(kmem_cache_alloc);
1738#ifdef CONFIG_TRACING 1738#ifdef CONFIG_TRACING
1739void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) 1739void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
1740{ 1740{
1741 return slab_alloc(s, gfpflags, -1, _RET_IP_); 1741 return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
1742} 1742}
1743EXPORT_SYMBOL(kmem_cache_alloc_notrace); 1743EXPORT_SYMBOL(kmem_cache_alloc_notrace);
1744#endif 1744#endif
@@ -1783,7 +1783,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
1783 stat(s, FREE_SLOWPATH); 1783 stat(s, FREE_SLOWPATH);
1784 slab_lock(page); 1784 slab_lock(page);
1785 1785
1786 if (unlikely(SLABDEBUG && PageSlubDebug(page))) 1786 if (kmem_cache_debug(s))
1787 goto debug; 1787 goto debug;
1788 1788
1789checks_ok: 1789checks_ok:
@@ -2490,7 +2490,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
2490 s->refcount--; 2490 s->refcount--;
2491 if (!s->refcount) { 2491 if (!s->refcount) {
2492 list_del(&s->list); 2492 list_del(&s->list);
2493 up_write(&slub_lock);
2494 if (kmem_cache_close(s)) { 2493 if (kmem_cache_close(s)) {
2495 printk(KERN_ERR "SLUB %s: %s called for cache that " 2494 printk(KERN_ERR "SLUB %s: %s called for cache that "
2496 "still has objects.\n", s->name, __func__); 2495 "still has objects.\n", s->name, __func__);
@@ -2499,8 +2498,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
2499 if (s->flags & SLAB_DESTROY_BY_RCU) 2498 if (s->flags & SLAB_DESTROY_BY_RCU)
2500 rcu_barrier(); 2499 rcu_barrier();
2501 sysfs_slab_remove(s); 2500 sysfs_slab_remove(s);
2502 } else 2501 }
2503 up_write(&slub_lock); 2502 up_write(&slub_lock);
2504} 2503}
2505EXPORT_SYMBOL(kmem_cache_destroy); 2504EXPORT_SYMBOL(kmem_cache_destroy);
2506 2505
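
The kmem_cache_destroy() change keeps slub_lock held across kmem_cache_close() and the sysfs removal instead of dropping it as soon as the cache is unlinked, so the whole teardown runs in one critical section and the lock is released exactly once at the end. Below is a hedged pthread sketch of that widened-critical-section shape; the lock and helper names are invented for the example (build with -pthread).

/* Sketch of widening a teardown critical section: the registry lock is
 * held across both the unlink and the teardown work, so other lock
 * holders never run in the middle of a half-destroyed entry. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t registry_lock = PTHREAD_RWLOCK_INITIALIZER;

struct cache { const char *name; int refcount; int live; };

static void cache_close(struct cache *c)
{
        c->live = 0;                    /* free per-node structures etc. */
}

static void cache_destroy(struct cache *c)
{
        pthread_rwlock_wrlock(&registry_lock);
        if (--c->refcount == 0) {
                /* unlink from the registry list would happen here */
                cache_close(c);         /* still under the lock */
        }
        pthread_rwlock_unlock(&registry_lock);
        printf("%s destroyed, live=%d\n", c->name, c->live);
}

int main(void)
{
        struct cache c = { "demo", 1, 1 };

        cache_destroy(&c);
        return 0;
}
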
@@ -2728,7 +2727,7 @@ void *__kmalloc(size_t size, gfp_t flags)
2728 if (unlikely(ZERO_OR_NULL_PTR(s))) 2727 if (unlikely(ZERO_OR_NULL_PTR(s)))
2729 return s; 2728 return s;
2730 2729
2731 ret = slab_alloc(s, flags, -1, _RET_IP_); 2730 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
2732 2731
2733 trace_kmalloc(_RET_IP_, ret, size, s->size, flags); 2732 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
2734 2733
@@ -3118,9 +3117,12 @@ void __init kmem_cache_init(void)
3118 slab_state = UP; 3117 slab_state = UP;
3119 3118
3120 /* Provide the correct kmalloc names now that the caches are up */ 3119 /* Provide the correct kmalloc names now that the caches are up */
3121 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) 3120 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3122 kmalloc_caches[i]. name = 3121 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3123 kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); 3122
3123 BUG_ON(!s);
3124 kmalloc_caches[i].name = s;
3125 }
3124 3126
3125#ifdef CONFIG_SMP 3127#ifdef CONFIG_SMP
3126 register_cpu_notifier(&slab_notifier); 3128 register_cpu_notifier(&slab_notifier);
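
The renaming loop in kmem_cache_init() now captures the kasprintf() result in a local, BUG_ON()s a NULL return, and only then assigns the name, rather than storing a possibly-NULL pointer straight into the cache. The standalone sketch below mirrors that shape with asprintf() and an assert; it is illustrative only.

/* Sketch: build formatted names in a loop and refuse to continue if the
 * allocation fails, instead of silently storing a NULL name. */
#define _GNU_SOURCE
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define LOW_SHIFT  3
#define HIGH_SHIFT 12

int main(void)
{
        char *names[HIGH_SHIFT] = { 0 };

        for (int i = LOW_SHIFT; i < HIGH_SHIFT; i++) {
                char *s = NULL;

                if (asprintf(&s, "kmalloc-%d", 1 << i) < 0)
                        s = NULL;
                assert(s);              /* the kernel uses BUG_ON(!s) here */
                names[i] = s;
        }

        for (int i = LOW_SHIFT; i < HIGH_SHIFT; i++)
                printf("%s\n", names[i]);

        for (int i = LOW_SHIFT; i < HIGH_SHIFT; i++)
                free(names[i]);
        return 0;
}
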
@@ -3223,14 +3225,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3223 */ 3225 */
3224 s->objsize = max(s->objsize, (int)size); 3226 s->objsize = max(s->objsize, (int)size);
3225 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3227 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3226 up_write(&slub_lock);
3227 3228
3228 if (sysfs_slab_alias(s, name)) { 3229 if (sysfs_slab_alias(s, name)) {
3229 down_write(&slub_lock);
3230 s->refcount--; 3230 s->refcount--;
3231 up_write(&slub_lock);
3232 goto err; 3231 goto err;
3233 } 3232 }
3233 up_write(&slub_lock);
3234 return s; 3234 return s;
3235 } 3235 }
3236 3236
@@ -3239,14 +3239,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3239 if (kmem_cache_open(s, GFP_KERNEL, name, 3239 if (kmem_cache_open(s, GFP_KERNEL, name,
3240 size, align, flags, ctor)) { 3240 size, align, flags, ctor)) {
3241 list_add(&s->list, &slab_caches); 3241 list_add(&s->list, &slab_caches);
3242 up_write(&slub_lock);
3243 if (sysfs_slab_add(s)) { 3242 if (sysfs_slab_add(s)) {
3244 down_write(&slub_lock);
3245 list_del(&s->list); 3243 list_del(&s->list);
3246 up_write(&slub_lock);
3247 kfree(s); 3244 kfree(s);
3248 goto err; 3245 goto err;
3249 } 3246 }
3247 up_write(&slub_lock);
3250 return s; 3248 return s;
3251 } 3249 }
3252 kfree(s); 3250 kfree(s);
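
kmem_cache_create() gets the same treatment: the list insertion, the sysfs registration, and the error-path unlink now all happen before slub_lock is released, removing the drop-and-retake dance that each failure path needed before. A compressed sketch of that publish-or-unwind-under-one-lock shape follows, with invented names and a pthread mutex standing in for the rwsem.

/* Sketch: publish a new object and attempt its external registration
 * inside one critical section, unwinding in place if registration
 * fails, instead of dropping and re-taking the lock around each step. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

struct cache { char name[32]; struct cache *next; };
static struct cache *caches;            /* the published list */

static int register_externally(struct cache *c)
{
        return strcmp(c->name, "bad") ? 0 : -1;   /* pretend failure mode */
}

static struct cache *cache_create(const char *name)
{
        struct cache *c = calloc(1, sizeof(*c));

        if (!c)
                return NULL;
        snprintf(c->name, sizeof(c->name), "%s", name);

        pthread_mutex_lock(&registry_lock);
        c->next = caches;
        caches = c;                     /* publish at the head */
        if (register_externally(c)) {
                /* still at the head because the lock was never dropped */
                caches = c->next;
                pthread_mutex_unlock(&registry_lock);
                free(c);
                return NULL;
        }
        pthread_mutex_unlock(&registry_lock);
        return c;
}

int main(void)
{
        printf("good: %p\n", (void *)cache_create("good"));
        printf("bad:  %p\n", (void *)cache_create("bad"));
        return 0;
}
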
@@ -3312,7 +3310,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3312 if (unlikely(ZERO_OR_NULL_PTR(s))) 3310 if (unlikely(ZERO_OR_NULL_PTR(s)))
3313 return s; 3311 return s;
3314 3312
3315 ret = slab_alloc(s, gfpflags, -1, caller); 3313 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
3316 3314
3317 /* Honor the call site pointer we recieved. */ 3315 /* Honor the call site pointer we recieved. */
3318 trace_kmalloc(caller, ret, size, s->size, gfpflags); 3316 trace_kmalloc(caller, ret, size, s->size, gfpflags);
@@ -3395,16 +3393,6 @@ static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3395 } else 3393 } else
3396 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n", 3394 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
3397 s->name, page); 3395 s->name, page);
3398
3399 if (s->flags & DEBUG_DEFAULT_FLAGS) {
3400 if (!PageSlubDebug(page))
3401 printk(KERN_ERR "SLUB %s: SlubDebug not set "
3402 "on slab 0x%p\n", s->name, page);
3403 } else {
3404 if (PageSlubDebug(page))
3405 printk(KERN_ERR "SLUB %s: SlubDebug set on "
3406 "slab 0x%p\n", s->name, page);
3407 }
3408} 3396}
3409 3397
3410static int validate_slab_node(struct kmem_cache *s, 3398static int validate_slab_node(struct kmem_cache *s,
@@ -4504,6 +4492,13 @@ static int sysfs_slab_add(struct kmem_cache *s)
4504 4492
4505static void sysfs_slab_remove(struct kmem_cache *s) 4493static void sysfs_slab_remove(struct kmem_cache *s)
4506{ 4494{
4495 if (slab_state < SYSFS)
4496 /*
4497 * Sysfs has not been setup yet so no need to remove the
4498 * cache from sysfs.
4499 */
4500 return;
4501
4507 kobject_uevent(&s->kobj, KOBJ_REMOVE); 4502 kobject_uevent(&s->kobj, KOBJ_REMOVE);
4508 kobject_del(&s->kobj); 4503 kobject_del(&s->kobj);
4509 kobject_put(&s->kobj); 4504 kobject_put(&s->kobj);
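
sysfs_slab_remove() now returns early while slab_state is still below SYSFS: caches destroyed before the slab kset exists were never registered, so there is nothing to unregister. The sketch below shows the same state-guarded teardown with invented names.

/* Sketch of guarding teardown on an initialisation state: skip the
 * unregister step for objects created before the registry existed. */
#include <stdio.h>

enum state { DOWN, PARTIAL, UP, SYSFS_READY };

static enum state slab_state = UP;      /* sysfs not initialised yet */

struct cache { const char *name; };

static void sysfs_remove(struct cache *c)
{
        if (slab_state < SYSFS_READY) {
                /* never registered, nothing to remove */
                return;
        }
        printf("unregistering %s from sysfs\n", c->name);
}

int main(void)
{
        struct cache c = { "early-cache" };

        sysfs_remove(&c);               /* quietly skipped */
        slab_state = SYSFS_READY;
        sysfs_remove(&c);               /* now does the real work */
        return 0;
}
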
@@ -4549,8 +4544,11 @@ static int __init slab_sysfs_init(void)
4549 struct kmem_cache *s; 4544 struct kmem_cache *s;
4550 int err; 4545 int err;
4551 4546
4547 down_write(&slub_lock);
4548
4552 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 4549 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
4553 if (!slab_kset) { 4550 if (!slab_kset) {
4551 up_write(&slub_lock);
4554 printk(KERN_ERR "Cannot register slab subsystem.\n"); 4552 printk(KERN_ERR "Cannot register slab subsystem.\n");
4555 return -ENOSYS; 4553 return -ENOSYS;
4556 } 4554 }
@@ -4575,6 +4573,7 @@ static int __init slab_sysfs_init(void)
4575 kfree(al); 4573 kfree(al);
4576 } 4574 }
4577 4575
4576 up_write(&slub_lock);
4578 resiliency_test(); 4577 resiliency_test();
4579 return 0; 4578 return 0;
4580} 4579}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ae007462b7f6..b7e314b1009f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2403,7 +2403,7 @@ static int s_show(struct seq_file *m, void *p)
2403 seq_printf(m, " pages=%d", v->nr_pages); 2403 seq_printf(m, " pages=%d", v->nr_pages);
2404 2404
2405 if (v->phys_addr) 2405 if (v->phys_addr)
2406 seq_printf(m, " phys=%lx", v->phys_addr); 2406 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2407 2407
2408 if (v->flags & VM_IOREMAP) 2408 if (v->flags & VM_IOREMAP)
2409 seq_printf(m, " ioremap"); 2409 seq_printf(m, " ioremap");
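
The /proc/vmallocinfo change prints the physical address with %llx and an explicit cast because the field can be wider than unsigned long on 32-bit kernels, where %lx would truncate it. The portable-printing idiom, in plain C, looks like the sketch below; the typedef is only a stand-in for phys_addr_t.

/* Printing a possibly-64-bit address type portably: cast to the widest
 * standard unsigned type and use a matching format specifier. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_like_t;      /* may exceed long on 32-bit */

int main(void)
{
        phys_addr_like_t phys = 0x1ffff0000ULL;   /* above 4 GiB */

        /* printf("phys=%lx", phys) would truncate where long is 32 bits */
        printf("phys=%llx\n", (unsigned long long)phys);
        return 0;
}
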
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c7e57cc63a3..b94fe1b3da43 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
213 list_for_each_entry(shrinker, &shrinker_list, list) { 213 list_for_each_entry(shrinker, &shrinker_list, list) {
214 unsigned long long delta; 214 unsigned long long delta;
215 unsigned long total_scan; 215 unsigned long total_scan;
216 unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask); 216 unsigned long max_pass;
217 217
218 max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
218 delta = (4 * scanned) / shrinker->seeks; 219 delta = (4 * scanned) / shrinker->seeks;
219 delta *= max_pass; 220 delta *= max_pass;
220 do_div(delta, lru_pages + 1); 221 do_div(delta, lru_pages + 1);
@@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
242 int shrink_ret; 243 int shrink_ret;
243 int nr_before; 244 int nr_before;
244 245
245 nr_before = (*shrinker->shrink)(0, gfp_mask); 246 nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
246 shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask); 247 shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
248 gfp_mask);
247 if (shrink_ret == -1) 249 if (shrink_ret == -1)
248 break; 250 break;
249 if (shrink_ret < nr_before) 251 if (shrink_ret < nr_before)
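
Both shrink_slab() call sites now pass the struct shrinker itself as the first argument of ->shrink(), so a callback can locate its own per-instance state (commonly via container_of on an embedded descriptor) instead of relying on globals. A hedged userspace sketch of that callback shape, with illustrative names and a simplified signature, is below.

/* Sketch of a shrink-style callback that receives its own descriptor,
 * letting one function serve several caches via container_of-style
 * pointer arithmetic. */
#include <stddef.h>
#include <stdio.h>

struct shrinker {
        long (*shrink)(struct shrinker *self, unsigned long nr_to_scan);
};

struct my_cache {
        struct shrinker shrinker;       /* embedded descriptor */
        long objects;
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static long my_shrink(struct shrinker *self, unsigned long nr_to_scan)
{
        struct my_cache *c = container_of(self, struct my_cache, shrinker);

        if (nr_to_scan == 0)            /* query: report current size */
                return c->objects;
        if (nr_to_scan > (unsigned long)c->objects)
                nr_to_scan = c->objects;
        c->objects -= nr_to_scan;       /* pretend to reclaim */
        return c->objects;
}

int main(void)
{
        struct my_cache c = { { my_shrink }, 128 };

        printf("before: %ld\n", c.shrinker.shrink(&c.shrinker, 0));
        c.shrinker.shrink(&c.shrinker, 32);
        printf("after:  %ld\n", c.shrinker.shrink(&c.shrinker, 0));
        return 0;
}
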
@@ -296,7 +298,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
296static void handle_write_error(struct address_space *mapping, 298static void handle_write_error(struct address_space *mapping,
297 struct page *page, int error) 299 struct page *page, int error)
298{ 300{
299 lock_page(page); 301 lock_page_nosync(page);
300 if (page_mapping(page) == mapping) 302 if (page_mapping(page) == mapping)
301 mapping_set_error(mapping, error); 303 mapping_set_error(mapping, error);
302 unlock_page(page); 304 unlock_page(page);