Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig             3
-rw-r--r--  mm/Makefile            2
-rw-r--r--  mm/backing-dev.c      17
-rw-r--r--  mm/bootmem.c          24
-rw-r--r--  mm/memblock.c        541
-rw-r--r--  mm/memcontrol.c        4
-rw-r--r--  mm/mempolicy.c         9
-rw-r--r--  mm/page-writeback.c    8
-rw-r--r--  mm/page_alloc.c        8
-rw-r--r--  mm/page_cgroup.c       7
-rw-r--r--  mm/percpu.c           36
-rw-r--r--  mm/vmscan.c           10
12 files changed, 632 insertions, 37 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 527136b22384..f4e516e9c37c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -128,6 +128,9 @@ config SPARSEMEM_VMEMMAP
 	  pfn_to_page and page_to_pfn operations. This is the most
 	  efficient option when sufficient kernel resources are available.
 
+config HAVE_MEMBLOCK
+	boolean
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
diff --git a/mm/Makefile b/mm/Makefile
index 8982504bd03b..34b2546a9e37 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,8 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   $(mmu-y)
 obj-y += init-mm.o
 
+obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
+
 obj-$(CONFIG_BOUNCE)	+= bounce.o
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HAS_DMA)	+= dmapool.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a22511..123bcef13e51 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -104,15 +104,13 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		   "b_more_io: %8lu\n"
 		   "bdi_list: %8u\n"
 		   "state: %8lx\n"
-		   "wb_mask: %8lx\n"
-		   "wb_list: %8u\n"
-		   "wb_cnt: %8u\n",
+		   "wb_list: %8u\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
 		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
 		   K(bdi_thresh), K(dirty_thresh),
 		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
-		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
-		   !list_empty(&bdi->wb_list), bdi->wb_cnt);
+		   !list_empty(&bdi->bdi_list), bdi->state,
+		   !list_empty(&bdi->wb_list));
 #undef K
 
 	return 0;
@@ -340,14 +338,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
 static void bdi_flush_io(struct backing_dev_info *bdi)
 {
 	struct writeback_control wbc = {
-		.bdi			= bdi,
 		.sync_mode		= WB_SYNC_NONE,
 		.older_than_this	= NULL,
 		.range_cyclic		= 1,
 		.nr_to_write		= 1024,
 	};
 
-	writeback_inodes_wbc(&wbc);
+	writeback_inodes_wb(&bdi->wb, &wbc);
 }
 
 /*
@@ -675,12 +672,6 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi_wb_init(&bdi->wb, bdi);
 
-	/*
-	 * Just one thread support for now, hard code mask and count
-	 */
-	bdi->wb_mask = 1;
-	bdi->wb_cnt = 1;
-
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
 		if (err)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 58c66cc5056a..142c84a54993 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
 				   unsigned long align, unsigned long goal)
 {
+	void *ptr;
+
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-	return __alloc_memory_core_early(pgdat->node_id, size, align,
+	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+					goal, -1ULL);
+	if (ptr)
+		return ptr;
+
+	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
 					goal, -1ULL);
 #else
-	return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
 #endif
+
+	return ptr;
 }
 
 void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
 				       unsigned long align, unsigned long goal)
 {
+	void *ptr;
+
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-	return __alloc_memory_core_early(pgdat->node_id, size, align,
+	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+					goal, ARCH_LOW_ADDRESS_LIMIT);
+	if (ptr)
+		return ptr;
+	ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
 					goal, ARCH_LOW_ADDRESS_LIMIT);
 #else
-	return ___alloc_bootmem_node(pgdat->bdata, size, align,
+	ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
 				     goal, ARCH_LOW_ADDRESS_LIMIT);
 #endif
+	return ptr;
 }
diff --git a/mm/memblock.c b/mm/memblock.c
new file mode 100644
index 000000000000..3024eb30fc27
--- /dev/null
+++ b/mm/memblock.c
@@ -0,0 +1,541 @@
+/*
+ * Procedures for maintaining information about logical memory blocks.
+ *
+ * Peter Bergner, IBM Corp. June 2001.
+ * Copyright (C) 2001 Peter Bergner.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/memblock.h>
+
+#define MEMBLOCK_ALLOC_ANYWHERE	0
+
+struct memblock memblock;
+
+static int memblock_debug;
+
+static int __init early_memblock(char *p)
+{
+	if (p && strstr(p, "debug"))
+		memblock_debug = 1;
+	return 0;
+}
+early_param("memblock", early_memblock);
+
+static void memblock_dump(struct memblock_region *region, char *name)
+{
+	unsigned long long base, size;
+	int i;
+
+	pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);
+
+	for (i = 0; i < region->cnt; i++) {
+		base = region->region[i].base;
+		size = region->region[i].size;
+
+		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
+			name, i, base, base + size - 1, size);
+	}
+}
+
+void memblock_dump_all(void)
+{
+	if (!memblock_debug)
+		return;
+
+	pr_info("MEMBLOCK configuration:\n");
+	pr_info(" rmo_size = 0x%llx\n", (unsigned long long)memblock.rmo_size);
+	pr_info(" memory.size = 0x%llx\n", (unsigned long long)memblock.memory.size);
+
+	memblock_dump(&memblock.memory, "memory");
+	memblock_dump(&memblock.reserved, "reserved");
+}
+
+static unsigned long memblock_addrs_overlap(u64 base1, u64 size1, u64 base2,
+					u64 size2)
+{
+	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
+}
+
+static long memblock_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
+{
+	if (base2 == base1 + size1)
+		return 1;
+	else if (base1 == base2 + size2)
+		return -1;
+
+	return 0;
+}
+
+static long memblock_regions_adjacent(struct memblock_region *rgn,
+		unsigned long r1, unsigned long r2)
+{
+	u64 base1 = rgn->region[r1].base;
+	u64 size1 = rgn->region[r1].size;
+	u64 base2 = rgn->region[r2].base;
+	u64 size2 = rgn->region[r2].size;
+
+	return memblock_addrs_adjacent(base1, size1, base2, size2);
+}
+
+static void memblock_remove_region(struct memblock_region *rgn, unsigned long r)
+{
+	unsigned long i;
+
+	for (i = r; i < rgn->cnt - 1; i++) {
+		rgn->region[i].base = rgn->region[i + 1].base;
+		rgn->region[i].size = rgn->region[i + 1].size;
+	}
+	rgn->cnt--;
+}
+
+/* Assumption: base addr of region 1 < base addr of region 2 */
+static void memblock_coalesce_regions(struct memblock_region *rgn,
+		unsigned long r1, unsigned long r2)
+{
+	rgn->region[r1].size += rgn->region[r2].size;
+	memblock_remove_region(rgn, r2);
+}
+
+void __init memblock_init(void)
+{
+	/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
+	 * This simplifies the memblock_add() code below...
+	 */
+	memblock.memory.region[0].base = 0;
+	memblock.memory.region[0].size = 0;
+	memblock.memory.cnt = 1;
+
+	/* Ditto. */
+	memblock.reserved.region[0].base = 0;
+	memblock.reserved.region[0].size = 0;
+	memblock.reserved.cnt = 1;
+}
+
+void __init memblock_analyze(void)
+{
+	int i;
+
+	memblock.memory.size = 0;
+
+	for (i = 0; i < memblock.memory.cnt; i++)
+		memblock.memory.size += memblock.memory.region[i].size;
+}
+
+static long memblock_add_region(struct memblock_region *rgn, u64 base, u64 size)
+{
+	unsigned long coalesced = 0;
+	long adjacent, i;
+
+	if ((rgn->cnt == 1) && (rgn->region[0].size == 0)) {
+		rgn->region[0].base = base;
+		rgn->region[0].size = size;
+		return 0;
+	}
+
+	/* First try and coalesce this MEMBLOCK with another. */
+	for (i = 0; i < rgn->cnt; i++) {
+		u64 rgnbase = rgn->region[i].base;
+		u64 rgnsize = rgn->region[i].size;
+
+		if ((rgnbase == base) && (rgnsize == size))
+			/* Already have this region, so we're done */
+			return 0;
+
+		adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
+		if (adjacent > 0) {
+			rgn->region[i].base -= size;
+			rgn->region[i].size += size;
+			coalesced++;
+			break;
+		} else if (adjacent < 0) {
+			rgn->region[i].size += size;
+			coalesced++;
+			break;
+		}
+	}
+
+	if ((i < rgn->cnt - 1) && memblock_regions_adjacent(rgn, i, i+1)) {
+		memblock_coalesce_regions(rgn, i, i+1);
+		coalesced++;
+	}
+
+	if (coalesced)
+		return coalesced;
+	if (rgn->cnt >= MAX_MEMBLOCK_REGIONS)
+		return -1;
+
+	/* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
+	for (i = rgn->cnt - 1; i >= 0; i--) {
+		if (base < rgn->region[i].base) {
+			rgn->region[i+1].base = rgn->region[i].base;
+			rgn->region[i+1].size = rgn->region[i].size;
+		} else {
+			rgn->region[i+1].base = base;
+			rgn->region[i+1].size = size;
+			break;
+		}
+	}
+
+	if (base < rgn->region[0].base) {
+		rgn->region[0].base = base;
+		rgn->region[0].size = size;
+	}
+	rgn->cnt++;
+
+	return 0;
+}
+
+long memblock_add(u64 base, u64 size)
+{
+	struct memblock_region *_rgn = &memblock.memory;
+
+	/* On pSeries LPAR systems, the first MEMBLOCK is our RMO region. */
+	if (base == 0)
+		memblock.rmo_size = size;
+
+	return memblock_add_region(_rgn, base, size);
+
+}
+
+static long __memblock_remove(struct memblock_region *rgn, u64 base, u64 size)
+{
+	u64 rgnbegin, rgnend;
+	u64 end = base + size;
+	int i;
+
+	rgnbegin = rgnend = 0; /* supress gcc warnings */
+
+	/* Find the region where (base, size) belongs to */
+	for (i=0; i < rgn->cnt; i++) {
+		rgnbegin = rgn->region[i].base;
+		rgnend = rgnbegin + rgn->region[i].size;
+
+		if ((rgnbegin <= base) && (end <= rgnend))
+			break;
+	}
+
+	/* Didn't find the region */
+	if (i == rgn->cnt)
+		return -1;
+
+	/* Check to see if we are removing entire region */
+	if ((rgnbegin == base) && (rgnend == end)) {
+		memblock_remove_region(rgn, i);
+		return 0;
+	}
+
+	/* Check to see if region is matching at the front */
+	if (rgnbegin == base) {
+		rgn->region[i].base = end;
+		rgn->region[i].size -= size;
+		return 0;
+	}
+
+	/* Check to see if the region is matching at the end */
+	if (rgnend == end) {
+		rgn->region[i].size -= size;
+		return 0;
+	}
+
+	/*
+	 * We need to split the entry - adjust the current one to the
+	 * beginging of the hole and add the region after hole.
+	 */
+	rgn->region[i].size = base - rgn->region[i].base;
+	return memblock_add_region(rgn, end, rgnend - end);
+}
+
+long memblock_remove(u64 base, u64 size)
+{
+	return __memblock_remove(&memblock.memory, base, size);
+}
+
+long __init memblock_free(u64 base, u64 size)
+{
+	return __memblock_remove(&memblock.reserved, base, size);
+}
+
+long __init memblock_reserve(u64 base, u64 size)
+{
+	struct memblock_region *_rgn = &memblock.reserved;
+
+	BUG_ON(0 == size);
+
+	return memblock_add_region(_rgn, base, size);
+}
+
+long memblock_overlaps_region(struct memblock_region *rgn, u64 base, u64 size)
+{
+	unsigned long i;
+
+	for (i = 0; i < rgn->cnt; i++) {
+		u64 rgnbase = rgn->region[i].base;
+		u64 rgnsize = rgn->region[i].size;
+		if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
+			break;
+	}
+
+	return (i < rgn->cnt) ? i : -1;
+}
+
+static u64 memblock_align_down(u64 addr, u64 size)
+{
+	return addr & ~(size - 1);
+}
+
+static u64 memblock_align_up(u64 addr, u64 size)
+{
+	return (addr + (size - 1)) & ~(size - 1);
+}
+
+static u64 __init memblock_alloc_nid_unreserved(u64 start, u64 end,
+					u64 size, u64 align)
+{
+	u64 base, res_base;
+	long j;
+
+	base = memblock_align_down((end - size), align);
+	while (start <= base) {
+		j = memblock_overlaps_region(&memblock.reserved, base, size);
+		if (j < 0) {
+			/* this area isn't reserved, take it */
+			if (memblock_add_region(&memblock.reserved, base, size) < 0)
+				base = ~(u64)0;
+			return base;
+		}
+		res_base = memblock.reserved.region[j].base;
+		if (res_base < size)
+			break;
+		base = memblock_align_down(res_base - size, align);
+	}
+
+	return ~(u64)0;
+}
+
+static u64 __init memblock_alloc_nid_region(struct memblock_property *mp,
+					u64 (*nid_range)(u64, u64, int *),
+					u64 size, u64 align, int nid)
+{
+	u64 start, end;
+
+	start = mp->base;
+	end = start + mp->size;
+
+	start = memblock_align_up(start, align);
+	while (start < end) {
+		u64 this_end;
+		int this_nid;
+
+		this_end = nid_range(start, end, &this_nid);
+		if (this_nid == nid) {
+			u64 ret = memblock_alloc_nid_unreserved(start, this_end,
+								size, align);
+			if (ret != ~(u64)0)
+				return ret;
+		}
+		start = this_end;
+	}
+
+	return ~(u64)0;
+}
+
+u64 __init memblock_alloc_nid(u64 size, u64 align, int nid,
+			u64 (*nid_range)(u64 start, u64 end, int *nid))
+{
+	struct memblock_region *mem = &memblock.memory;
+	int i;
+
+	BUG_ON(0 == size);
+
+	size = memblock_align_up(size, align);
+
+	for (i = 0; i < mem->cnt; i++) {
+		u64 ret = memblock_alloc_nid_region(&mem->region[i],
+						nid_range,
+						size, align, nid);
+		if (ret != ~(u64)0)
+			return ret;
+	}
+
+	return memblock_alloc(size, align);
+}
+
+u64 __init memblock_alloc(u64 size, u64 align)
+{
+	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
+}
+
+u64 __init memblock_alloc_base(u64 size, u64 align, u64 max_addr)
+{
+	u64 alloc;
+
+	alloc = __memblock_alloc_base(size, align, max_addr);
+
+	if (alloc == 0)
+		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
+		      (unsigned long long) size, (unsigned long long) max_addr);
+
+	return alloc;
+}
+
+u64 __init __memblock_alloc_base(u64 size, u64 align, u64 max_addr)
+{
+	long i, j;
+	u64 base = 0;
+	u64 res_base;
+
+	BUG_ON(0 == size);
+
+	size = memblock_align_up(size, align);
+
+	/* On some platforms, make sure we allocate lowmem */
+	/* Note that MEMBLOCK_REAL_LIMIT may be MEMBLOCK_ALLOC_ANYWHERE */
+	if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
+		max_addr = MEMBLOCK_REAL_LIMIT;
+
+	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
+		u64 memblockbase = memblock.memory.region[i].base;
+		u64 memblocksize = memblock.memory.region[i].size;
+
+		if (memblocksize < size)
+			continue;
+		if (max_addr == MEMBLOCK_ALLOC_ANYWHERE)
+			base = memblock_align_down(memblockbase + memblocksize - size, align);
+		else if (memblockbase < max_addr) {
+			base = min(memblockbase + memblocksize, max_addr);
+			base = memblock_align_down(base - size, align);
+		} else
+			continue;
+
+		while (base && memblockbase <= base) {
+			j = memblock_overlaps_region(&memblock.reserved, base, size);
+			if (j < 0) {
+				/* this area isn't reserved, take it */
+				if (memblock_add_region(&memblock.reserved, base, size) < 0)
+					return 0;
+				return base;
+			}
+			res_base = memblock.reserved.region[j].base;
+			if (res_base < size)
+				break;
+			base = memblock_align_down(res_base - size, align);
+		}
+	}
+	return 0;
+}
+
+/* You must call memblock_analyze() before this. */
+u64 __init memblock_phys_mem_size(void)
+{
+	return memblock.memory.size;
+}
+
+u64 memblock_end_of_DRAM(void)
+{
+	int idx = memblock.memory.cnt - 1;
+
+	return (memblock.memory.region[idx].base + memblock.memory.region[idx].size);
+}
+
+/* You must call memblock_analyze() after this. */
+void __init memblock_enforce_memory_limit(u64 memory_limit)
+{
+	unsigned long i;
+	u64 limit;
+	struct memblock_property *p;
+
+	if (!memory_limit)
+		return;
+
+	/* Truncate the memblock regions to satisfy the memory limit. */
+	limit = memory_limit;
+	for (i = 0; i < memblock.memory.cnt; i++) {
+		if (limit > memblock.memory.region[i].size) {
+			limit -= memblock.memory.region[i].size;
+			continue;
+		}
+
+		memblock.memory.region[i].size = limit;
+		memblock.memory.cnt = i + 1;
+		break;
+	}
+
+	if (memblock.memory.region[0].size < memblock.rmo_size)
+		memblock.rmo_size = memblock.memory.region[0].size;
+
+	memory_limit = memblock_end_of_DRAM();
+
+	/* And truncate any reserves above the limit also. */
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		p = &memblock.reserved.region[i];
+
+		if (p->base > memory_limit)
+			p->size = 0;
+		else if ((p->base + p->size) > memory_limit)
+			p->size = memory_limit - p->base;
+
+		if (p->size == 0) {
+			memblock_remove_region(&memblock.reserved, i);
+			i--;
+		}
+	}
+}
+
+int __init memblock_is_reserved(u64 addr)
+{
+	int i;
+
+	for (i = 0; i < memblock.reserved.cnt; i++) {
+		u64 upper = memblock.reserved.region[i].base +
+			memblock.reserved.region[i].size - 1;
+		if ((addr >= memblock.reserved.region[i].base) && (addr <= upper))
+			return 1;
+	}
+	return 0;
+}
+
+int memblock_is_region_reserved(u64 base, u64 size)
+{
+	return memblock_overlaps_region(&memblock.reserved, base, size);
+}
+
+/*
+ * Given a <base, len>, find which memory regions belong to this range.
+ * Adjust the request and return a contiguous chunk.
+ */
+int memblock_find(struct memblock_property *res)
+{
+	int i;
+	u64 rstart, rend;
+
+	rstart = res->base;
+	rend = rstart + res->size - 1;
+
+	for (i = 0; i < memblock.memory.cnt; i++) {
+		u64 start = memblock.memory.region[i].base;
+		u64 end = start + memblock.memory.region[i].size - 1;
+
+		if (start > rend)
+			return -1;
+
+		if ((end >= rstart) && (start < rend)) {
+			/* adjust the request */
+			if (rstart < start)
+				rstart = start;
+			if (rend > end)
+				rend = end;
+			res->base = rstart;
+			res->size = rend - rstart + 1;
+			return 0;
+		}
+	}
+	return -1;
+}
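
The new file above is the whole memblock API at this point: memblock_init() seeds the dummy regions, memblock_add() and memblock_reserve() record available and already-used ranges, memblock_analyze() totals them, and the memblock_alloc*() helpers carve out early allocations from whatever is left. A minimal, illustrative sketch of how an architecture's early setup code might string these calls together; this is not part of the commit, and the function name and sizes are made up for the example:

static void __init example_early_mem_init(void)
{
	u64 early_buf;

	memblock_init();			/* create the dummy zero-size regions   */
	memblock_add(0, 0x40000000ULL);		/* example: report 1 GB of RAM at 0     */
	memblock_reserve(0, 0x100000ULL);	/* example: keep the low 1 MB reserved  */
	memblock_analyze();			/* recompute memblock.memory.size       */

	/* 16 MB, 1 MB aligned; panics via memblock_alloc_base() on failure */
	early_buf = memblock_alloc(16 << 20, 1 << 20);

	pr_info("DRAM ends at 0x%llx, early buffer at 0x%llx\n",
		(unsigned long long)memblock_end_of_DRAM(),
		(unsigned long long)early_buf);
}
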
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c6ece0a57595..20a8193a7af8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1370,7 +1370,7 @@ static void memcg_wakeup_oom(struct mem_cgroup *mem)
 
 static void memcg_oom_recover(struct mem_cgroup *mem)
 {
-	if (mem->oom_kill_disable && atomic_read(&mem->oom_lock))
+	if (atomic_read(&mem->oom_lock))
 		memcg_wakeup_oom(mem);
 }
 
@@ -3781,6 +3781,8 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 		return -EINVAL;
 	}
 	mem->oom_kill_disable = val;
+	if (!val)
+		memcg_oom_recover(mem);
 	cgroup_unlock();
 	return 0;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5d6fb339de03..5bc0a96beb51 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2094,7 +2094,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 		NODEMASK_SCRATCH(scratch);
 
 		if (!scratch)
-			return;
+			goto put_mpol;
 		/* contextualize the tmpfs mount point mempolicy */
 		new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
 		if (IS_ERR(new))
@@ -2103,19 +2103,20 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 		task_lock(current);
 		ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch);
 		task_unlock(current);
-		mpol_put(mpol);	/* drop our ref on sb mpol */
 		if (ret)
-			goto put_free;
+			goto put_new;
 
 		/* Create pseudo-vma that contains just the policy */
 		memset(&pvma, 0, sizeof(struct vm_area_struct));
 		pvma.vm_end = TASK_SIZE;	/* policy covers entire file */
 		mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
 
-put_free:
+put_new:
 		mpol_put(new);			/* drop initial ref */
 free_scratch:
 		NODEMASK_SCRATCH_FREE(scratch);
+put_mpol:
+		mpol_put(mpol);	/* drop our incoming ref on sb mpol */
 	}
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index bbd396ac9546..37498ef61548 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping,
 
 	for (;;) {
 		struct writeback_control wbc = {
-			.bdi		= bdi,
 			.sync_mode	= WB_SYNC_NONE,
 			.older_than_this = NULL,
 			.nr_to_write	= write_chunk,
@@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 * up.
 		 */
 		if (bdi_nr_reclaimable > bdi_thresh) {
-			writeback_inodes_wbc(&wbc);
+			writeback_inodes_wb(&bdi->wb, &wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
@@ -597,7 +596,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
 			       + global_page_state(NR_UNSTABLE_NFS))
 					  > background_thresh)))
-		bdi_start_writeback(bdi, NULL, 0);
+		bdi_start_background_writeback(bdi);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -705,9 +704,8 @@ void laptop_mode_timer_fn(unsigned long data)
 	 * We want to write everything out, not just down to the dirty
 	 * threshold
 	 */
-
 	if (bdi_has_dirty_io(&q->backing_dev_info))
-		bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
+		bdi_start_writeback(&q->backing_dev_info, nr_pages);
 }
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 431214b941ac..9bd339eb04c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 	int i;
 	void *ptr;
 
+	if (limit > get_max_mapped())
+		limit = get_max_mapped();
+
 	/* need to go over early_node_map to find out good range for node */
 	for_each_active_range_index_in_nid(i, nid) {
 		u64 addr;
@@ -3659,6 +3662,11 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 		ptr = phys_to_virt(addr);
 		memset(ptr, 0, size);
 		reserve_early_without_check(addr, addr + size, "BOOTMEM");
+		/*
+		 * The min_count is set to 0 so that bootmem allocated blocks
+		 * are never reported as leaks.
+		 */
+		kmemleak_alloc(ptr, size, 0, 0);
 		return ptr;
 	}
 
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6c0081441a32..5bffada7cde1 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -9,6 +9,7 @@
 #include <linux/vmalloc.h>
 #include <linux/cgroup.h>
 #include <linux/swapops.h>
+#include <linux/kmemleak.h>
 
 static void __meminit
 __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -126,6 +127,12 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 			if (!base)
 				base = vmalloc(table_size);
 		}
+		/*
+		 * The value stored in section->page_cgroup is (base - pfn)
+		 * and it does not point to the memory block allocated above,
+		 * causing kmemleak false positives.
+		 */
+		kmemleak_not_leak(base);
 	} else {
 		/*
 		 * We don't have to allocate page_cgroup again, but
diff --git a/mm/percpu.c b/mm/percpu.c
index 39f7dfd59585..6470e7710231 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -229,8 +229,8 @@ static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
 	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
 }
 
-static unsigned long __maybe_unused pcpu_chunk_addr(struct pcpu_chunk *chunk,
+static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
 				     unsigned int cpu, int page_idx)
 {
 	return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
 		(page_idx << PAGE_SHIFT);
@@ -978,7 +978,32 @@ bool is_kernel_percpu_address(unsigned long addr)
  */
 phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
-	if (pcpu_addr_in_first_chunk(addr)) {
+	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
+	bool in_first_chunk = false;
+	unsigned long first_start, first_end;
+	unsigned int cpu;
+
+	/*
+	 * The following test on first_start/end isn't strictly
+	 * necessary but will speed up lookups of addresses which
+	 * aren't in the first chunk.
+	 */
+	first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
+	first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
+				    pcpu_unit_pages);
+	if ((unsigned long)addr >= first_start &&
+	    (unsigned long)addr < first_end) {
+		for_each_possible_cpu(cpu) {
+			void *start = per_cpu_ptr(base, cpu);
+
+			if (addr >= start && addr < start + pcpu_unit_size) {
+				in_first_chunk = true;
+				break;
+			}
+		}
+	}
+
+	if (in_first_chunk) {
 		if ((unsigned long)addr < VMALLOC_START ||
 		    (unsigned long)addr >= VMALLOC_END)
 			return __pa(addr);
@@ -1086,7 +1111,7 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	static int group_map[NR_CPUS] __initdata;
 	static int group_cnt[NR_CPUS] __initdata;
 	const size_t static_size = __per_cpu_end - __per_cpu_start;
-	int group_cnt_max = 0, nr_groups = 1, nr_units = 0;
+	int nr_groups = 1, nr_units = 0;
 	size_t size_sum, min_unit_size, alloc_size;
 	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
 	int last_allocs, group, unit;
@@ -1096,7 +1121,7 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 
 	/* this function may be called multiple times */
 	memset(group_map, 0, sizeof(group_map));
-	memset(group_cnt, 0, sizeof(group_map));
+	memset(group_cnt, 0, sizeof(group_cnt));
 
 	/*
 	 * Determine min_unit_size, alloc_size and max_upa such that
@@ -1130,7 +1155,6 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 		}
 		group_map[cpu] = group;
 		group_cnt[group]++;
-		group_cnt_max = max(group_cnt_max, group_cnt[group]);
 	}
 
 	/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9c7e57cc63a3..b94fe1b3da43 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 	list_for_each_entry(shrinker, &shrinker_list, list) {
 		unsigned long long delta;
 		unsigned long total_scan;
-		unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
+		unsigned long max_pass;
 
+		max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
 		delta = (4 * scanned) / shrinker->seeks;
 		delta *= max_pass;
 		do_div(delta, lru_pages + 1);
@@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 			int shrink_ret;
 			int nr_before;
 
-			nr_before = (*shrinker->shrink)(0, gfp_mask);
-			shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
+			nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
+			shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
+								gfp_mask);
 			if (shrink_ret == -1)
 				break;
 			if (shrink_ret < nr_before)
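
The two updated call sites in the hunks above imply that the shrinker callback now takes its struct shrinker as the first argument, so a single callback can serve several registered shrinkers and reach its own state through that pointer. A minimal sketch of a callback written against that assumed signature; my_shrinker and the object counter are hypothetical, not part of this commit:

static atomic_t my_cache_objects = ATOMIC_INIT(0);	/* hypothetical cache population */

static int my_cache_shrink(struct shrinker *s, int nr_to_scan, gfp_t gfp_mask)
{
	if (!nr_to_scan)		/* query pass: report how much is shrinkable */
		return atomic_read(&my_cache_objects);

	if (!(gfp_mask & __GFP_FS))
		return -1;		/* -1 makes shrink_slab() give up, as above */

	/* ... evict up to nr_to_scan objects, decrementing my_cache_objects ... */
	return atomic_read(&my_cache_objects);
}

static struct shrinker my_shrinker = {
	.shrink	= my_cache_shrink,
	.seeks	= DEFAULT_SEEKS,
};

It would still be registered with register_shrinker(&my_shrinker); only the ->shrink prototype changes.
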
@@ -296,7 +298,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
 static void handle_write_error(struct address_space *mapping,
 				struct page *page, int error)
 {
-	lock_page(page);
+	lock_page_nosync(page);
 	if (page_mapping(page) == mapping)
 		mapping_set_error(mapping, error);
 	unlock_page(page);