aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab@redhat.com>2012-07-29 20:09:39 -0400
committerMauro Carvalho Chehab <mchehab@redhat.com>2012-07-29 20:09:39 -0400
commit73bcc49959e4e40911dd0dd634bf1b353827df66 (patch)
tree6b0c1d440c490a65c51ab5cf5aee7095cb4089d3 /mm
parent8447c4d15e357a458c9051ddc84aa6c8b9c27000 (diff)
parent28a33cbc24e4256c143dce96c7d93bf423229f92 (diff)
Merge tag 'v3.5'
Linux 3.5 * tag 'v3.5': (1242 commits) Linux 3.5 Remove SYSTEM_SUSPEND_DISK system state kdb: Switch to nolock variants of kmsg_dump functions printk: Implement some unlocked kmsg_dump functions printk: Remove kdb_syslog_data kdb: Revive dmesg command dm raid1: set discard_zeroes_data_unsupported dm thin: do not send discards to shared blocks dm raid1: fix crash with mirror recovery and discard pnfs-obj: Fix __r4w_get_page when offset is beyond i_size pnfs-obj: don't leak objio_state if ore_write/read fails ore: Unlock r4w pages in exact reverse order of locking ore: Remove support of partial IO request (NFS crash) ore: Fix NFS crash by supporting any unaligned RAID IO UBIFS: fix a bug in empty space fix-up cx25821: Remove bad strcpy to read-only char* HID: hid-multitouch: add support for Zytronic panels MIPS: PCI: Move fixups from __init to __devinit. MIPS: Fix bug.h MIPS build regression MIPS: sync-r4k: remove redundant irq operation ...
Diffstat (limited to 'mm')
-rw-r--r--mm/bootmem.c6
-rw-r--r--mm/compaction.c5
-rw-r--r--mm/madvise.c18
-rw-r--r--mm/memblock.c115
-rw-r--r--mm/memcontrol.c6
-rw-r--r--mm/memory.c12
-rw-r--r--mm/memory_hotplug.c2
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/nobootmem.c40
-rw-r--r--mm/oom_kill.c17
-rw-r--r--mm/page_alloc.c7
-rw-r--r--mm/page_cgroup.c4
-rw-r--r--mm/pagewalk.c1
-rw-r--r--mm/percpu-vm.c1
-rw-r--r--mm/shmem.c196
-rw-r--r--mm/sparse.c20
-rw-r--r--mm/swapfile.c12
-rw-r--r--mm/vmscan.c12
18 files changed, 244 insertions, 232 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index ec4fcb7a56c8..bcb63ac48cc5 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
698 return ___alloc_bootmem(size, align, goal, limit); 698 return ___alloc_bootmem(size, align, goal, limit);
699} 699}
700 700
701static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, 701void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
702 unsigned long size, unsigned long align, 702 unsigned long size, unsigned long align,
703 unsigned long goal, unsigned long limit) 703 unsigned long goal, unsigned long limit)
704{ 704{
@@ -710,6 +710,10 @@ again:
710 if (ptr) 710 if (ptr)
711 return ptr; 711 return ptr;
712 712
713 /* do not panic in alloc_bootmem_bdata() */
714 if (limit && goal + size > limit)
715 limit = 0;
716
713 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit); 717 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
714 if (ptr) 718 if (ptr)
715 return ptr; 719 return ptr;
diff --git a/mm/compaction.c b/mm/compaction.c
index 7ea259d82a99..2f42d9528539 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -701,8 +701,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
701 if (err) { 701 if (err) {
702 putback_lru_pages(&cc->migratepages); 702 putback_lru_pages(&cc->migratepages);
703 cc->nr_migratepages = 0; 703 cc->nr_migratepages = 0;
704 if (err == -ENOMEM) {
705 ret = COMPACT_PARTIAL;
706 goto out;
707 }
704 } 708 }
705
706 } 709 }
707 710
708out: 711out:
diff --git a/mm/madvise.c b/mm/madvise.c
index deff1b64a08c..14d260fa0d17 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -15,6 +15,7 @@
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/ksm.h> 16#include <linux/ksm.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/file.h>
18 19
19/* 20/*
20 * Any behaviour which results in changes to the vma->vm_flags needs to 21 * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -204,14 +205,16 @@ static long madvise_remove(struct vm_area_struct *vma,
204{ 205{
205 loff_t offset; 206 loff_t offset;
206 int error; 207 int error;
208 struct file *f;
207 209
208 *prev = NULL; /* tell sys_madvise we drop mmap_sem */ 210 *prev = NULL; /* tell sys_madvise we drop mmap_sem */
209 211
210 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) 212 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
211 return -EINVAL; 213 return -EINVAL;
212 214
213 if (!vma->vm_file || !vma->vm_file->f_mapping 215 f = vma->vm_file;
214 || !vma->vm_file->f_mapping->host) { 216
217 if (!f || !f->f_mapping || !f->f_mapping->host) {
215 return -EINVAL; 218 return -EINVAL;
216 } 219 }
217 220
@@ -221,11 +224,18 @@ static long madvise_remove(struct vm_area_struct *vma,
221 offset = (loff_t)(start - vma->vm_start) 224 offset = (loff_t)(start - vma->vm_start)
222 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 225 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
223 226
224 /* filesystem's fallocate may need to take i_mutex */ 227 /*
228 * Filesystem's fallocate may need to take i_mutex. We need to
229 * explicitly grab a reference because the vma (and hence the
230 * vma's reference to the file) can go away as soon as we drop
231 * mmap_sem.
232 */
233 get_file(f);
225 up_read(&current->mm->mmap_sem); 234 up_read(&current->mm->mmap_sem);
226 error = do_fallocate(vma->vm_file, 235 error = do_fallocate(f,
227 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 236 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
228 offset, end - start); 237 offset, end - start);
238 fput(f);
229 down_read(&current->mm->mmap_sem); 239 down_read(&current->mm->mmap_sem);
230 return error; 240 return error;
231} 241}
diff --git a/mm/memblock.c b/mm/memblock.c
index 952123eba433..5cc6731b00cc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -143,30 +143,6 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
143 MAX_NUMNODES); 143 MAX_NUMNODES);
144} 144}
145 145
146/*
147 * Free memblock.reserved.regions
148 */
149int __init_memblock memblock_free_reserved_regions(void)
150{
151 if (memblock.reserved.regions == memblock_reserved_init_regions)
152 return 0;
153
154 return memblock_free(__pa(memblock.reserved.regions),
155 sizeof(struct memblock_region) * memblock.reserved.max);
156}
157
158/*
159 * Reserve memblock.reserved.regions
160 */
161int __init_memblock memblock_reserve_reserved_regions(void)
162{
163 if (memblock.reserved.regions == memblock_reserved_init_regions)
164 return 0;
165
166 return memblock_reserve(__pa(memblock.reserved.regions),
167 sizeof(struct memblock_region) * memblock.reserved.max);
168}
169
170static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) 146static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
171{ 147{
172 type->total_size -= type->regions[r].size; 148 type->total_size -= type->regions[r].size;
@@ -184,9 +160,39 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
184 } 160 }
185} 161}
186 162
187static int __init_memblock memblock_double_array(struct memblock_type *type) 163phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
164 phys_addr_t *addr)
165{
166 if (memblock.reserved.regions == memblock_reserved_init_regions)
167 return 0;
168
169 *addr = __pa(memblock.reserved.regions);
170
171 return PAGE_ALIGN(sizeof(struct memblock_region) *
172 memblock.reserved.max);
173}
174
175/**
176 * memblock_double_array - double the size of the memblock regions array
177 * @type: memblock type of the regions array being doubled
178 * @new_area_start: starting address of memory range to avoid overlap with
179 * @new_area_size: size of memory range to avoid overlap with
180 *
181 * Double the size of the @type regions array. If memblock is being used to
182 * allocate memory for a new reserved regions array and there is a previously
183 * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
184 * waiting to be reserved, ensure the memory used by the new array does
185 * not overlap.
186 *
187 * RETURNS:
188 * 0 on success, -1 on failure.
189 */
190static int __init_memblock memblock_double_array(struct memblock_type *type,
191 phys_addr_t new_area_start,
192 phys_addr_t new_area_size)
188{ 193{
189 struct memblock_region *new_array, *old_array; 194 struct memblock_region *new_array, *old_array;
195 phys_addr_t old_alloc_size, new_alloc_size;
190 phys_addr_t old_size, new_size, addr; 196 phys_addr_t old_size, new_size, addr;
191 int use_slab = slab_is_available(); 197 int use_slab = slab_is_available();
192 int *in_slab; 198 int *in_slab;
@@ -200,6 +206,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
200 /* Calculate new doubled size */ 206 /* Calculate new doubled size */
201 old_size = type->max * sizeof(struct memblock_region); 207 old_size = type->max * sizeof(struct memblock_region);
202 new_size = old_size << 1; 208 new_size = old_size << 1;
209 /*
210 * We need to allocated new one align to PAGE_SIZE,
211 * so we can free them completely later.
212 */
213 old_alloc_size = PAGE_ALIGN(old_size);
214 new_alloc_size = PAGE_ALIGN(new_size);
203 215
204 /* Retrieve the slab flag */ 216 /* Retrieve the slab flag */
205 if (type == &memblock.memory) 217 if (type == &memblock.memory)
@@ -222,7 +234,18 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
222 new_array = kmalloc(new_size, GFP_KERNEL); 234 new_array = kmalloc(new_size, GFP_KERNEL);
223 addr = new_array ? __pa(new_array) : 0; 235 addr = new_array ? __pa(new_array) : 0;
224 } else { 236 } else {
225 addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t)); 237 /* only exclude range when trying to double reserved.regions */
238 if (type != &memblock.reserved)
239 new_area_start = new_area_size = 0;
240
241 addr = memblock_find_in_range(new_area_start + new_area_size,
242 memblock.current_limit,
243 new_alloc_size, PAGE_SIZE);
244 if (!addr && new_area_size)
245 addr = memblock_find_in_range(0,
246 min(new_area_start, memblock.current_limit),
247 new_alloc_size, PAGE_SIZE);
248
226 new_array = addr ? __va(addr) : 0; 249 new_array = addr ? __va(addr) : 0;
227 } 250 }
228 if (!addr) { 251 if (!addr) {
@@ -251,13 +274,13 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
251 kfree(old_array); 274 kfree(old_array);
252 else if (old_array != memblock_memory_init_regions && 275 else if (old_array != memblock_memory_init_regions &&
253 old_array != memblock_reserved_init_regions) 276 old_array != memblock_reserved_init_regions)
254 memblock_free(__pa(old_array), old_size); 277 memblock_free(__pa(old_array), old_alloc_size);
255 278
256 /* Reserve the new array if that comes from the memblock. 279 /* Reserve the new array if that comes from the memblock.
257 * Otherwise, we needn't do it 280 * Otherwise, we needn't do it
258 */ 281 */
259 if (!use_slab) 282 if (!use_slab)
260 BUG_ON(memblock_reserve(addr, new_size)); 283 BUG_ON(memblock_reserve(addr, new_alloc_size));
261 284
262 /* Update slab flag */ 285 /* Update slab flag */
263 *in_slab = use_slab; 286 *in_slab = use_slab;
@@ -399,7 +422,7 @@ repeat:
399 */ 422 */
400 if (!insert) { 423 if (!insert) {
401 while (type->cnt + nr_new > type->max) 424 while (type->cnt + nr_new > type->max)
402 if (memblock_double_array(type) < 0) 425 if (memblock_double_array(type, obase, size) < 0)
403 return -ENOMEM; 426 return -ENOMEM;
404 insert = true; 427 insert = true;
405 goto repeat; 428 goto repeat;
@@ -450,7 +473,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
450 473
451 /* we'll create at most two more regions */ 474 /* we'll create at most two more regions */
452 while (type->cnt + 2 > type->max) 475 while (type->cnt + 2 > type->max)
453 if (memblock_double_array(type) < 0) 476 if (memblock_double_array(type, base, size) < 0)
454 return -ENOMEM; 477 return -ENOMEM;
455 478
456 for (i = 0; i < type->cnt; i++) { 479 for (i = 0; i < type->cnt; i++) {
@@ -540,9 +563,9 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
540 * __next_free_mem_range - next function for for_each_free_mem_range() 563 * __next_free_mem_range - next function for for_each_free_mem_range()
541 * @idx: pointer to u64 loop variable 564 * @idx: pointer to u64 loop variable
542 * @nid: nid: node selector, %MAX_NUMNODES for all nodes 565 * @nid: nid: node selector, %MAX_NUMNODES for all nodes
543 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL 566 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
544 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL 567 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
545 * @p_nid: ptr to int for nid of the range, can be %NULL 568 * @out_nid: ptr to int for nid of the range, can be %NULL
546 * 569 *
547 * Find the first free area from *@idx which matches @nid, fill the out 570 * Find the first free area from *@idx which matches @nid, fill the out
548 * parameters, and update *@idx for the next iteration. The lower 32bit of 571 * parameters, and update *@idx for the next iteration. The lower 32bit of
@@ -616,9 +639,9 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
616 * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse() 639 * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
617 * @idx: pointer to u64 loop variable 640 * @idx: pointer to u64 loop variable
618 * @nid: nid: node selector, %MAX_NUMNODES for all nodes 641 * @nid: nid: node selector, %MAX_NUMNODES for all nodes
619 * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL 642 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
620 * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL 643 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
621 * @p_nid: ptr to int for nid of the range, can be %NULL 644 * @out_nid: ptr to int for nid of the range, can be %NULL
622 * 645 *
623 * Reverse of __next_free_mem_range(). 646 * Reverse of __next_free_mem_range().
624 */ 647 */
@@ -867,6 +890,16 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
867 return memblock_search(&memblock.memory, addr) != -1; 890 return memblock_search(&memblock.memory, addr) != -1;
868} 891}
869 892
893/**
894 * memblock_is_region_memory - check if a region is a subset of memory
895 * @base: base of region to check
896 * @size: size of region to check
897 *
898 * Check if the region [@base, @base+@size) is a subset of a memory block.
899 *
900 * RETURNS:
901 * 0 if false, non-zero if true
902 */
870int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) 903int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
871{ 904{
872 int idx = memblock_search(&memblock.memory, base); 905 int idx = memblock_search(&memblock.memory, base);
@@ -879,6 +912,16 @@ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size
879 memblock.memory.regions[idx].size) >= end; 912 memblock.memory.regions[idx].size) >= end;
880} 913}
881 914
915/**
916 * memblock_is_region_reserved - check if a region intersects reserved memory
917 * @base: base of region to check
918 * @size: size of region to check
919 *
920 * Check if the region [@base, @base+@size) intersects a reserved memory block.
921 *
922 * RETURNS:
923 * 0 if false, non-zero if true
924 */
882int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) 925int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
883{ 926{
884 memblock_cap_size(base, &size); 927 memblock_cap_size(base, &size);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ac35bccadb7b..f72b5e52451a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1148,7 +1148,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1148{ 1148{
1149 if (root_memcg == memcg) 1149 if (root_memcg == memcg)
1150 return true; 1150 return true;
1151 if (!root_memcg->use_hierarchy) 1151 if (!root_memcg->use_hierarchy || !memcg)
1152 return false; 1152 return false;
1153 return css_is_ancestor(&memcg->css, &root_memcg->css); 1153 return css_is_ancestor(&memcg->css, &root_memcg->css);
1154} 1154}
@@ -1234,7 +1234,7 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
1234 1234
1235/** 1235/**
1236 * mem_cgroup_margin - calculate chargeable space of a memory cgroup 1236 * mem_cgroup_margin - calculate chargeable space of a memory cgroup
1237 * @mem: the memory cgroup 1237 * @memcg: the memory cgroup
1238 * 1238 *
1239 * Returns the maximum amount of memory @mem can be charged with, in 1239 * Returns the maximum amount of memory @mem can be charged with, in
1240 * pages. 1240 * pages.
@@ -1508,7 +1508,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
1508 1508
1509/** 1509/**
1510 * test_mem_cgroup_node_reclaimable 1510 * test_mem_cgroup_node_reclaimable
1511 * @mem: the target memcg 1511 * @memcg: the target memcg
1512 * @nid: the node ID to be checked. 1512 * @nid: the node ID to be checked.
1513 * @noswap : specify true here if the user wants flle only information. 1513 * @noswap : specify true here if the user wants flle only information.
1514 * 1514 *
diff --git a/mm/memory.c b/mm/memory.c
index 1b7dc662bf9f..2466d1250231 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1225,7 +1225,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
1225 next = pmd_addr_end(addr, end); 1225 next = pmd_addr_end(addr, end);
1226 if (pmd_trans_huge(*pmd)) { 1226 if (pmd_trans_huge(*pmd)) {
1227 if (next - addr != HPAGE_PMD_SIZE) { 1227 if (next - addr != HPAGE_PMD_SIZE) {
1228 VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem)); 1228#ifdef CONFIG_DEBUG_VM
1229 if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
1230 pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
1231 __func__, addr, end,
1232 vma->vm_start,
1233 vma->vm_end);
1234 BUG();
1235 }
1236#endif
1229 split_huge_page_pmd(vma->vm_mm, pmd); 1237 split_huge_page_pmd(vma->vm_mm, pmd);
1230 } else if (zap_huge_pmd(tlb, vma, pmd, addr)) 1238 } else if (zap_huge_pmd(tlb, vma, pmd, addr))
1231 goto next; 1239 goto next;
@@ -1366,7 +1374,7 @@ void unmap_vmas(struct mmu_gather *tlb,
1366/** 1374/**
1367 * zap_page_range - remove user pages in a given range 1375 * zap_page_range - remove user pages in a given range
1368 * @vma: vm_area_struct holding the applicable pages 1376 * @vma: vm_area_struct holding the applicable pages
1369 * @address: starting address of pages to zap 1377 * @start: starting address of pages to zap
1370 * @size: number of bytes to zap 1378 * @size: number of bytes to zap
1371 * @details: details of nonlinear truncation or shared cache invalidation 1379 * @details: details of nonlinear truncation or shared cache invalidation
1372 * 1380 *
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0d7e3ec8e0f3..427bb291dd0f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -618,7 +618,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
618 pgdat = hotadd_new_pgdat(nid, start); 618 pgdat = hotadd_new_pgdat(nid, start);
619 ret = -ENOMEM; 619 ret = -ENOMEM;
620 if (!pgdat) 620 if (!pgdat)
621 goto out; 621 goto error;
622 new_pgdat = 1; 622 new_pgdat = 1;
623 } 623 }
624 624
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f15c1b24ca18..1d771e4200d2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1177,7 +1177,7 @@ static long do_mbind(unsigned long start, unsigned long len,
1177 if (!list_empty(&pagelist)) { 1177 if (!list_empty(&pagelist)) {
1178 nr_failed = migrate_pages(&pagelist, new_vma_page, 1178 nr_failed = migrate_pages(&pagelist, new_vma_page,
1179 (unsigned long)vma, 1179 (unsigned long)vma,
1180 false, true); 1180 false, MIGRATE_SYNC);
1181 if (nr_failed) 1181 if (nr_failed)
1182 putback_lru_pages(&pagelist); 1182 putback_lru_pages(&pagelist);
1183 } 1183 }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index d23415c001bc..405573010f99 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -105,27 +105,35 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
105 __free_pages_bootmem(pfn_to_page(i), 0); 105 __free_pages_bootmem(pfn_to_page(i), 0);
106} 106}
107 107
108static unsigned long __init __free_memory_core(phys_addr_t start,
109 phys_addr_t end)
110{
111 unsigned long start_pfn = PFN_UP(start);
112 unsigned long end_pfn = min_t(unsigned long,
113 PFN_DOWN(end), max_low_pfn);
114
115 if (start_pfn > end_pfn)
116 return 0;
117
118 __free_pages_memory(start_pfn, end_pfn);
119
120 return end_pfn - start_pfn;
121}
122
108unsigned long __init free_low_memory_core_early(int nodeid) 123unsigned long __init free_low_memory_core_early(int nodeid)
109{ 124{
110 unsigned long count = 0; 125 unsigned long count = 0;
111 phys_addr_t start, end; 126 phys_addr_t start, end, size;
112 u64 i; 127 u64 i;
113 128
114 /* free reserved array temporarily so that it's treated as free area */ 129 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
115 memblock_free_reserved_regions(); 130 count += __free_memory_core(start, end);
116 131
117 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { 132 /* free range that is used for reserved array if we allocate it */
118 unsigned long start_pfn = PFN_UP(start); 133 size = get_allocated_memblock_reserved_regions_info(&start);
119 unsigned long end_pfn = min_t(unsigned long, 134 if (size)
120 PFN_DOWN(end), max_low_pfn); 135 count += __free_memory_core(start, start + size);
121 if (start_pfn < end_pfn) {
122 __free_pages_memory(start_pfn, end_pfn);
123 count += end_pfn - start_pfn;
124 }
125 }
126 136
127 /* put region array back? */
128 memblock_reserve_reserved_regions();
129 return count; 137 return count;
130} 138}
131 139
@@ -274,7 +282,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
274 return ___alloc_bootmem(size, align, goal, limit); 282 return ___alloc_bootmem(size, align, goal, limit);
275} 283}
276 284
277static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, 285void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
278 unsigned long size, 286 unsigned long size,
279 unsigned long align, 287 unsigned long align,
280 unsigned long goal, 288 unsigned long goal,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 416637f0e924..ac300c99baf6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -184,6 +184,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
184 const nodemask_t *nodemask, unsigned long totalpages) 184 const nodemask_t *nodemask, unsigned long totalpages)
185{ 185{
186 long points; 186 long points;
187 long adj;
187 188
188 if (oom_unkillable_task(p, memcg, nodemask)) 189 if (oom_unkillable_task(p, memcg, nodemask))
189 return 0; 190 return 0;
@@ -192,7 +193,8 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
192 if (!p) 193 if (!p)
193 return 0; 194 return 0;
194 195
195 if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { 196 adj = p->signal->oom_score_adj;
197 if (adj == OOM_SCORE_ADJ_MIN) {
196 task_unlock(p); 198 task_unlock(p);
197 return 0; 199 return 0;
198 } 200 }
@@ -210,14 +212,11 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
210 * implementation used by LSMs. 212 * implementation used by LSMs.
211 */ 213 */
212 if (has_capability_noaudit(p, CAP_SYS_ADMIN)) 214 if (has_capability_noaudit(p, CAP_SYS_ADMIN))
213 points -= 30 * totalpages / 1000; 215 adj -= 30;
214 216
215 /* 217 /* Normalize to oom_score_adj units */
216 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may 218 adj *= totalpages / 1000;
217 * either completely disable oom killing or always prefer a certain 219 points += adj;
218 * task.
219 */
220 points += p->signal->oom_score_adj * totalpages / 1000;
221 220
222 /* 221 /*
223 * Never return 0 for an eligible task regardless of the root bonus and 222 * Never return 0 for an eligible task regardless of the root bonus and
@@ -366,7 +365,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
366 365
367/** 366/**
368 * dump_tasks - dump current memory state of all system tasks 367 * dump_tasks - dump current memory state of all system tasks
369 * @mem: current's memory controller, if constrained 368 * @memcg: current's memory controller, if constrained
370 * @nodemask: nodemask passed to page allocator for mempolicy ooms 369 * @nodemask: nodemask passed to page allocator for mempolicy ooms
371 * 370 *
372 * Dumps the current memory state of all eligible tasks. Tasks not in the same 371 * Dumps the current memory state of all eligible tasks. Tasks not in the same
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44030096da63..4a4f9219683f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5635,7 +5635,12 @@ static struct page *
5635__alloc_contig_migrate_alloc(struct page *page, unsigned long private, 5635__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
5636 int **resultp) 5636 int **resultp)
5637{ 5637{
5638 return alloc_page(GFP_HIGHUSER_MOVABLE); 5638 gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
5639
5640 if (PageHighMem(page))
5641 gfp_mask |= __GFP_HIGHMEM;
5642
5643 return alloc_page(gfp_mask);
5639} 5644}
5640 5645
5641/* [start, end) must belong to a single zone. */ 5646/* [start, end) must belong to a single zone. */
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 1ccbd714059c..eb750f851395 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -392,7 +392,7 @@ static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
392 392
393/** 393/**
394 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry. 394 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
395 * @end: swap entry to be cmpxchged 395 * @ent: swap entry to be cmpxchged
396 * @old: old id 396 * @old: old id
397 * @new: new id 397 * @new: new id
398 * 398 *
@@ -422,7 +422,7 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
422/** 422/**
423 * swap_cgroup_record - record mem_cgroup for this swp_entry. 423 * swap_cgroup_record - record mem_cgroup for this swp_entry.
424 * @ent: swap entry to be recorded into 424 * @ent: swap entry to be recorded into
425 * @mem: mem_cgroup to be recorded 425 * @id: mem_cgroup to be recorded
426 * 426 *
427 * Returns old value at success, 0 at failure. 427 * Returns old value at success, 0 at failure.
428 * (Of course, old value can be 0.) 428 * (Of course, old value can be 0.)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index aa9701e12714..6c118d012bb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -162,7 +162,6 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
162 162
163/** 163/**
164 * walk_page_range - walk a memory map's page tables with a callback 164 * walk_page_range - walk a memory map's page tables with a callback
165 * @mm: memory map to walk
166 * @addr: starting address 165 * @addr: starting address
167 * @end: ending address 166 * @end: ending address
168 * @walk: set of callbacks to invoke for each level of the tree 167 * @walk: set of callbacks to invoke for each level of the tree
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 405d331804c3..3707c71ae4cd 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -360,7 +360,6 @@ err_free:
360 * @chunk: chunk to depopulate 360 * @chunk: chunk to depopulate
361 * @off: offset to the area to depopulate 361 * @off: offset to the area to depopulate
362 * @size: size of the area to depopulate in bytes 362 * @size: size of the area to depopulate in bytes
363 * @flush: whether to flush cache and tlb or not
364 * 363 *
365 * For each cpu, depopulate and unmap pages [@page_start,@page_end) 364 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
366 * from @chunk. If @flush is true, vcache is flushed before unmapping 365 * from @chunk. If @flush is true, vcache is flushed before unmapping
diff --git a/mm/shmem.c b/mm/shmem.c
index a15a466d0d1d..bd106361be4b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -264,46 +264,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
264} 264}
265 265
266/* 266/*
267 * Sometimes, before we decide whether to proceed or to fail, we must check
268 * that an entry was not already brought back from swap by a racing thread.
269 *
270 * Checking page is not enough: by the time a SwapCache page is locked, it
271 * might be reused, and again be SwapCache, using the same swap as before.
272 */
273static bool shmem_confirm_swap(struct address_space *mapping,
274 pgoff_t index, swp_entry_t swap)
275{
276 void *item;
277
278 rcu_read_lock();
279 item = radix_tree_lookup(&mapping->page_tree, index);
280 rcu_read_unlock();
281 return item == swp_to_radix_entry(swap);
282}
283
284/*
267 * Like add_to_page_cache_locked, but error if expected item has gone. 285 * Like add_to_page_cache_locked, but error if expected item has gone.
268 */ 286 */
269static int shmem_add_to_page_cache(struct page *page, 287static int shmem_add_to_page_cache(struct page *page,
270 struct address_space *mapping, 288 struct address_space *mapping,
271 pgoff_t index, gfp_t gfp, void *expected) 289 pgoff_t index, gfp_t gfp, void *expected)
272{ 290{
273 int error = 0; 291 int error;
274 292
275 VM_BUG_ON(!PageLocked(page)); 293 VM_BUG_ON(!PageLocked(page));
276 VM_BUG_ON(!PageSwapBacked(page)); 294 VM_BUG_ON(!PageSwapBacked(page));
277 295
296 page_cache_get(page);
297 page->mapping = mapping;
298 page->index = index;
299
300 spin_lock_irq(&mapping->tree_lock);
278 if (!expected) 301 if (!expected)
279 error = radix_tree_preload(gfp & GFP_RECLAIM_MASK); 302 error = radix_tree_insert(&mapping->page_tree, index, page);
303 else
304 error = shmem_radix_tree_replace(mapping, index, expected,
305 page);
280 if (!error) { 306 if (!error) {
281 page_cache_get(page); 307 mapping->nrpages++;
282 page->mapping = mapping; 308 __inc_zone_page_state(page, NR_FILE_PAGES);
283 page->index = index; 309 __inc_zone_page_state(page, NR_SHMEM);
284 310 spin_unlock_irq(&mapping->tree_lock);
285 spin_lock_irq(&mapping->tree_lock); 311 } else {
286 if (!expected) 312 page->mapping = NULL;
287 error = radix_tree_insert(&mapping->page_tree, 313 spin_unlock_irq(&mapping->tree_lock);
288 index, page); 314 page_cache_release(page);
289 else
290 error = shmem_radix_tree_replace(mapping, index,
291 expected, page);
292 if (!error) {
293 mapping->nrpages++;
294 __inc_zone_page_state(page, NR_FILE_PAGES);
295 __inc_zone_page_state(page, NR_SHMEM);
296 spin_unlock_irq(&mapping->tree_lock);
297 } else {
298 page->mapping = NULL;
299 spin_unlock_irq(&mapping->tree_lock);
300 page_cache_release(page);
301 }
302 if (!expected)
303 radix_tree_preload_end();
304 } 315 }
305 if (error)
306 mem_cgroup_uncharge_cache_page(page);
307 return error; 316 return error;
308} 317}
309 318
@@ -1124,9 +1133,9 @@ repeat:
1124 /* We have to do this with page locked to prevent races */ 1133 /* We have to do this with page locked to prevent races */
1125 lock_page(page); 1134 lock_page(page);
1126 if (!PageSwapCache(page) || page_private(page) != swap.val || 1135 if (!PageSwapCache(page) || page_private(page) != swap.val ||
1127 page->mapping) { 1136 !shmem_confirm_swap(mapping, index, swap)) {
1128 error = -EEXIST; /* try again */ 1137 error = -EEXIST; /* try again */
1129 goto failed; 1138 goto unlock;
1130 } 1139 }
1131 if (!PageUptodate(page)) { 1140 if (!PageUptodate(page)) {
1132 error = -EIO; 1141 error = -EIO;
@@ -1142,9 +1151,12 @@ repeat:
1142 1151
1143 error = mem_cgroup_cache_charge(page, current->mm, 1152 error = mem_cgroup_cache_charge(page, current->mm,
1144 gfp & GFP_RECLAIM_MASK); 1153 gfp & GFP_RECLAIM_MASK);
1145 if (!error) 1154 if (!error) {
1146 error = shmem_add_to_page_cache(page, mapping, index, 1155 error = shmem_add_to_page_cache(page, mapping, index,
1147 gfp, swp_to_radix_entry(swap)); 1156 gfp, swp_to_radix_entry(swap));
1157 /* We already confirmed swap, and make no allocation */
1158 VM_BUG_ON(error);
1159 }
1148 if (error) 1160 if (error)
1149 goto failed; 1161 goto failed;
1150 1162
@@ -1181,11 +1193,18 @@ repeat:
1181 __set_page_locked(page); 1193 __set_page_locked(page);
1182 error = mem_cgroup_cache_charge(page, current->mm, 1194 error = mem_cgroup_cache_charge(page, current->mm,
1183 gfp & GFP_RECLAIM_MASK); 1195 gfp & GFP_RECLAIM_MASK);
1184 if (!error)
1185 error = shmem_add_to_page_cache(page, mapping, index,
1186 gfp, NULL);
1187 if (error) 1196 if (error)
1188 goto decused; 1197 goto decused;
1198 error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
1199 if (!error) {
1200 error = shmem_add_to_page_cache(page, mapping, index,
1201 gfp, NULL);
1202 radix_tree_preload_end();
1203 }
1204 if (error) {
1205 mem_cgroup_uncharge_cache_page(page);
1206 goto decused;
1207 }
1189 lru_cache_add_anon(page); 1208 lru_cache_add_anon(page);
1190 1209
1191 spin_lock(&info->lock); 1210 spin_lock(&info->lock);
@@ -1245,14 +1264,10 @@ decused:
1245unacct: 1264unacct:
1246 shmem_unacct_blocks(info->flags, 1); 1265 shmem_unacct_blocks(info->flags, 1);
1247failed: 1266failed:
1248 if (swap.val && error != -EINVAL) { 1267 if (swap.val && error != -EINVAL &&
1249 struct page *test = find_get_page(mapping, index); 1268 !shmem_confirm_swap(mapping, index, swap))
1250 if (test && !radix_tree_exceptional_entry(test)) 1269 error = -EEXIST;
1251 page_cache_release(test); 1270unlock:
1252 /* Have another try if the entry has changed */
1253 if (test != swp_to_radix_entry(swap))
1254 error = -EEXIST;
1255 }
1256 if (page) { 1271 if (page) {
1257 unlock_page(page); 1272 unlock_page(page);
1258 page_cache_release(page); 1273 page_cache_release(page);
@@ -1264,7 +1279,7 @@ failed:
1264 spin_unlock(&info->lock); 1279 spin_unlock(&info->lock);
1265 goto repeat; 1280 goto repeat;
1266 } 1281 }
1267 if (error == -EEXIST) 1282 if (error == -EEXIST) /* from above or from radix_tree_insert */
1268 goto repeat; 1283 goto repeat;
1269 return error; 1284 return error;
1270} 1285}
@@ -1594,6 +1609,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1594 struct splice_pipe_desc spd = { 1609 struct splice_pipe_desc spd = {
1595 .pages = pages, 1610 .pages = pages,
1596 .partial = partial, 1611 .partial = partial,
1612 .nr_pages_max = PIPE_DEF_BUFFERS,
1597 .flags = flags, 1613 .flags = flags,
1598 .ops = &page_cache_pipe_buf_ops, 1614 .ops = &page_cache_pipe_buf_ops,
1599 .spd_release = spd_release_page, 1615 .spd_release = spd_release_page,
@@ -1682,7 +1698,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1682 if (spd.nr_pages) 1698 if (spd.nr_pages)
1683 error = splice_to_pipe(pipe, &spd); 1699 error = splice_to_pipe(pipe, &spd);
1684 1700
1685 splice_shrink_spd(pipe, &spd); 1701 splice_shrink_spd(&spd);
1686 1702
1687 if (error > 0) { 1703 if (error > 0) {
1688 *ppos += error; 1704 *ppos += error;
@@ -1691,98 +1707,6 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1691 return error; 1707 return error;
1692} 1708}
1693 1709
1694/*
1695 * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
1696 */
1697static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
1698 pgoff_t index, pgoff_t end, int origin)
1699{
1700 struct page *page;
1701 struct pagevec pvec;
1702 pgoff_t indices[PAGEVEC_SIZE];
1703 bool done = false;
1704 int i;
1705
1706 pagevec_init(&pvec, 0);
1707 pvec.nr = 1; /* start small: we may be there already */
1708 while (!done) {
1709 pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
1710 pvec.nr, pvec.pages, indices);
1711 if (!pvec.nr) {
1712 if (origin == SEEK_DATA)
1713 index = end;
1714 break;
1715 }
1716 for (i = 0; i < pvec.nr; i++, index++) {
1717 if (index < indices[i]) {
1718 if (origin == SEEK_HOLE) {
1719 done = true;
1720 break;
1721 }
1722 index = indices[i];
1723 }
1724 page = pvec.pages[i];
1725 if (page && !radix_tree_exceptional_entry(page)) {
1726 if (!PageUptodate(page))
1727 page = NULL;
1728 }
1729 if (index >= end ||
1730 (page && origin == SEEK_DATA) ||
1731 (!page && origin == SEEK_HOLE)) {
1732 done = true;
1733 break;
1734 }
1735 }
1736 shmem_deswap_pagevec(&pvec);
1737 pagevec_release(&pvec);
1738 pvec.nr = PAGEVEC_SIZE;
1739 cond_resched();
1740 }
1741 return index;
1742}
1743
1744static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
1745{
1746 struct address_space *mapping;
1747 struct inode *inode;
1748 pgoff_t start, end;
1749 loff_t new_offset;
1750
1751 if (origin != SEEK_DATA && origin != SEEK_HOLE)
1752 return generic_file_llseek_size(file, offset, origin,
1753 MAX_LFS_FILESIZE);
1754 mapping = file->f_mapping;
1755 inode = mapping->host;
1756 mutex_lock(&inode->i_mutex);
1757 /* We're holding i_mutex so we can access i_size directly */
1758
1759 if (offset < 0)
1760 offset = -EINVAL;
1761 else if (offset >= inode->i_size)
1762 offset = -ENXIO;
1763 else {
1764 start = offset >> PAGE_CACHE_SHIFT;
1765 end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1766 new_offset = shmem_seek_hole_data(mapping, start, end, origin);
1767 new_offset <<= PAGE_CACHE_SHIFT;
1768 if (new_offset > offset) {
1769 if (new_offset < inode->i_size)
1770 offset = new_offset;
1771 else if (origin == SEEK_DATA)
1772 offset = -ENXIO;
1773 else
1774 offset = inode->i_size;
1775 }
1776 }
1777
1778 if (offset >= 0 && offset != file->f_pos) {
1779 file->f_pos = offset;
1780 file->f_version = 0;
1781 }
1782 mutex_unlock(&inode->i_mutex);
1783 return offset;
1784}
1785
1786static long shmem_fallocate(struct file *file, int mode, loff_t offset, 1710static long shmem_fallocate(struct file *file, int mode, loff_t offset,
1787 loff_t len) 1711 loff_t len)
1788{ 1712{
@@ -2786,7 +2710,7 @@ static const struct address_space_operations shmem_aops = {
2786static const struct file_operations shmem_file_operations = { 2710static const struct file_operations shmem_file_operations = {
2787 .mmap = shmem_mmap, 2711 .mmap = shmem_mmap,
2788#ifdef CONFIG_TMPFS 2712#ifdef CONFIG_TMPFS
2789 .llseek = shmem_file_llseek, 2713 .llseek = generic_file_llseek,
2790 .read = do_sync_read, 2714 .read = do_sync_read,
2791 .write = do_sync_write, 2715 .write = do_sync_write,
2792 .aio_read = shmem_file_aio_read, 2716 .aio_read = shmem_file_aio_read,
diff --git a/mm/sparse.c b/mm/sparse.c
index 6a4bf9160e85..c7bb952400c8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -275,8 +275,9 @@ static unsigned long * __init
275sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, 275sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
276 unsigned long size) 276 unsigned long size)
277{ 277{
278 pg_data_t *host_pgdat; 278 unsigned long goal, limit;
279 unsigned long goal; 279 unsigned long *p;
280 int nid;
280 /* 281 /*
281 * A page may contain usemaps for other sections preventing the 282 * A page may contain usemaps for other sections preventing the
282 * page being freed and making a section unremovable while 283 * page being freed and making a section unremovable while
@@ -287,10 +288,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
287 * from the same section as the pgdat where possible to avoid 288 * from the same section as the pgdat where possible to avoid
288 * this problem. 289 * this problem.
289 */ 290 */
290 goal = __pa(pgdat) & PAGE_SECTION_MASK; 291 goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
291 host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT)); 292 limit = goal + (1UL << PA_SECTION_SHIFT);
292 return __alloc_bootmem_node_nopanic(host_pgdat, size, 293 nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
293 SMP_CACHE_BYTES, goal); 294again:
295 p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
296 SMP_CACHE_BYTES, goal, limit);
297 if (!p && limit) {
298 limit = 0;
299 goto again;
300 }
301 return p;
294} 302}
295 303
296static void __init check_usemap_section_nr(int nid, unsigned long *usemap) 304static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index de5bc51c4a66..71373d03fcee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1916,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
1916 1916
1917 /* 1917 /*
1918 * Find out how many pages are allowed for a single swap 1918 * Find out how many pages are allowed for a single swap
1919 * device. There are three limiting factors: 1) the number 1919 * device. There are two limiting factors: 1) the number
1920 * of bits for the swap offset in the swp_entry_t type, and 1920 * of bits for the swap offset in the swp_entry_t type, and
1921 * 2) the number of bits in the swap pte as defined by the 1921 * 2) the number of bits in the swap pte as defined by the
1922 * the different architectures, and 3) the number of free bits 1922 * different architectures. In order to find the
1923 * in an exceptional radix_tree entry. In order to find the
1924 * largest possible bit mask, a swap entry with swap type 0 1923 * largest possible bit mask, a swap entry with swap type 0
1925 * and swap offset ~0UL is created, encoded to a swap pte, 1924 * and swap offset ~0UL is created, encoded to a swap pte,
1926 * decoded to a swp_entry_t again, and finally the swap 1925 * decoded to a swp_entry_t again, and finally the swap
1927 * offset is extracted. This will mask all the bits from 1926 * offset is extracted. This will mask all the bits from
1928 * the initial ~0UL mask that can't be encoded in either 1927 * the initial ~0UL mask that can't be encoded in either
1929 * the swp_entry_t or the architecture definition of a 1928 * the swp_entry_t or the architecture definition of a
1930 * swap pte. Then the same is done for a radix_tree entry. 1929 * swap pte.
1931 */ 1930 */
1932 maxpages = swp_offset(pte_to_swp_entry( 1931 maxpages = swp_offset(pte_to_swp_entry(
1933 swp_entry_to_pte(swp_entry(0, ~0UL)))); 1932 swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
1934 maxpages = swp_offset(radix_to_swp_entry(
1935 swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
1936
1937 if (maxpages > swap_header->info.last_page) { 1933 if (maxpages > swap_header->info.last_page) {
1938 maxpages = swap_header->info.last_page + 1; 1934 maxpages = swap_header->info.last_page + 1;
1939 /* p->max is an unsigned int: don't overflow it */ 1935 /* p->max is an unsigned int: don't overflow it */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eeb3bc9d1d36..66e431060c05 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2688,7 +2688,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
2688 * them before going back to sleep. 2688 * them before going back to sleep.
2689 */ 2689 */
2690 set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); 2690 set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
2691 schedule(); 2691
2692 if (!kthread_should_stop())
2693 schedule();
2694
2692 set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); 2695 set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
2693 } else { 2696 } else {
2694 if (remaining) 2697 if (remaining)
@@ -2955,14 +2958,17 @@ int kswapd_run(int nid)
2955} 2958}
2956 2959
2957/* 2960/*
2958 * Called by memory hotplug when all memory in a node is offlined. 2961 * Called by memory hotplug when all memory in a node is offlined. Caller must
2962 * hold lock_memory_hotplug().
2959 */ 2963 */
2960void kswapd_stop(int nid) 2964void kswapd_stop(int nid)
2961{ 2965{
2962 struct task_struct *kswapd = NODE_DATA(nid)->kswapd; 2966 struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
2963 2967
2964 if (kswapd) 2968 if (kswapd) {
2965 kthread_stop(kswapd); 2969 kthread_stop(kswapd);
2970 NODE_DATA(nid)->kswapd = NULL;
2971 }
2966} 2972}
2967 2973
2968static int __init kswapd_init(void) 2974static int __init kswapd_init(void)