author     Mauro Carvalho Chehab <mchehab@redhat.com>  2012-07-29 20:09:39 -0400
committer  Mauro Carvalho Chehab <mchehab@redhat.com>  2012-07-29 20:09:39 -0400
commit     73bcc49959e4e40911dd0dd634bf1b353827df66 (patch)
tree       6b0c1d440c490a65c51ab5cf5aee7095cb4089d3 /mm
parent     8447c4d15e357a458c9051ddc84aa6c8b9c27000 (diff)
parent     28a33cbc24e4256c143dce96c7d93bf423229f92 (diff)
Merge tag 'v3.5'
Linux 3.5
* tag 'v3.5': (1242 commits)
Linux 3.5
Remove SYSTEM_SUSPEND_DISK system state
kdb: Switch to nolock variants of kmsg_dump functions
printk: Implement some unlocked kmsg_dump functions
printk: Remove kdb_syslog_data
kdb: Revive dmesg command
dm raid1: set discard_zeroes_data_unsupported
dm thin: do not send discards to shared blocks
dm raid1: fix crash with mirror recovery and discard
pnfs-obj: Fix __r4w_get_page when offset is beyond i_size
pnfs-obj: don't leak objio_state if ore_write/read fails
ore: Unlock r4w pages in exact reverse order of locking
ore: Remove support of partial IO request (NFS crash)
ore: Fix NFS crash by supporting any unaligned RAID IO
UBIFS: fix a bug in empty space fix-up
cx25821: Remove bad strcpy to read-only char*
HID: hid-multitouch: add support for Zytronic panels
MIPS: PCI: Move fixups from __init to __devinit.
MIPS: Fix bug.h MIPS build regression
MIPS: sync-r4k: remove redundant irq operation
...
Diffstat (limited to 'mm')
-rw-r--r--  mm/bootmem.c          6
-rw-r--r--  mm/compaction.c       5
-rw-r--r--  mm/madvise.c         18
-rw-r--r--  mm/memblock.c       115
-rw-r--r--  mm/memcontrol.c       6
-rw-r--r--  mm/memory.c          12
-rw-r--r--  mm/memory_hotplug.c   2
-rw-r--r--  mm/mempolicy.c        2
-rw-r--r--  mm/nobootmem.c       40
-rw-r--r--  mm/oom_kill.c        17
-rw-r--r--  mm/page_alloc.c       7
-rw-r--r--  mm/page_cgroup.c      4
-rw-r--r--  mm/pagewalk.c         1
-rw-r--r--  mm/percpu-vm.c        1
-rw-r--r--  mm/shmem.c          196
-rw-r--r--  mm/sparse.c          20
-rw-r--r--  mm/swapfile.c        12
-rw-r--r--  mm/vmscan.c          12
18 files changed, 244 insertions, 232 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index ec4fcb7a56c8..bcb63ac48cc5 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
         return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
                                 unsigned long size, unsigned long align,
                                 unsigned long goal, unsigned long limit)
 {
@@ -710,6 +710,10 @@ again:
         if (ptr)
                 return ptr;
 
+        /* do not panic in alloc_bootmem_bdata() */
+        if (limit && goal + size > limit)
+                limit = 0;
+
         ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
         if (ptr)
                 return ptr;
diff --git a/mm/compaction.c b/mm/compaction.c
index 7ea259d82a99..2f42d9528539 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -701,8 +701,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                 if (err) {
                         putback_lru_pages(&cc->migratepages);
                         cc->nr_migratepages = 0;
+                        if (err == -ENOMEM) {
+                                ret = COMPACT_PARTIAL;
+                                goto out;
+                        }
                 }
-
         }
 
 out:
diff --git a/mm/madvise.c b/mm/madvise.c
index deff1b64a08c..14d260fa0d17 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/ksm.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -204,14 +205,16 @@ static long madvise_remove(struct vm_area_struct *vma,
 {
         loff_t offset;
         int error;
+        struct file *f;
 
         *prev = NULL;   /* tell sys_madvise we drop mmap_sem */
 
         if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
                 return -EINVAL;
 
-        if (!vma->vm_file || !vma->vm_file->f_mapping
-                || !vma->vm_file->f_mapping->host) {
+        f = vma->vm_file;
+
+        if (!f || !f->f_mapping || !f->f_mapping->host) {
                 return -EINVAL;
         }
 
@@ -221,11 +224,18 @@ static long madvise_remove(struct vm_area_struct *vma,
         offset = (loff_t)(start - vma->vm_start)
                         + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
-        /* filesystem's fallocate may need to take i_mutex */
+        /*
+         * Filesystem's fallocate may need to take i_mutex.  We need to
+         * explicitly grab a reference because the vma (and hence the
+         * vma's reference to the file) can go away as soon as we drop
+         * mmap_sem.
+         */
+        get_file(f);
         up_read(&current->mm->mmap_sem);
-        error = do_fallocate(vma->vm_file,
+        error = do_fallocate(f,
                         FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                         offset, end - start);
+        fput(f);
         down_read(&current->mm->mmap_sem);
         return error;
 }
diff --git a/mm/memblock.c b/mm/memblock.c
index 952123eba433..5cc6731b00cc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -143,30 +143,6 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
                                    MAX_NUMNODES);
 }
 
-/*
- * Free memblock.reserved.regions
- */
-int __init_memblock memblock_free_reserved_regions(void)
-{
-        if (memblock.reserved.regions == memblock_reserved_init_regions)
-                return 0;
-
-        return memblock_free(__pa(memblock.reserved.regions),
-                 sizeof(struct memblock_region) * memblock.reserved.max);
-}
-
-/*
- * Reserve memblock.reserved.regions
- */
-int __init_memblock memblock_reserve_reserved_regions(void)
-{
-        if (memblock.reserved.regions == memblock_reserved_init_regions)
-                return 0;
-
-        return memblock_reserve(__pa(memblock.reserved.regions),
-                 sizeof(struct memblock_region) * memblock.reserved.max);
-}
-
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
 {
         type->total_size -= type->regions[r].size;
@@ -184,9 +160,39 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
         }
 }
 
-static int __init_memblock memblock_double_array(struct memblock_type *type)
+phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
+                                        phys_addr_t *addr)
+{
+        if (memblock.reserved.regions == memblock_reserved_init_regions)
+                return 0;
+
+        *addr = __pa(memblock.reserved.regions);
+
+        return PAGE_ALIGN(sizeof(struct memblock_region) *
+                          memblock.reserved.max);
+}
+
+/**
+ * memblock_double_array - double the size of the memblock regions array
+ * @type: memblock type of the regions array being doubled
+ * @new_area_start: starting address of memory range to avoid overlap with
+ * @new_area_size: size of memory range to avoid overlap with
+ *
+ * Double the size of the @type regions array. If memblock is being used to
+ * allocate memory for a new reserved regions array and there is a previously
+ * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
+ * waiting to be reserved, ensure the memory used by the new array does
+ * not overlap.
+ *
+ * RETURNS:
+ * 0 on success, -1 on failure.
+ */
+static int __init_memblock memblock_double_array(struct memblock_type *type,
+                                                phys_addr_t new_area_start,
+                                                phys_addr_t new_area_size)
 {
         struct memblock_region *new_array, *old_array;
+        phys_addr_t old_alloc_size, new_alloc_size;
         phys_addr_t old_size, new_size, addr;
         int use_slab = slab_is_available();
         int *in_slab;
@@ -200,6 +206,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
         /* Calculate new doubled size */
         old_size = type->max * sizeof(struct memblock_region);
         new_size = old_size << 1;
+        /*
+         * We need to allocated new one align to PAGE_SIZE,
+         *   so we can free them completely later.
+         */
+        old_alloc_size = PAGE_ALIGN(old_size);
+        new_alloc_size = PAGE_ALIGN(new_size);
 
         /* Retrieve the slab flag */
         if (type == &memblock.memory)
@@ -222,7 +234,18 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
                 new_array = kmalloc(new_size, GFP_KERNEL);
                 addr = new_array ? __pa(new_array) : 0;
         } else {
-                addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
+                /* only exclude range when trying to double reserved.regions */
+                if (type != &memblock.reserved)
+                        new_area_start = new_area_size = 0;
+
+                addr = memblock_find_in_range(new_area_start + new_area_size,
+                                                memblock.current_limit,
+                                                new_alloc_size, PAGE_SIZE);
+                if (!addr && new_area_size)
+                        addr = memblock_find_in_range(0,
+                                        min(new_area_start, memblock.current_limit),
+                                        new_alloc_size, PAGE_SIZE);
+
                 new_array = addr ? __va(addr) : 0;
         }
         if (!addr) {
@@ -251,13 +274,13 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
                 kfree(old_array);
         else if (old_array != memblock_memory_init_regions &&
                  old_array != memblock_reserved_init_regions)
-                memblock_free(__pa(old_array), old_size);
+                memblock_free(__pa(old_array), old_alloc_size);
 
         /* Reserve the new array if that comes from the memblock.
          * Otherwise, we needn't do it
          */
         if (!use_slab)
-                BUG_ON(memblock_reserve(addr, new_size));
+                BUG_ON(memblock_reserve(addr, new_alloc_size));
 
         /* Update slab flag */
         *in_slab = use_slab;
@@ -399,7 +422,7 @@ repeat:
          */
         if (!insert) {
                 while (type->cnt + nr_new > type->max)
-                        if (memblock_double_array(type) < 0)
+                        if (memblock_double_array(type, obase, size) < 0)
                                 return -ENOMEM;
                 insert = true;
                 goto repeat;
@@ -450,7 +473,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
 
         /* we'll create at most two more regions */
         while (type->cnt + 2 > type->max)
-                if (memblock_double_array(type) < 0)
+                if (memblock_double_array(type, base, size) < 0)
                         return -ENOMEM;
 
         for (i = 0; i < type->cnt; i++) {
@@ -540,9 +563,9 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
  * __next_free_mem_range - next function for for_each_free_mem_range()
  * @idx: pointer to u64 loop variable
  * @nid: nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @out_nid: ptr to int for nid of the range, can be %NULL
  *
  * Find the first free area from *@idx which matches @nid, fill the out
  * parameters, and update *@idx for the next iteration.  The lower 32bit of
@@ -616,9 +639,9 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
  * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
  * @idx: pointer to u64 loop variable
  * @nid: nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @out_nid: ptr to int for nid of the range, can be %NULL
  *
  * Reverse of __next_free_mem_range().
  */
@@ -867,6 +890,16 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
         return memblock_search(&memblock.memory, addr) != -1;
 }
 
+/**
+ * memblock_is_region_memory - check if a region is a subset of memory
+ * @base: base of region to check
+ * @size: size of region to check
+ *
+ * Check if the region [@base, @base+@size) is a subset of a memory block.
+ *
+ * RETURNS:
+ * 0 if false, non-zero if true
+ */
 int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
 {
         int idx = memblock_search(&memblock.memory, base);
@@ -879,6 +912,16 @@ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size
                  memblock.memory.regions[idx].size) >= end;
 }
 
+/**
+ * memblock_is_region_reserved - check if a region intersects reserved memory
+ * @base: base of region to check
+ * @size: size of region to check
+ *
+ * Check if the region [@base, @base+@size) intersects a reserved memory block.
+ *
+ * RETURNS:
+ * 0 if false, non-zero if true
+ */
 int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
 {
         memblock_cap_size(base, &size);
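The reworked memblock_double_array() above searches for the doubled array in two steps: first above the memory range that is still waiting to be reserved, then, if that fails, below it. A minimal userspace sketch of that fallback order; find_in_range(), the fake free window and all address values are illustrative stand-ins, not the kernel's memblock API.

#include <stdio.h>

/* Illustrative stand-in for memblock_find_in_range(): pretend the only
 * free space lives in [0x1000, 0x9000) and fail outside of it. */
static unsigned long find_in_range(unsigned long lo, unsigned long hi,
                                   unsigned long size)
{
        unsigned long base = lo < 0x1000 ? 0x1000 : lo;

        if (base + size <= hi && base + size <= 0x9000)
                return base;
        return 0;       /* no space */
}

int main(void)
{
        unsigned long new_area_start = 0x8000, new_area_size = 0x1000;
        unsigned long limit = 0x9000, alloc_size = 0x2000;
        unsigned long addr;

        /* First try above the pending reservation ... */
        addr = find_in_range(new_area_start + new_area_size, limit, alloc_size);
        /* ... then fall back to the region below it. */
        if (!addr && new_area_size)
                addr = find_in_range(0,
                                     new_area_start < limit ? new_area_start : limit,
                                     alloc_size);

        printf("placed doubled array at %#lx\n", addr);
        return 0;
}

With these made-up numbers the first search fails (there is no room above the pending range), so the array lands at 0x1000 via the fallback search below it.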
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ac35bccadb7b..f72b5e52451a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1148,7 +1148,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
 {
         if (root_memcg == memcg)
                 return true;
-        if (!root_memcg->use_hierarchy)
+        if (!root_memcg->use_hierarchy || !memcg)
                 return false;
         return css_is_ancestor(&memcg->css, &root_memcg->css);
 }
@@ -1234,7 +1234,7 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
 
 /**
  * mem_cgroup_margin - calculate chargeable space of a memory cgroup
- * @mem: the memory cgroup
+ * @memcg: the memory cgroup
  *
  * Returns the maximum amount of memory @mem can be charged with, in
  * pages.
@@ -1508,7 +1508,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
 
 /**
  * test_mem_cgroup_node_reclaimable
- * @mem: the target memcg
+ * @memcg: the target memcg
  * @nid: the node ID to be checked.
  * @noswap : specify true here if the user wants flle only information.
  *
diff --git a/mm/memory.c b/mm/memory.c
index 1b7dc662bf9f..2466d1250231 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1225,7 +1225,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                 next = pmd_addr_end(addr, end);
                 if (pmd_trans_huge(*pmd)) {
                         if (next - addr != HPAGE_PMD_SIZE) {
-                                VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
+#ifdef CONFIG_DEBUG_VM
+                                if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
+                                        pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
+                                                        __func__, addr, end,
+                                                        vma->vm_start,
+                                                        vma->vm_end);
+                                        BUG();
+                                }
+#endif
                                 split_huge_page_pmd(vma->vm_mm, pmd);
                         } else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                 goto next;
@@ -1366,7 +1374,7 @@ void unmap_vmas(struct mmu_gather *tlb,
 /**
  * zap_page_range - remove user pages in a given range
  * @vma: vm_area_struct holding the applicable pages
- * @address: starting address of pages to zap
+ * @start: starting address of pages to zap
  * @size: number of bytes to zap
  * @details: details of nonlinear truncation or shared cache invalidation
  *
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0d7e3ec8e0f3..427bb291dd0f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -618,7 +618,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
                 pgdat = hotadd_new_pgdat(nid, start);
                 ret = -ENOMEM;
                 if (!pgdat)
-                        goto out;
+                        goto error;
                 new_pgdat = 1;
         }
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f15c1b24ca18..1d771e4200d2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1177,7 +1177,7 @@ static long do_mbind(unsigned long start, unsigned long len,
         if (!list_empty(&pagelist)) {
                 nr_failed = migrate_pages(&pagelist, new_vma_page,
                                         (unsigned long)vma,
-                                        false, true);
+                                        false, MIGRATE_SYNC);
                 if (nr_failed)
                         putback_lru_pages(&pagelist);
         }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index d23415c001bc..405573010f99 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -105,27 +105,35 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
                 __free_pages_bootmem(pfn_to_page(i), 0);
 }
 
+static unsigned long __init __free_memory_core(phys_addr_t start,
+                                 phys_addr_t end)
+{
+        unsigned long start_pfn = PFN_UP(start);
+        unsigned long end_pfn = min_t(unsigned long,
+                                      PFN_DOWN(end), max_low_pfn);
+
+        if (start_pfn > end_pfn)
+                return 0;
+
+        __free_pages_memory(start_pfn, end_pfn);
+
+        return end_pfn - start_pfn;
+}
+
 unsigned long __init free_low_memory_core_early(int nodeid)
 {
         unsigned long count = 0;
-        phys_addr_t start, end;
+        phys_addr_t start, end, size;
         u64 i;
 
-        /* free reserved array temporarily so that it's treated as free area */
-        memblock_free_reserved_regions();
+        for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
+                count += __free_memory_core(start, end);
 
-        for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
-                unsigned long start_pfn = PFN_UP(start);
-                unsigned long end_pfn = min_t(unsigned long,
-                                              PFN_DOWN(end), max_low_pfn);
-                if (start_pfn < end_pfn) {
-                        __free_pages_memory(start_pfn, end_pfn);
-                        count += end_pfn - start_pfn;
-                }
-        }
+        /* free range that is used for reserved array if we allocate it */
+        size = get_allocated_memblock_reserved_regions_info(&start);
+        if (size)
+                count += __free_memory_core(start, start + size);
 
-        /* put region array back? */
-        memblock_reserve_reserved_regions();
         return count;
 }
 
@@ -274,7 +282,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
         return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
                                                    unsigned long size,
                                                    unsigned long align,
                                                    unsigned long goal,
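The new __free_memory_core() helper clamps an arbitrary physical range to whole page frames before handing it to __free_pages_memory(): round the start up, round the end down, cap the end at max_low_pfn, and return 0 when the range collapses. A small standalone sketch of just that arithmetic; PAGE_SHIFT, the PFN_UP()/PFN_DOWN() macros and the max_low_pfn value are illustrative reimplementations rather than the kernel's definitions.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PFN_UP(x)       (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)

static unsigned long max_low_pfn = 0x100;       /* illustrative: 1 MiB of low memory */

/* Count how many whole page frames fit inside [start, end). */
static unsigned long free_core(unsigned long start, unsigned long end)
{
        unsigned long start_pfn = PFN_UP(start);
        unsigned long end_pfn = PFN_DOWN(end);

        if (end_pfn > max_low_pfn)
                end_pfn = max_low_pfn;
        if (start_pfn > end_pfn)
                return 0;       /* range too small or entirely above the cap */

        return end_pfn - start_pfn;
}

int main(void)
{
        /* 0x1234..0x5678 only covers frames 2..4 once both ends are rounded inward. */
        printf("%lu frames\n", free_core(0x1234, 0x5678));
        /* A sub-page range rounds away to nothing. */
        printf("%lu frames\n", free_core(0x1234, 0x1640));
        return 0;
}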
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 416637f0e924..ac300c99baf6 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -184,6 +184,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
                           const nodemask_t *nodemask, unsigned long totalpages)
 {
         long points;
+        long adj;
 
         if (oom_unkillable_task(p, memcg, nodemask))
                 return 0;
@@ -192,7 +193,8 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
         if (!p)
                 return 0;
 
-        if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+        adj = p->signal->oom_score_adj;
+        if (adj == OOM_SCORE_ADJ_MIN) {
                 task_unlock(p);
                 return 0;
         }
@@ -210,14 +212,11 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
          * implementation used by LSMs.
          */
         if (has_capability_noaudit(p, CAP_SYS_ADMIN))
-                points -= 30 * totalpages / 1000;
+                adj -= 30;
 
-        /*
-         * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
-         * either completely disable oom killing or always prefer a certain
-         * task.
-         */
-        points += p->signal->oom_score_adj * totalpages / 1000;
+        /* Normalize to oom_score_adj units */
+        adj *= totalpages / 1000;
+        points += adj;
 
         /*
          * Never return 0 for an eligible task regardless of the root bonus and
@@ -366,7 +365,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 
 /**
  * dump_tasks - dump current memory state of all system tasks
- * @mem: current's memory controller, if constrained
+ * @memcg: current's memory controller, if constrained
  * @nodemask: nodemask passed to page allocator for mempolicy ooms
  *
  * Dumps the current memory state of all eligible tasks.  Tasks not in the same
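With the change above, oom_badness() keeps the CAP_SYS_ADMIN bonus and oom_score_adj in the same units: both are accumulated in adj (roughly -1000..1000) and then scaled by totalpages / 1000 in a single step. A runnable sketch of the new arithmetic; the function name, the RSS-derived starting score and the totalpages value are made up for illustration.

#include <stdio.h>

/* Scale an oom_score_adj-style value into pages, as the patched
 * oom_badness() does: adj is expressed in thousandths of totalpages. */
static long scaled_badness(long points, long adj, int root,
                           unsigned long totalpages)
{
        if (root)
                adj -= 30;              /* root bonus, in oom_score_adj units */

        adj *= totalpages / 1000;       /* normalize to pages */
        return points + adj;
}

int main(void)
{
        unsigned long totalpages = 1000000;     /* ~4 GB of 4 KiB pages */
        long rss_based = 250000;                /* pretend RSS-derived score */

        printf("plain task:         %ld\n", scaled_badness(rss_based, 0, 0, totalpages));
        printf("root task:          %ld\n", scaled_badness(rss_based, 0, 1, totalpages));
        printf("oom_score_adj=500:  %ld\n", scaled_badness(rss_based, 500, 0, totalpages));
        return 0;
}

With these numbers the root bonus shaves 30000 pages off the score and an oom_score_adj of 500 adds 500000, showing how the adjustment now swings the score by up to +/- totalpages.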
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 44030096da63..4a4f9219683f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5635,7 +5635,12 @@ static struct page *
 __alloc_contig_migrate_alloc(struct page *page, unsigned long private,
                              int **resultp)
 {
-        return alloc_page(GFP_HIGHUSER_MOVABLE);
+        gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
+
+        if (PageHighMem(page))
+                gfp_mask |= __GFP_HIGHMEM;
+
+        return alloc_page(gfp_mask);
 }
 
 /* [start, end) must belong to a single zone. */
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 1ccbd714059c..eb750f851395 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -392,7 +392,7 @@ static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
 
 /**
  * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
- * @end: swap entry to be cmpxchged
+ * @ent: swap entry to be cmpxchged
  * @old: old id
  * @new: new id
  *
@@ -422,7 +422,7 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 /**
  * swap_cgroup_record - record mem_cgroup for this swp_entry.
  * @ent: swap entry to be recorded into
- * @mem: mem_cgroup to be recorded
+ * @id: mem_cgroup to be recorded
  *
  * Returns old value at success, 0 at failure.
  * (Of course, old value can be 0.)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index aa9701e12714..6c118d012bb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -162,7 +162,6 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 
 /**
  * walk_page_range - walk a memory map's page tables with a callback
- * @mm: memory map to walk
  * @addr: starting address
  * @end: ending address
  * @walk: set of callbacks to invoke for each level of the tree
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 405d331804c3..3707c71ae4cd 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -360,7 +360,6 @@ err_free:
  * @chunk: chunk to depopulate
  * @off: offset to the area to depopulate
  * @size: size of the area to depopulate in bytes
- * @flush: whether to flush cache and tlb or not
  *
  * For each cpu, depopulate and unmap pages [@page_start,@page_end)
  * from @chunk.  If @flush is true, vcache is flushed before unmapping
diff --git a/mm/shmem.c b/mm/shmem.c
index a15a466d0d1d..bd106361be4b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -264,46 +264,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 }
 
 /*
+ * Sometimes, before we decide whether to proceed or to fail, we must check
+ * that an entry was not already brought back from swap by a racing thread.
+ *
+ * Checking page is not enough: by the time a SwapCache page is locked, it
+ * might be reused, and again be SwapCache, using the same swap as before.
+ */
+static bool shmem_confirm_swap(struct address_space *mapping,
+                               pgoff_t index, swp_entry_t swap)
+{
+        void *item;
+
+        rcu_read_lock();
+        item = radix_tree_lookup(&mapping->page_tree, index);
+        rcu_read_unlock();
+        return item == swp_to_radix_entry(swap);
+}
+
+/*
  * Like add_to_page_cache_locked, but error if expected item has gone.
  */
 static int shmem_add_to_page_cache(struct page *page,
                                    struct address_space *mapping,
                                    pgoff_t index, gfp_t gfp, void *expected)
 {
-        int error = 0;
+        int error;
 
         VM_BUG_ON(!PageLocked(page));
         VM_BUG_ON(!PageSwapBacked(page));
 
+        page_cache_get(page);
+        page->mapping = mapping;
+        page->index = index;
+
+        spin_lock_irq(&mapping->tree_lock);
         if (!expected)
-                error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+                error = radix_tree_insert(&mapping->page_tree, index, page);
+        else
+                error = shmem_radix_tree_replace(mapping, index, expected,
+                                                                 page);
         if (!error) {
-                page_cache_get(page);
-                page->mapping = mapping;
-                page->index = index;
-
-                spin_lock_irq(&mapping->tree_lock);
-                if (!expected)
-                        error = radix_tree_insert(&mapping->page_tree,
-                                                        index, page);
-                else
-                        error = shmem_radix_tree_replace(mapping, index,
-                                                         expected, page);
-                if (!error) {
-                        mapping->nrpages++;
-                        __inc_zone_page_state(page, NR_FILE_PAGES);
-                        __inc_zone_page_state(page, NR_SHMEM);
-                        spin_unlock_irq(&mapping->tree_lock);
-                } else {
-                        page->mapping = NULL;
-                        spin_unlock_irq(&mapping->tree_lock);
-                        page_cache_release(page);
-                }
-                if (!expected)
-                        radix_tree_preload_end();
+                mapping->nrpages++;
+                __inc_zone_page_state(page, NR_FILE_PAGES);
+                __inc_zone_page_state(page, NR_SHMEM);
+                spin_unlock_irq(&mapping->tree_lock);
+        } else {
+                page->mapping = NULL;
+                spin_unlock_irq(&mapping->tree_lock);
+                page_cache_release(page);
         }
-        if (error)
-                mem_cgroup_uncharge_cache_page(page);
         return error;
 }
 
@@ -1124,9 +1133,9 @@ repeat:
                 /* We have to do this with page locked to prevent races */
                 lock_page(page);
                 if (!PageSwapCache(page) || page_private(page) != swap.val ||
-                    page->mapping) {
+                    !shmem_confirm_swap(mapping, index, swap)) {
                         error = -EEXIST;        /* try again */
-                        goto failed;
+                        goto unlock;
                 }
                 if (!PageUptodate(page)) {
                         error = -EIO;
@@ -1142,9 +1151,12 @@ repeat:
 
                 error = mem_cgroup_cache_charge(page, current->mm,
                                                 gfp & GFP_RECLAIM_MASK);
-                if (!error)
+                if (!error) {
                         error = shmem_add_to_page_cache(page, mapping, index,
                                                 gfp, swp_to_radix_entry(swap));
+                        /* We already confirmed swap, and make no allocation */
+                        VM_BUG_ON(error);
+                }
                 if (error)
                         goto failed;
 
@@ -1181,11 +1193,18 @@ repeat:
                 __set_page_locked(page);
                 error = mem_cgroup_cache_charge(page, current->mm,
                                                 gfp & GFP_RECLAIM_MASK);
-                if (!error)
-                        error = shmem_add_to_page_cache(page, mapping, index,
-                                                        gfp, NULL);
                 if (error)
                         goto decused;
+                error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+                if (!error) {
+                        error = shmem_add_to_page_cache(page, mapping, index,
+                                                        gfp, NULL);
+                        radix_tree_preload_end();
+                }
+                if (error) {
+                        mem_cgroup_uncharge_cache_page(page);
+                        goto decused;
+                }
                 lru_cache_add_anon(page);
 
                 spin_lock(&info->lock);
@@ -1245,14 +1264,10 @@ decused:
 unacct:
         shmem_unacct_blocks(info->flags, 1);
 failed:
-        if (swap.val && error != -EINVAL) {
-                struct page *test = find_get_page(mapping, index);
-                if (test && !radix_tree_exceptional_entry(test))
-                        page_cache_release(test);
-                /* Have another try if the entry has changed */
-                if (test != swp_to_radix_entry(swap))
-                        error = -EEXIST;
-        }
+        if (swap.val && error != -EINVAL &&
+            !shmem_confirm_swap(mapping, index, swap))
+                error = -EEXIST;
+unlock:
         if (page) {
                 unlock_page(page);
                 page_cache_release(page);
@@ -1264,7 +1279,7 @@ failed:
                 spin_unlock(&info->lock);
                 goto repeat;
         }
-        if (error == -EEXIST)
+        if (error == -EEXIST)   /* from above or from radix_tree_insert */
                 goto repeat;
         return error;
 }
@@ -1594,6 +1609,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
         struct splice_pipe_desc spd = {
                 .pages = pages,
                 .partial = partial,
+                .nr_pages_max = PIPE_DEF_BUFFERS,
                 .flags = flags,
                 .ops = &page_cache_pipe_buf_ops,
                 .spd_release = spd_release_page,
@@ -1682,7 +1698,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
         if (spd.nr_pages)
                 error = splice_to_pipe(pipe, &spd);
 
-        splice_shrink_spd(pipe, &spd);
+        splice_shrink_spd(&spd);
 
         if (error > 0) {
                 *ppos += error;
@@ -1691,98 +1707,6 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
         return error;
 }
 
-/*
- * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
- */
-static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-                                        pgoff_t index, pgoff_t end, int origin)
-{
-        struct page *page;
-        struct pagevec pvec;
-        pgoff_t indices[PAGEVEC_SIZE];
-        bool done = false;
-        int i;
-
-        pagevec_init(&pvec, 0);
-        pvec.nr = 1;            /* start small: we may be there already */
-        while (!done) {
-                pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
-                                        pvec.nr, pvec.pages, indices);
-                if (!pvec.nr) {
-                        if (origin == SEEK_DATA)
-                                index = end;
-                        break;
-                }
-                for (i = 0; i < pvec.nr; i++, index++) {
-                        if (index < indices[i]) {
-                                if (origin == SEEK_HOLE) {
-                                        done = true;
-                                        break;
-                                }
-                                index = indices[i];
-                        }
-                        page = pvec.pages[i];
-                        if (page && !radix_tree_exceptional_entry(page)) {
-                                if (!PageUptodate(page))
-                                        page = NULL;
-                        }
-                        if (index >= end ||
-                            (page && origin == SEEK_DATA) ||
-                            (!page && origin == SEEK_HOLE)) {
-                                done = true;
-                                break;
-                        }
-                }
-                shmem_deswap_pagevec(&pvec);
-                pagevec_release(&pvec);
-                pvec.nr = PAGEVEC_SIZE;
-                cond_resched();
-        }
-        return index;
-}
-
-static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
-{
-        struct address_space *mapping;
-        struct inode *inode;
-        pgoff_t start, end;
-        loff_t new_offset;
-
-        if (origin != SEEK_DATA && origin != SEEK_HOLE)
-                return generic_file_llseek_size(file, offset, origin,
-                                                        MAX_LFS_FILESIZE);
-        mapping = file->f_mapping;
-        inode = mapping->host;
-        mutex_lock(&inode->i_mutex);
-        /* We're holding i_mutex so we can access i_size directly */
-
-        if (offset < 0)
-                offset = -EINVAL;
-        else if (offset >= inode->i_size)
-                offset = -ENXIO;
-        else {
-                start = offset >> PAGE_CACHE_SHIFT;
-                end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-                new_offset = shmem_seek_hole_data(mapping, start, end, origin);
-                new_offset <<= PAGE_CACHE_SHIFT;
-                if (new_offset > offset) {
-                        if (new_offset < inode->i_size)
-                                offset = new_offset;
-                        else if (origin == SEEK_DATA)
-                                offset = -ENXIO;
-                        else
-                                offset = inode->i_size;
-                }
-        }
-
-        if (offset >= 0 && offset != file->f_pos) {
-                file->f_pos = offset;
-                file->f_version = 0;
-        }
-        mutex_unlock(&inode->i_mutex);
-        return offset;
-}
-
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                                                          loff_t len)
 {
@@ -2786,7 +2710,7 @@ static const struct address_space_operations shmem_aops = {
 static const struct file_operations shmem_file_operations = {
         .mmap           = shmem_mmap,
 #ifdef CONFIG_TMPFS
-        .llseek         = shmem_file_llseek,
+        .llseek         = generic_file_llseek,
         .read           = do_sync_read,
         .write          = do_sync_write,
         .aio_read       = shmem_file_aio_read,
diff --git a/mm/sparse.c b/mm/sparse.c
index 6a4bf9160e85..c7bb952400c8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -275,8 +275,9 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
                                          unsigned long size)
 {
-        pg_data_t *host_pgdat;
-        unsigned long goal;
+        unsigned long goal, limit;
+        unsigned long *p;
+        int nid;
         /*
          * A page may contain usemaps for other sections preventing the
          * page being freed and making a section unremovable while
@@ -287,10 +288,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
          * from the same section as the pgdat where possible to avoid
          * this problem.
          */
-        goal = __pa(pgdat) & PAGE_SECTION_MASK;
-        host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
-        return __alloc_bootmem_node_nopanic(host_pgdat, size,
-                                            SMP_CACHE_BYTES, goal);
+        goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+        limit = goal + (1UL << PA_SECTION_SHIFT);
+        nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
+again:
+        p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+                                          SMP_CACHE_BYTES, goal, limit);
+        if (!p && limit) {
+                limit = 0;
+                goto again;
+        }
+        return p;
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
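The sparse.c fix replaces a pfn mask applied to a physical address with an address-sized mask (PAGE_SECTION_MASK << PAGE_SHIFT) and bounds the bootmem search to the end of that one section. A small sketch of the alignment arithmetic; PAGE_SHIFT and SECTION_SIZE_BITS are assumed example values here, since the real ones are per-architecture.

#include <stdio.h>

/* Illustrative values; the real ones depend on the architecture. */
#define PAGE_SHIFT              12
#define SECTION_SIZE_BITS       27                      /* 128 MiB sections */
#define PA_SECTION_SHIFT        SECTION_SIZE_BITS
#define PFN_SECTION_SHIFT       (SECTION_SIZE_BITS - PAGE_SHIFT)
#define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
#define PAGE_SECTION_MASK       (~(PAGES_PER_SECTION - 1))      /* a *pfn* mask */

int main(void)
{
        unsigned long pa = 0x12345678UL;        /* pretend __pa(pgdat) */

        /* Old: a pfn mask applied to an address clears too few bits,
         * so the goal is not section-aligned. */
        unsigned long old_goal = pa & PAGE_SECTION_MASK;
        /* New: shift the mask so it aligns the *address* to its section,
         * and bound the search to the end of that one section. */
        unsigned long goal  = pa & (PAGE_SECTION_MASK << PAGE_SHIFT);
        unsigned long limit = goal + (1UL << PA_SECTION_SHIFT);

        printf("old goal %#lx, new goal %#lx, limit %#lx\n",
               old_goal, goal, limit);
        return 0;
}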
diff --git a/mm/swapfile.c b/mm/swapfile.c
index de5bc51c4a66..71373d03fcee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1916,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 
         /*
          * Find out how many pages are allowed for a single swap
-         * device. There are three limiting factors: 1) the number
+         * device. There are two limiting factors: 1) the number
          * of bits for the swap offset in the swp_entry_t type, and
          * 2) the number of bits in the swap pte as defined by the
-         * the different architectures, and 3) the number of free bits
-         * in an exceptional radix_tree entry. In order to find the
+         * different architectures. In order to find the
          * largest possible bit mask, a swap entry with swap type 0
          * and swap offset ~0UL is created, encoded to a swap pte,
          * decoded to a swp_entry_t again, and finally the swap
          * offset is extracted. This will mask all the bits from
          * the initial ~0UL mask that can't be encoded in either
          * the swp_entry_t or the architecture definition of a
-         * swap pte. Then the same is done for a radix_tree entry.
+         * swap pte.
          */
         maxpages = swp_offset(pte_to_swp_entry(
-                        swp_entry_to_pte(swp_entry(0, ~0UL))));
-        maxpages = swp_offset(radix_to_swp_entry(
-                        swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
-
+                        swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
         if (maxpages > swap_header->info.last_page) {
                 maxpages = swap_header->info.last_page + 1;
                 /* p->max is an unsigned int: don't overflow it */
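The swapfile.c hunk drops the extra radix-tree round trip: maxpages now comes from encoding a swap entry with offset ~0UL into a swap pte, decoding it again, and adding 1, so only the pte encoding limits the device size. A toy sketch of that round-trip idea; the SWP_OFFSET_BITS width and the encoding itself are invented for illustration and are not any architecture's real swap pte layout.

#include <stdio.h>

/* Illustrative encoding: pretend the swap pte only keeps
 * SWP_OFFSET_BITS bits of offset. */
#define SWP_OFFSET_BITS 24

static unsigned long pte_offset_roundtrip(unsigned long offset)
{
        /* encode ... */
        unsigned long pte = offset & ((1UL << SWP_OFFSET_BITS) - 1);
        /* ... and decode: bits that did not fit are simply gone. */
        return pte;
}

int main(void)
{
        /* Round-trip ~0UL to discover the largest representable offset,
         * then +1 gives the page count, as read_swap_header() now does. */
        unsigned long maxpages = pte_offset_roundtrip(~0UL) + 1;

        printf("maxpages = %lu (%#lx)\n", maxpages, maxpages);
        return 0;
}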
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eeb3bc9d1d36..66e431060c05 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2688,7 +2688,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
                  * them before going back to sleep.
                  */
                 set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
-                schedule();
+
+                if (!kthread_should_stop())
+                        schedule();
+
                 set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
         } else {
                 if (remaining)
@@ -2955,14 +2958,17 @@ int kswapd_run(int nid)
 }
 
 /*
- * Called by memory hotplug when all memory in a node is offlined.
+ * Called by memory hotplug when all memory in a node is offlined. Caller must
+ * hold lock_memory_hotplug().
  */
 void kswapd_stop(int nid)
 {
         struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
 
-        if (kswapd)
+        if (kswapd) {
                 kthread_stop(kswapd);
+                NODE_DATA(nid)->kswapd = NULL;
+        }
 }
 
 static int __init kswapd_init(void)