Diffstat (limited to 'mm')
 mm/bootmem.c     | 37
 mm/filemap_xip.c | 65
 mm/mm_init.c     |  2
 mm/oom_kill.c    |  6
 mm/rmap.c        | 39
 mm/swap_state.c  |  2
 6 files changed, 111 insertions(+), 40 deletions(-)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 4af15d0340ad..ad8eec6e44a8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -405,6 +405,29 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
+				unsigned long step)
+{
+	unsigned long base = bdata->node_min_pfn;
+
+	/*
+	 * Align the index with respect to the node start so that the
+	 * combination of both satisfies the requested alignment.
+	 */
+
+	return ALIGN(base + idx, step) - base;
+}
+
+static unsigned long align_off(struct bootmem_data *bdata, unsigned long off,
+				unsigned long align)
+{
+	unsigned long base = PFN_PHYS(bdata->node_min_pfn);
+
+	/* Same as align_idx for byte offsets */
+
+	return ALIGN(base + off, align) - base;
+}
+
 static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
@@ -441,7 +464,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 	else
 		start = ALIGN(min, step);
 
-	sidx = start - bdata->node_min_pfn;;
+	sidx = start - bdata->node_min_pfn;
 	midx = max - bdata->node_min_pfn;
 
 	if (bdata->hint_idx > sidx) {
@@ -450,7 +473,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		 * catch the fallback below.
 		 */
 		fallback = sidx + 1;
-		sidx = ALIGN(bdata->hint_idx, step);
+		sidx = align_idx(bdata, bdata->hint_idx, step);
 	}
 
 	while (1) {
@@ -459,7 +482,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		unsigned long eidx, i, start_off, end_off;
 find_block:
 		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
-		sidx = ALIGN(sidx, step);
+		sidx = align_idx(bdata, sidx, step);
 		eidx = sidx + PFN_UP(size);
 
 		if (sidx >= midx || eidx > midx)
@@ -467,15 +490,15 @@ find_block:
 
 		for (i = sidx; i < eidx; i++)
 			if (test_bit(i, bdata->node_bootmem_map)) {
-				sidx = ALIGN(i, step);
+				sidx = align_idx(bdata, i, step);
 				if (sidx == i)
 					sidx += step;
 				goto find_block;
 			}
 
-		if (bdata->last_end_off &&
+		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
 				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
-			start_off = ALIGN(bdata->last_end_off, align);
+			start_off = align_off(bdata, bdata->last_end_off, align);
 		else
 			start_off = PFN_PHYS(sidx);
 
@@ -499,7 +522,7 @@ find_block:
 	}
 
 	if (fallback) {
-		sidx = ALIGN(fallback - 1, step);
+		sidx = align_idx(bdata, fallback - 1, step);
 		fallback = 0;
 		goto find_block;
 	}
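
The new align_idx()/align_off() helpers align an index or offset that is relative to the node start so that the resulting absolute pfn or address is aligned, instead of aligning the relative value against zero. A minimal user-space sketch of the same arithmetic (hypothetical numbers, not kernel code):

#include <stdio.h>

/* Same power-of-two ALIGN as the kernel macro. */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* Align an index that is relative to 'base' so the absolute value is aligned. */
static unsigned long align_idx(unsigned long base, unsigned long idx,
			       unsigned long step)
{
	return ALIGN(base + idx, step) - base;
}

int main(void)
{
	unsigned long node_min_pfn = 3;	/* hypothetical, unaligned node start */
	unsigned long idx = 2, step = 4;

	/* Old behaviour: ALIGN(idx, step) = 4, absolute pfn 3 + 4 = 7, not 4-aligned. */
	printf("ALIGN(idx, step)           -> absolute pfn %lu\n",
	       node_min_pfn + ALIGN(idx, step));
	/* New behaviour: align_idx() = 5, absolute pfn 3 + 5 = 8, 4-aligned. */
	printf("align_idx(base, idx, step) -> absolute pfn %lu\n",
	       node_min_pfn + align_idx(node_min_pfn, idx, step));
	return 0;
}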
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 380ab402d711..b5167dfb2f2d 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,6 +15,8 @@
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
@@ -22,22 +24,18 @@
  * We do use our own empty page to avoid interference with other users
  * of ZERO_PAGE(), such as /dev/zero
  */
+static DEFINE_MUTEX(xip_sparse_mutex);
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
 static struct page *__xip_sparse_page;
 
+/* called under xip_sparse_mutex */
 static struct page *xip_sparse_page(void)
 {
 	if (!__xip_sparse_page) {
 		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
 
-		if (page) {
-			static DEFINE_SPINLOCK(xip_alloc_lock);
-			spin_lock(&xip_alloc_lock);
-			if (!__xip_sparse_page)
-				__xip_sparse_page = page;
-			else
-				__free_page(page);
-			spin_unlock(&xip_alloc_lock);
-		}
+		if (page)
+			__xip_sparse_page = page;
 	}
 	return __xip_sparse_page;
 }
@@ -174,18 +172,23 @@ __xip_unmap (struct address_space * mapping,
 	pte_t pteval;
 	spinlock_t *ptl;
 	struct page *page;
+	unsigned count;
+	int locked = 0;
+
+	count = read_seqcount_begin(&xip_sparse_seq);
 
 	page = __xip_sparse_page;
 	if (!page)
 		return;
 
+retry:
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		pte = page_check_address(page, mm, address, &ptl);
+		pte = page_check_address(page, mm, address, &ptl, 1);
 		if (pte) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
 		}
 	}
 	spin_unlock(&mapping->i_mmap_lock);
+
+	if (locked) {
+		mutex_unlock(&xip_sparse_mutex);
+	} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
+		mutex_lock(&xip_sparse_mutex);
+		locked = 1;
+		goto retry;
+	}
 }
 
 /*
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 
 	/* XXX: are VM_FAULT_ codes OK? */
-
+again:
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (vmf->pgoff >= size)
 		return VM_FAULT_SIGBUS;
@@ -237,8 +248,10 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		int err;
 
 		/* maybe shared writable, allocate new block */
+		mutex_lock(&xip_sparse_mutex);
 		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
 							&xip_mem, &xip_pfn);
+		mutex_unlock(&xip_sparse_mutex);
 		if (error)
 			return VM_FAULT_SIGBUS;
 		/* unmap sparse mappings at pgoff from all other vmas */
@@ -252,14 +265,34 @@ found:
 		BUG_ON(err);
 		return VM_FAULT_NOPAGE;
 	} else {
+		int err, ret = VM_FAULT_OOM;
+
+		mutex_lock(&xip_sparse_mutex);
+		write_seqcount_begin(&xip_sparse_seq);
+		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
+							&xip_mem, &xip_pfn);
+		if (unlikely(!error)) {
+			write_seqcount_end(&xip_sparse_seq);
+			mutex_unlock(&xip_sparse_mutex);
+			goto again;
+		}
+		if (error != -ENODATA)
+			goto out;
 		/* not shared and writable, use xip_sparse_page() */
 		page = xip_sparse_page();
 		if (!page)
-			return VM_FAULT_OOM;
+			goto out;
+		err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+							page);
+		if (err == -ENOMEM)
+			goto out;
 
-		page_cache_get(page);
-		vmf->page = page;
-		return 0;
+		ret = VM_FAULT_NOPAGE;
+out:
+		write_seqcount_end(&xip_sparse_seq);
+		mutex_unlock(&xip_sparse_mutex);
+
+		return ret;
 	}
 }
 
@@ -308,8 +341,10 @@ __xip_file_write(struct file *filp, const char __user *buf,
 						&xip_mem, &xip_pfn);
 		if (status == -ENODATA) {
 			/* we allocate a new page unmap it */
+			mutex_lock(&xip_sparse_mutex);
 			status = a_ops->get_xip_mem(mapping, index, 1,
 							&xip_mem, &xip_pfn);
+			mutex_unlock(&xip_sparse_mutex);
 			if (!status)
 				/* unmap page at pgoff from all other vmas */
 				__xip_unmap(mapping, index);
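
The filemap_xip.c changes serialize get_xip_mem() block allocation behind xip_sparse_mutex and bracket the sparse-page fault path with xip_sparse_seq, so __xip_unmap() normally runs without the mutex and only redoes its walk under it when a writer raced. A rough user-space sketch of that lockless-read/retry shape, using a C11 atomic counter and a pthread mutex in place of the kernel's seqcount_t (the helper names here are mine, not kernel API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t sparse_mutex = PTHREAD_MUTEX_INITIALIZER;
static atomic_uint sparse_seq;		/* even = stable, odd = writer active */

/* Writer side, analogous to write_seqcount_begin/end under xip_sparse_mutex. */
static void writer_update(void)
{
	pthread_mutex_lock(&sparse_mutex);
	atomic_fetch_add(&sparse_seq, 1);	/* begin: count goes odd */
	/* ... allocate the block / change the sparse mapping here ... */
	atomic_fetch_add(&sparse_seq, 1);	/* end: count goes even again */
	pthread_mutex_unlock(&sparse_mutex);
}

/* Reader side, analogous to read_seqcount_begin/retry. */
static unsigned read_seq_begin(void)
{
	unsigned s;

	do {
		s = atomic_load(&sparse_seq);
	} while (s & 1);		/* wait out an in-progress writer */
	return s;
}

static int read_seq_retry(unsigned start)
{
	return atomic_load(&sparse_seq) != start;
}

/* The shape of __xip_unmap(): try locklessly, redo under the mutex on a race. */
static void unmap_like(void)
{
	int locked = 0;
	unsigned seq = read_seq_begin();

retry:
	printf("doing the unmap walk (%s)\n", locked ? "under mutex" : "lockless");

	if (locked) {
		pthread_mutex_unlock(&sparse_mutex);
	} else if (read_seq_retry(seq)) {
		pthread_mutex_lock(&sparse_mutex);
		locked = 1;
		goto retry;
	}
}

int main(void)
{
	writer_update();
	unmap_like();
	return 0;
}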
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 936ef2efd892..4e0e26591dfa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -12,7 +12,7 @@
12#include "internal.h" 12#include "internal.h"
13 13
14#ifdef CONFIG_DEBUG_MEMORY_INIT 14#ifdef CONFIG_DEBUG_MEMORY_INIT
15int __meminitdata mminit_loglevel; 15int mminit_loglevel;
16 16
17#ifndef SECTIONS_SHIFT 17#ifndef SECTIONS_SHIFT
18#define SECTIONS_SHIFT 0 18#define SECTIONS_SHIFT 0
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8a5467ee6265..64e5b4bcd964 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/memcontrol.h>
+#include <linux/security.h>
 
 int sysctl_panic_on_oom;
 int sysctl_oom_kill_allocating_task;
@@ -128,7 +129,8 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * Superuser processes are usually more important, so we make it
 	 * less likely that we kill those.
 	 */
-	if (__capable(p, CAP_SYS_ADMIN) || __capable(p, CAP_SYS_RESOURCE))
+	if (has_capability(p, CAP_SYS_ADMIN) ||
+	    has_capability(p, CAP_SYS_RESOURCE))
 		points /= 4;
 
 	/*
@@ -137,7 +139,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
 	 * tend to only have this flag set on applications they think
 	 * of as important.
 	 */
-	if (__capable(p, CAP_SYS_RAWIO))
+	if (has_capability(p, CAP_SYS_RAWIO))
 		points /= 4;
 
 	/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 1ea4e6fcee77..0383acfcb068 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -224,10 +224,14 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 /*
  * Check that @page is mapped at @address into @mm.
  *
+ * If @sync is false, page_check_address may perform a racy check to avoid
+ * the page table lock when the pte is not present (helpful when reclaiming
+ * highly shared pages).
+ *
  * On success returns with pte mapped and locked.
  */
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			  unsigned long address, spinlock_t **ptlp)
+			  unsigned long address, spinlock_t **ptlp, int sync)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -249,7 +253,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 
 	pte = pte_offset_map(pmd, address);
 	/* Make a quick check before getting the lock */
-	if (!pte_present(*pte)) {
+	if (!sync && !pte_present(*pte)) {
 		pte_unmap(pte);
 		return NULL;
 	}
@@ -281,7 +285,7 @@ static int page_referenced_one(struct page *page,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
@@ -450,7 +454,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 1);
 	if (!pte)
 		goto out;
 
@@ -659,23 +663,30 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 	}
 
 	/*
-	 * It would be tidy to reset the PageAnon mapping here,
-	 * but that might overwrite a racing page_add_anon_rmap
-	 * which increments mapcount after us but sets mapping
-	 * before us: so leave the reset to free_hot_cold_page,
-	 * and remember that it's only reliable while mapped.
-	 * Leaving it set also helps swapoff to reinstate ptes
-	 * faster for those pages still in swapcache.
+	 * Now that the last pte has gone, s390 must transfer dirty
+	 * flag from storage key to struct page. We can usually skip
+	 * this if the page is anon, so about to be freed; but perhaps
+	 * not if it's in swapcache - there might be another pte slot
+	 * containing the swap entry, but page not yet written to swap.
 	 */
 	if ((!PageAnon(page) || PageSwapCache(page)) &&
 	    page_test_dirty(page)) {
 		page_clear_dirty(page);
 		set_page_dirty(page);
 	}
-	mem_cgroup_uncharge_page(page);
 
+	mem_cgroup_uncharge_page(page);
 	__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+	/*
+	 * It would be tidy to reset the PageAnon mapping here,
+	 * but that might overwrite a racing page_add_anon_rmap
+	 * which increments mapcount after us but sets mapping
+	 * before us: so leave the reset to free_hot_cold_page,
+	 * and remember that it's only reliable while mapped.
+	 * Leaving it set also helps swapoff to reinstate ptes
+	 * faster for those pages still in swapcache.
+	 */
 	}
 }
 
@@ -697,7 +708,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
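
The new sync argument to page_check_address() keeps the cheap, racy !pte_present() pre-check for callers that can tolerate a missed pte (page_referenced_one, try_to_unmap_one) while forcing the locked lookup for callers that cannot (__xip_unmap, page_mkclean_one). A toy user-space analogue of that trade-off with a mutex-protected slot (hypothetical code, not the kernel interface):

#include <pthread.h>
#include <stdio.h>

struct table {
	pthread_mutex_t lock;
	void *slot;		/* may be installed/cleared concurrently */
};

/*
 * Return the entry with the lock held, or NULL. With sync == 0 a racy
 * unlocked peek may report "not present" and skip the lock entirely;
 * callers that must never miss an entry pass sync == 1.
 */
static void *lookup(struct table *t, int sync)
{
	if (!sync && t->slot == NULL)	/* quick check before getting the lock */
		return NULL;

	pthread_mutex_lock(&t->lock);
	if (t->slot)
		return t->slot;		/* caller unlocks, like the mapped+locked pte */
	pthread_mutex_unlock(&t->lock);
	return NULL;
}

int main(void)
{
	struct table t = { PTHREAD_MUTEX_INITIALIZER, "entry" };
	void *e = lookup(&t, 1);

	if (e) {
		printf("found %s under the lock\n", (char *)e);
		pthread_mutex_unlock(&t.lock);
	}
	return 0;
}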
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 167cf2dc8a03..797c3831cbec 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -60,7 +60,7 @@ void show_swap_cache_info(void)
60 printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", 60 printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
61 swap_cache_info.add_total, swap_cache_info.del_total, 61 swap_cache_info.add_total, swap_cache_info.del_total,
62 swap_cache_info.find_success, swap_cache_info.find_total); 62 swap_cache_info.find_success, swap_cache_info.find_total);
63 printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); 63 printk("Free swap = %ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
64 printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); 64 printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
65} 65}
66 66
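
The swap_state.c hunk only changes the format specifier: nr_swap_pages is a signed long (hence the %ld), and if it is read while transiently negative, printing it as unsigned turns a small negative count into an enormous bogus "Free swap" figure. A tiny illustration (hypothetical value, user-space C):

#include <stdio.h>

int main(void)
{
	long nr_swap_pages = -4;	/* hypothetical transient value */

	/* What the old %lu format effectively showed on a 64-bit box. */
	printf("Free swap = %lukB\n", (unsigned long)nr_swap_pages);
	/* What the fixed %ld format shows. */
	printf("Free swap = %ldkB\n", nr_swap_pages);
	return 0;
}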