author		Robert Richter <robert.richter@amd.com>	2008-09-24 05:25:31 -0400
committer	Robert Richter <robert.richter@amd.com>	2008-09-24 05:25:31 -0400
commit		f78e80209cf143be49f268c340431ae9fa3abb74 (patch)
tree		820fa64b688099dfdd93d27ba03252738ca5c7e2 /mm
parent		4c168eaf7ea39f25a45a3d8c7eebc3fedb633a1d (diff)
parent		24342c34a022ee90839873d91396045e12ef1090 (diff)
Merge commit 'v2.6.27-rc5' into tip/oprofile

Conflicts:
	arch/x86/oprofile/nmi_int.c
Diffstat (limited to 'mm')
-rw-r--r--	mm/bootmem.c	35
-rw-r--r--	mm/filemap_xip.c	65
-rw-r--r--	mm/mm_init.c	2
-rw-r--r--	mm/rmap.c	39
-rw-r--r--	mm/slub.c	4
-rw-r--r--	mm/swap_state.c	2
-rw-r--r--	mm/vmstat.c	19
7 files changed, 126 insertions(+), 40 deletions(-)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e023c68b0255..ad8eec6e44a8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -405,6 +405,29 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
+			unsigned long step)
+{
+	unsigned long base = bdata->node_min_pfn;
+
+	/*
+	 * Align the index with respect to the node start so that the
+	 * combination of both satisfies the requested alignment.
+	 */
+
+	return ALIGN(base + idx, step) - base;
+}
+
+static unsigned long align_off(struct bootmem_data *bdata, unsigned long off,
+			unsigned long align)
+{
+	unsigned long base = PFN_PHYS(bdata->node_min_pfn);
+
+	/* Same as align_idx for byte offsets */
+
+	return ALIGN(base + off, align) - base;
+}
+
 static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
@@ -441,7 +464,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 	else
 		start = ALIGN(min, step);
 
-	sidx = start - bdata->node_min_pfn;;
+	sidx = start - bdata->node_min_pfn;
 	midx = max - bdata->node_min_pfn;
 
 	if (bdata->hint_idx > sidx) {
@@ -450,7 +473,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		 * catch the fallback below.
 		 */
 		fallback = sidx + 1;
-		sidx = ALIGN(bdata->hint_idx, step);
+		sidx = align_idx(bdata, bdata->hint_idx, step);
 	}
 
 	while (1) {
@@ -459,7 +482,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		unsigned long eidx, i, start_off, end_off;
 find_block:
 		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
-		sidx = ALIGN(sidx, step);
+		sidx = align_idx(bdata, sidx, step);
 		eidx = sidx + PFN_UP(size);
 
 		if (sidx >= midx || eidx > midx)
@@ -467,7 +490,7 @@ find_block:
 
 		for (i = sidx; i < eidx; i++)
 			if (test_bit(i, bdata->node_bootmem_map)) {
-				sidx = ALIGN(i, step);
+				sidx = align_idx(bdata, i, step);
 				if (sidx == i)
 					sidx += step;
 				goto find_block;
@@ -475,7 +498,7 @@ find_block:
 
 		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
 				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
-			start_off = ALIGN(bdata->last_end_off, align);
+			start_off = align_off(bdata, bdata->last_end_off, align);
 		else
 			start_off = PFN_PHYS(sidx);
 
@@ -499,7 +522,7 @@ find_block:
 	}
 
 	if (fallback) {
-		sidx = align_idx(bdata, fallback - 1, step);
+		sidx = align_idx(bdata, fallback - 1, step);
 		fallback = 0;
 		goto find_block;
 	}
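
The point of the new align_idx()/align_off() helpers: sidx and start_off are
relative to the node's first PFN, so a plain ALIGN() on them only yields an
aligned absolute address when node_min_pfn itself happens to be aligned to
the requested step. A minimal userspace sketch of the difference (not kernel
code; ALIGN() is re-implemented here and the base value is hypothetical):

	#include <stdio.h>

	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	/* mirror of the patch: align relative to the node base */
	static unsigned long align_idx(unsigned long base, unsigned long idx,
				unsigned long step)
	{
		return ALIGN(base + idx, step) - base;
	}

	int main(void)
	{
		unsigned long base = 3;	/* hypothetical unaligned node_min_pfn */

		/* old code: ALIGN(5, 4) = 8, absolute PFN 11 - not 4-aligned */
		printf("old: absolute pfn %lu\n", base + ALIGN(5, 4));
		/* new code: align_idx() = 5, absolute PFN 8 - 4-aligned */
		printf("new: absolute pfn %lu\n", base + align_idx(base, 5, 4));
		return 0;
	}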
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 380ab402d711..b5167dfb2f2d 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,6 +15,8 @@
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
@@ -22,22 +24,18 @@
  * We do use our own empty page to avoid interference with other users
  * of ZERO_PAGE(), such as /dev/zero
  */
+static DEFINE_MUTEX(xip_sparse_mutex);
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
 static struct page *__xip_sparse_page;
 
+/* called under xip_sparse_mutex */
 static struct page *xip_sparse_page(void)
 {
 	if (!__xip_sparse_page) {
 		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
 
-		if (page) {
-			static DEFINE_SPINLOCK(xip_alloc_lock);
-			spin_lock(&xip_alloc_lock);
-			if (!__xip_sparse_page)
-				__xip_sparse_page = page;
-			else
-				__free_page(page);
-			spin_unlock(&xip_alloc_lock);
-		}
+		if (page)
+			__xip_sparse_page = page;
 	}
 	return __xip_sparse_page;
 }
@@ -174,18 +172,23 @@ __xip_unmap (struct address_space * mapping,
 	pte_t pteval;
 	spinlock_t *ptl;
 	struct page *page;
+	unsigned count;
+	int locked = 0;
+
+	count = read_seqcount_begin(&xip_sparse_seq);
 
 	page = __xip_sparse_page;
 	if (!page)
 		return;
 
+retry:
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		pte = page_check_address(page, mm, address, &ptl);
+		pte = page_check_address(page, mm, address, &ptl, 1);
 		if (pte) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
 		}
 	}
 	spin_unlock(&mapping->i_mmap_lock);
+
+	if (locked) {
+		mutex_unlock(&xip_sparse_mutex);
+	} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
+		mutex_lock(&xip_sparse_mutex);
+		locked = 1;
+		goto retry;
+	}
 }
 
 /*
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 
 	/* XXX: are VM_FAULT_ codes OK? */
-
+again:
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (vmf->pgoff >= size)
 		return VM_FAULT_SIGBUS;
@@ -237,8 +248,10 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		int err;
 
 		/* maybe shared writable, allocate new block */
+		mutex_lock(&xip_sparse_mutex);
 		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
 							&xip_mem, &xip_pfn);
+		mutex_unlock(&xip_sparse_mutex);
 		if (error)
 			return VM_FAULT_SIGBUS;
 		/* unmap sparse mappings at pgoff from all other vmas */
@@ -252,14 +265,34 @@ found:
 		BUG_ON(err);
 		return VM_FAULT_NOPAGE;
 	} else {
+		int err, ret = VM_FAULT_OOM;
+
+		mutex_lock(&xip_sparse_mutex);
+		write_seqcount_begin(&xip_sparse_seq);
+		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
+							&xip_mem, &xip_pfn);
+		if (unlikely(!error)) {
+			write_seqcount_end(&xip_sparse_seq);
+			mutex_unlock(&xip_sparse_mutex);
+			goto again;
+		}
+		if (error != -ENODATA)
+			goto out;
 		/* not shared and writable, use xip_sparse_page() */
 		page = xip_sparse_page();
 		if (!page)
-			return VM_FAULT_OOM;
+			goto out;
+		err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+							page);
+		if (err == -ENOMEM)
+			goto out;
 
-		page_cache_get(page);
-		vmf->page = page;
-		return 0;
+		ret = VM_FAULT_NOPAGE;
+out:
+		write_seqcount_end(&xip_sparse_seq);
+		mutex_unlock(&xip_sparse_mutex);
+
+		return ret;
 	}
 }
 
@@ -308,8 +341,10 @@ __xip_file_write(struct file *filp, const char __user *buf,
 						&xip_mem, &xip_pfn);
 		if (status == -ENODATA) {
 			/* we allocate a new page unmap it */
+			mutex_lock(&xip_sparse_mutex);
 			status = a_ops->get_xip_mem(mapping, index, 1,
 							&xip_mem, &xip_pfn);
+			mutex_unlock(&xip_sparse_mutex);
 			if (!status)
 				/* unmap page at pgoff from all other vmas */
 				__xip_unmap(mapping, index);
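
The filemap_xip.c changes implement a lockless-reader pattern: paths that
instantiate blocks take xip_sparse_mutex and bump xip_sparse_seq, while
__xip_unmap() first walks the mappings unlocked and repeats the walk under
the mutex only if the seqcount shows a writer raced with it. A compact
userspace analogue of the pattern (pthread/C11 stand-ins, not the kernel
seqlock API):

	#include <pthread.h>
	#include <stdatomic.h>

	static pthread_mutex_t sparse_mutex = PTHREAD_MUTEX_INITIALIZER;
	static atomic_uint sparse_seq;	/* even = stable, odd = writer active */

	static void writer_side(void)
	{
		pthread_mutex_lock(&sparse_mutex);
		atomic_fetch_add(&sparse_seq, 1);	/* begin: seq goes odd */
		/* ... instantiate the block, as get_xip_mem(..., 1, ...) ... */
		atomic_fetch_add(&sparse_seq, 1);	/* end: seq goes even */
		pthread_mutex_unlock(&sparse_mutex);
	}

	static void unmap_side(void)
	{
		unsigned count = atomic_load(&sparse_seq);
		int locked = 0;
	retry:
		/* ... walk and clear the mappings, as __xip_unmap() does ... */
		if (locked) {
			pthread_mutex_unlock(&sparse_mutex);
		} else if (atomic_load(&sparse_seq) != count || (count & 1)) {
			/* a writer raced with us: redo the walk serialized */
			pthread_mutex_lock(&sparse_mutex);
			locked = 1;
			goto retry;
		}
	}

	int main(void)
	{
		writer_side();
		unmap_side();
		return 0;
	}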
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 936ef2efd892..4e0e26591dfa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -12,7 +12,7 @@
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
-int __meminitdata mminit_loglevel;
+int mminit_loglevel;
 
 #ifndef SECTIONS_SHIFT
 #define SECTIONS_SHIFT 0
diff --git a/mm/rmap.c b/mm/rmap.c
index 1ea4e6fcee77..0383acfcb068 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -224,10 +224,14 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 /*
  * Check that @page is mapped at @address into @mm.
  *
+ * If @sync is false, page_check_address may perform a racy check to avoid
+ * the page table lock when the pte is not present (helpful when reclaiming
+ * highly shared pages).
+ *
  * On success returns with pte mapped and locked.
  */
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			  unsigned long address, spinlock_t **ptlp)
+			  unsigned long address, spinlock_t **ptlp, int sync)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -249,7 +253,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 
 	pte = pte_offset_map(pmd, address);
 	/* Make a quick check before getting the lock */
-	if (!pte_present(*pte)) {
+	if (!sync && !pte_present(*pte)) {
 		pte_unmap(pte);
 		return NULL;
 	}
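
The new sync flag splits the callers into two classes: heuristic ones
(page_referenced_one, try_to_unmap_one) pass 0 and keep the cheap racy
pte_present() bail-out, while callers that must not miss a live pte
(page_mkclean_one, __xip_unmap) pass 1 and always proceed to the locked
recheck. A toy illustration of the shape of that choice (hypothetical demo
types, not the kernel interfaces):

	#include <stdio.h>

	struct demo_pte { int present; };

	static struct demo_pte *check_address(struct demo_pte *pte, int sync)
	{
		/* quick racy check: acceptable for heuristics only */
		if (!sync && !pte->present)
			return NULL;
		/* (the real code takes the pte lock here and rechecks) */
		return pte->present ? pte : NULL;
	}

	int main(void)
	{
		struct demo_pte pte = { 0 };	/* not (yet) present */

		/* sync=0 may legitimately miss it; sync=1 must recheck */
		printf("sync=0: %p\n", (void *)check_address(&pte, 0));
		printf("sync=1: %p\n", (void *)check_address(&pte, 1));
		return 0;
	}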
@@ -281,7 +285,7 @@ static int page_referenced_one(struct page *page,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
@@ -450,7 +454,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 1);
 	if (!pte)
 		goto out;
 
@@ -659,23 +663,30 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 		}
 
 		/*
-		 * It would be tidy to reset the PageAnon mapping here,
-		 * but that might overwrite a racing page_add_anon_rmap
-		 * which increments mapcount after us but sets mapping
-		 * before us: so leave the reset to free_hot_cold_page,
-		 * and remember that it's only reliable while mapped.
-		 * Leaving it set also helps swapoff to reinstate ptes
-		 * faster for those pages still in swapcache.
+		 * Now that the last pte has gone, s390 must transfer dirty
+		 * flag from storage key to struct page. We can usually skip
+		 * this if the page is anon, so about to be freed; but perhaps
+		 * not if it's in swapcache - there might be another pte slot
+		 * containing the swap entry, but page not yet written to swap.
 		 */
 		if ((!PageAnon(page) || PageSwapCache(page)) &&
 		    page_test_dirty(page)) {
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
-		mem_cgroup_uncharge_page(page);
 
+		mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+		/*
+		 * It would be tidy to reset the PageAnon mapping here,
+		 * but that might overwrite a racing page_add_anon_rmap
+		 * which increments mapcount after us but sets mapping
+		 * before us: so leave the reset to free_hot_cold_page,
+		 * and remember that it's only reliable while mapped.
+		 * Leaving it set also helps swapoff to reinstate ptes
+		 * faster for those pages still in swapcache.
+		 */
 	}
 }
 
@@ -697,7 +708,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
diff --git a/mm/slub.c b/mm/slub.c
index 4f5b96149458..fb486d5540f8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2312,7 +2312,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 
 	s->refcount = 1;
 #ifdef CONFIG_NUMA
-	s->remote_node_defrag_ratio = 100;
+	s->remote_node_defrag_ratio = 1000;
 #endif
 	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
 		goto error;
@@ -4058,7 +4058,7 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 	if (err)
 		return err;
 
-	if (ratio < 100)
+	if (ratio <= 100)
 		s->remote_node_defrag_ratio = ratio * 10;
 
 	return length;
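
Both slub.c hunks follow from one unit change: remote_node_defrag_ratio is
now stored in tenths of a percent (0-1000, default 1000) while the sysfs
file keeps accepting a percentage (0-100), so the store must accept the
boundary value 100. A sketch of the store-side check in isolation
(simplified, without the sysfs plumbing):

	#include <stdio.h>

	static unsigned long remote_node_defrag_ratio = 1000;	/* new default */

	static void store_ratio(unsigned long ratio)
	{
		if (ratio <= 100)	/* was "< 100", which rejected 100 */
			remote_node_defrag_ratio = ratio * 10;
	}

	int main(void)
	{
		store_ratio(100);
		printf("%lu\n", remote_node_defrag_ratio);	/* prints 1000 */
		return 0;
	}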
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 167cf2dc8a03..797c3831cbec 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -60,7 +60,7 @@ void show_swap_cache_info(void)
 	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total);
-	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
+	printk("Free swap  = %ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
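
The format-string fix matters because nr_swap_pages is a signed long; if it
ever holds a negative value, %lu renders it as a huge unsigned number
instead of a small deficit. Demonstrable in plain C (standalone sketch,
printk swapped for printf, value hypothetical):

	#include <stdio.h>

	int main(void)
	{
		long nr_swap_pages = -8;	/* hypothetical negative count */

		printf("with %%lu: %lukB\n", (unsigned long)nr_swap_pages);
		printf("with %%ld: %ldkB\n", nr_swap_pages);
		return 0;
	}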
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b0d08e667ece..d7826af2fb07 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -516,9 +516,26 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
 			continue;
 
 		page = pfn_to_page(pfn);
+#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES
+		/*
+		 * Ordinarily, memory holes in flatmem still have a valid
+		 * memmap for the PFN range. However, an architecture for
+		 * embedded systems (e.g. ARM) can free up the memmap backing
+		 * holes to save memory on the assumption the memmap is
+		 * never used. The page_zone linkages are then broken even
+		 * though pfn_valid() returns true. Skip the page if the
+		 * linkages are broken. Even if this test passed, the impact
+		 * is that the counters for the movable type are off but
+		 * fragmentation monitoring is likely meaningless on small
+		 * systems.
+		 */
+		if (page_zone(page) != zone)
+			continue;
+#endif
 		mtype = get_pageblock_migratetype(page);
 
-		count[mtype]++;
+		if (mtype < MIGRATE_TYPES)
+			count[mtype]++;
 	}
 
 	/* Print counts */
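
The MIGRATE_TYPES guard is the defensive half of the same change: when the
memmap linkage is stale, get_pageblock_migratetype() can return a value
outside the counter array, so the index is bounds-checked before use. The
pattern in isolation (toy values, not kernel data):

	#include <stdio.h>

	#define MIGRATE_TYPES 5

	int main(void)
	{
		unsigned long count[MIGRATE_TYPES] = { 0 };
		int samples[] = { 0, 2, 7, 4 };	/* 7 = bogus out-of-range type */

		for (unsigned i = 0; i < sizeof(samples) / sizeof(*samples); i++) {
			int mtype = samples[i];

			if (mtype < MIGRATE_TYPES)	/* skip corrupt values */
				count[mtype]++;
		}
		for (int t = 0; t < MIGRATE_TYPES; t++)
			printf("type %d: %lu\n", t, count[t]);
		return 0;
	}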