author     Paul Mackerras <paulus@samba.org>   2008-06-18 01:29:12 -0400
committer  Paul Mackerras <paulus@samba.org>   2008-06-30 21:27:57 -0400
commit     3a8247cc2c856930f34eafce33f6a039227ee175 (patch)
tree       aa8599cdf09893f1150a2bc137878d8b8a661780 /arch/powerpc/mm
parent     e952e6c4d6635b36c212c056a9427bd93460178c (diff)
powerpc: Only demote individual slices rather than whole process
At present, if we have a kernel with a 64kB page size, and some process maps
something that has to be mapped with 4kB pages (such as a cache-inhibited
mapping on POWER5+, or the eHCA infiniband queue-pair pages), we change the
process to use 4kB pages everywhere.  This hurts the performance of HPC
programs that access eHCA from userspace.

With this patch, the kernel will only demote the slice(s) containing the eHCA
or cache-inhibited mappings, leaving the remaining slices able to use 64kB
hardware pages.

This also changes the slice_get_unmapped_area code so that it is willing to
place a 64k-page mapping into (or across) a 4k-page slice if there is no
better alternative, i.e. if the program specified MAP_FIXED or if there is not
sufficient space available in slices that are either empty or already have
64k-page mappings in them.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
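For context, the slice bookkeeping this patch builds on records one 4-bit
page-size field per slice of the address space, packed into the
low_slices_psize/high_slices_psize words, so demoting a single slice means
rewriting a single nibble rather than switching the whole mm.  The
stand-alone C sketch below models that encoding; the slice geometry (256MB
slices below 4GB, 1TB slices above) and the MMU_PAGE_* values are
illustrative assumptions, not copied from the kernel headers.

/*
 * Simplified user-space model of the per-slice page-size bookkeeping that
 * the patch manipulates.  Constants below are assumptions for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define MMU_PAGE_4K     0
#define MMU_PAGE_64K    4               /* assumed index, illustration only */

#define SLICE_LOW_TOP           (0x100000000ull)   /* assumed: 4GB */
#define GET_LOW_SLICE_INDEX(a)  ((a) >> 28)        /* assumed: 256MB slices */
#define GET_HIGH_SLICE_INDEX(a) ((a) >> 40)        /* assumed: 1TB slices */

struct mm_model {
        uint64_t low_slices_psize;      /* 16 x 4-bit page-size fields */
        uint64_t high_slices_psize;     /* 16 x 4-bit page-size fields */
};

/* Read the 4-bit page size recorded for the slice containing addr. */
static unsigned int model_get_slice_psize(struct mm_model *mm, uint64_t addr)
{
        uint64_t slices, index;

        if (addr < SLICE_LOW_TOP) {
                slices = mm->low_slices_psize;
                index = GET_LOW_SLICE_INDEX(addr);
        } else {
                slices = mm->high_slices_psize;
                index = GET_HIGH_SLICE_INDEX(addr);
        }
        return (slices >> (index * 4)) & 0xF;
}

/* Demote only the slice containing addr: rewrite one nibble, nothing else. */
static void model_set_slice_psize(struct mm_model *mm, uint64_t addr,
                                  unsigned int psize)
{
        uint64_t *p = addr < SLICE_LOW_TOP ? &mm->low_slices_psize
                                           : &mm->high_slices_psize;
        uint64_t i  = addr < SLICE_LOW_TOP ? GET_LOW_SLICE_INDEX(addr)
                                           : GET_HIGH_SLICE_INDEX(addr);

        *p = (*p & ~(0xFull << (i * 4))) | ((uint64_t)psize << (i * 4));
}

int main(void)
{
        /* start with every slice using 64k pages (every nibble = 64K) */
        struct mm_model mm = { 0x4444444444444444ull, 0x4444444444444444ull };
        uint64_t ci_addr = 0x30000000ull;   /* hypothetical CI/eHCA mapping */

        model_set_slice_psize(&mm, ci_addr, MMU_PAGE_4K);
        printf("psize at demoted slice: %u\n",
               model_get_slice_psize(&mm, ci_addr));
        printf("psize elsewhere:        %u\n",
               model_get_slice_psize(&mm, 0x70000000ull));
        return 0;
}

get_paca_psize() and slice_set_psize() in the diff below perform the same
shift-and-mask, just on the real context and PACA structures.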
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--   arch/powerpc/mm/hash_utils_64.c    36
-rw-r--r--   arch/powerpc/mm/slice.c           177
2 files changed, 160 insertions(+), 53 deletions(-)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index bf5b6d7ed30f..8d3b58ebd38e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -695,6 +695,28 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
         return pp;
 }
 
+#ifdef CONFIG_PPC_MM_SLICES
+unsigned int get_paca_psize(unsigned long addr)
+{
+        unsigned long index, slices;
+
+        if (addr < SLICE_LOW_TOP) {
+                slices = get_paca()->context.low_slices_psize;
+                index = GET_LOW_SLICE_INDEX(addr);
+        } else {
+                slices = get_paca()->context.high_slices_psize;
+                index = GET_HIGH_SLICE_INDEX(addr);
+        }
+        return (slices >> (index * 4)) & 0xF;
+}
+
+#else
+unsigned int get_paca_psize(unsigned long addr)
+{
+        return get_paca()->context.user_psize;
+}
+#endif
+
 /*
  * Demote a segment to using 4k pages.
  * For now this makes the whole process use 4k pages.
@@ -702,13 +724,13 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 #ifdef CONFIG_PPC_64K_PAGES
 void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 {
-        if (mm->context.user_psize == MMU_PAGE_4K)
+        if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
                 return;
-        slice_set_user_psize(mm, MMU_PAGE_4K);
+        slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
 #ifdef CONFIG_SPU_BASE
         spu_flush_all_slbs(mm);
 #endif
-        if (get_paca()->context.user_psize != MMU_PAGE_4K) {
+        if (get_paca_psize(addr) != MMU_PAGE_4K) {
                 get_paca()->context = mm->context;
                 slb_flush_and_rebolt();
         }
@@ -792,11 +814,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
                         DBG_LOW(" user region with no mm !\n");
                         return 1;
                 }
-#ifdef CONFIG_PPC_MM_SLICES
                 psize = get_slice_psize(mm, ea);
-#else
-                psize = mm->context.user_psize;
-#endif
                 ssize = user_segment_size(ea);
                 vsid = get_vsid(mm->context.id, ea, ssize);
                 break;
@@ -868,7 +886,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
         /* Do actual hashing */
 #ifdef CONFIG_PPC_64K_PAGES
         /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
-        if (pte_val(*ptep) & _PAGE_4K_PFN) {
+        if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
                 demote_segment_4k(mm, ea);
                 psize = MMU_PAGE_4K;
         }
@@ -897,7 +915,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
                 }
         }
         if (user_region) {
-                if (psize != get_paca()->context.user_psize) {
+                if (psize != get_paca_psize(ea)) {
                         get_paca()->context = mm->context;
                         slb_flush_and_rebolt();
                 }
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index ad928edafb0a..583be67ad938 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -215,10 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
                   mm->context.high_slices_psize);
 
         spin_unlock_irqrestore(&slice_convert_lock, flags);
-        mb();
 
-        /* XXX this is sub-optimal but will do for now */
-        on_each_cpu(slice_flush_segments, mm, 0, 1);
 #ifdef CONFIG_SPU_BASE
         spu_flush_all_slbs(mm);
 #endif
@@ -384,17 +381,34 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
         return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
 }
 
+#define or_mask(dst, src) do {                          \
+        (dst).low_slices |= (src).low_slices;           \
+        (dst).high_slices |= (src).high_slices;         \
+} while (0)
+
+#define andnot_mask(dst, src) do {                      \
+        (dst).low_slices &= ~(src).low_slices;          \
+        (dst).high_slices &= ~(src).high_slices;        \
+} while (0)
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define MMU_PAGE_BASE   MMU_PAGE_64K
+#else
+#define MMU_PAGE_BASE   MMU_PAGE_4K
+#endif
+
 unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
                                       unsigned long flags, unsigned int psize,
                                       int topdown, int use_cache)
 {
-        struct slice_mask mask;
+        struct slice_mask mask = {0, 0};
         struct slice_mask good_mask;
         struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
-        int pmask_set = 0;
+        struct slice_mask compat_mask = {0, 0};
         int fixed = (flags & MAP_FIXED);
         int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
         struct mm_struct *mm = current->mm;
+        unsigned long newaddr;
 
         /* Sanity checks */
         BUG_ON(mm->task_size == 0);
@@ -416,21 +430,48 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
         if (!fixed && addr) {
                 addr = _ALIGN_UP(addr, 1ul << pshift);
                 slice_dbg(" aligned addr=%lx\n", addr);
+                /* Ignore hint if it's too large or overlaps a VMA */
+                if (addr > mm->task_size - len ||
+                    !slice_area_is_free(mm, addr, len))
+                        addr = 0;
         }
 
-        /* First makeup a "good" mask of slices that have the right size
+        /* First make up a "good" mask of slices that have the right size
          * already
          */
         good_mask = slice_mask_for_size(mm, psize);
         slice_print_mask(" good_mask", good_mask);
 
-        /* First check hint if it's valid or if we have MAP_FIXED */
-        if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {
+        /*
+         * Here "good" means slices that are already the right page size,
+         * "compat" means slices that have a compatible page size (i.e.
+         * 4k in a 64k pagesize kernel), and "free" means slices without
+         * any VMAs.
+         *
+         * If MAP_FIXED:
+         *      check if fits in good | compat => OK
+         *      check if fits in good | compat | free => convert free
+         *      else bad
+         * If have hint:
+         *      check if hint fits in good => OK
+         *      check if hint fits in good | free => convert free
+         * Otherwise:
+         *      search in good, found => OK
+         *      search in good | free, found => convert free
+         *      search in good | compat | free, found => convert free.
+         */
 
-                /* Don't bother with hint if it overlaps a VMA */
-                if (!fixed && !slice_area_is_free(mm, addr, len))
-                        goto search;
+#ifdef CONFIG_PPC_64K_PAGES
+        /* If we support combo pages, we can allow 64k pages in 4k slices */
+        if (psize == MMU_PAGE_64K) {
+                compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+                if (fixed)
+                        or_mask(good_mask, compat_mask);
+        }
+#endif
 
+        /* First check hint if it's valid or if we have MAP_FIXED */
+        if (addr != 0 || fixed) {
                 /* Build a mask for the requested range */
                 mask = slice_range_to_mask(addr, len);
                 slice_print_mask(" mask", mask);
@@ -442,54 +483,66 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
                         slice_dbg(" fits good !\n");
                         return addr;
                 }
-
-                /* We don't fit in the good mask, check what other slices are
-                 * empty and thus can be converted
-                 */
-                potential_mask = slice_mask_for_free(mm);
-                potential_mask.low_slices |= good_mask.low_slices;
-                potential_mask.high_slices |= good_mask.high_slices;
-                pmask_set = 1;
-                slice_print_mask(" potential", potential_mask);
-                if (slice_check_fit(mask, potential_mask)) {
-                        slice_dbg(" fits potential !\n");
-                        goto convert;
-                }
-        }
+        } else {
+                /* Now let's see if we can find something in the existing
+                 * slices for that size
+                 */
+                newaddr = slice_find_area(mm, len, good_mask, psize, topdown,
+                                          use_cache);
+                if (newaddr != -ENOMEM) {
+                        /* Found within the good mask, we don't have to setup,
+                         * we thus return directly
+                         */
+                        slice_dbg(" found area at 0x%lx\n", newaddr);
+                        return newaddr;
+                }
+        }
 
-        /* If we have MAP_FIXED and failed the above step, then error out */
+        /* We don't fit in the good mask, check what other slices are
+         * empty and thus can be converted
+         */
+        potential_mask = slice_mask_for_free(mm);
+        or_mask(potential_mask, good_mask);
+        slice_print_mask(" potential", potential_mask);
+
+        if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
+                slice_dbg(" fits potential !\n");
+                goto convert;
+        }
+
+        /* If we have MAP_FIXED and failed the above steps, then error out */
         if (fixed)
                 return -EBUSY;
 
- search:
         slice_dbg(" search...\n");
 
-        /* Now let's see if we can find something in the existing slices
-         * for that size
+        /* If we had a hint that didn't work out, see if we can fit
+         * anywhere in the good area.
          */
-        addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
-        if (addr != -ENOMEM) {
-                /* Found within the good mask, we don't have to setup,
-                 * we thus return directly
-                 */
-                slice_dbg(" found area at 0x%lx\n", addr);
-                return addr;
-        }
-
-        /* Won't fit, check what can be converted */
-        if (!pmask_set) {
-                potential_mask = slice_mask_for_free(mm);
-                potential_mask.low_slices |= good_mask.low_slices;
-                potential_mask.high_slices |= good_mask.high_slices;
-                pmask_set = 1;
-                slice_print_mask(" potential", potential_mask);
+        if (addr) {
+                addr = slice_find_area(mm, len, good_mask, psize, topdown,
+                                       use_cache);
+                if (addr != -ENOMEM) {
+                        slice_dbg(" found area at 0x%lx\n", addr);
+                        return addr;
+                }
         }
 
         /* Now let's see if we can find something in the existing slices
-         * for that size
+         * for that size plus free slices
          */
         addr = slice_find_area(mm, len, potential_mask, psize, topdown,
                                use_cache);
+
+#ifdef CONFIG_PPC_64K_PAGES
+        if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+                /* retry the search with 4k-page slices included */
+                or_mask(potential_mask, compat_mask);
+                addr = slice_find_area(mm, len, potential_mask, psize,
+                                       topdown, use_cache);
+        }
+#endif
+
         if (addr == -ENOMEM)
                 return -ENOMEM;
 
@@ -498,7 +551,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
         slice_print_mask(" mask", mask);
 
  convert:
-        slice_convert(mm, mask, psize);
+        andnot_mask(mask, good_mask);
+        andnot_mask(mask, compat_mask);
+        if (mask.low_slices || mask.high_slices) {
+                slice_convert(mm, mask, psize);
+                if (psize > MMU_PAGE_BASE)
+                        on_each_cpu(slice_flush_segments, mm, 0, 1);
+        }
         return addr;
 
 }
@@ -598,6 +657,36 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
         spin_unlock_irqrestore(&slice_convert_lock, flags);
 }
 
+void slice_set_psize(struct mm_struct *mm, unsigned long address,
+                     unsigned int psize)
+{
+        unsigned long i, flags;
+        u64 *p;
+
+        spin_lock_irqsave(&slice_convert_lock, flags);
+        if (address < SLICE_LOW_TOP) {
+                i = GET_LOW_SLICE_INDEX(address);
+                p = &mm->context.low_slices_psize;
+        } else {
+                i = GET_HIGH_SLICE_INDEX(address);
+                p = &mm->context.high_slices_psize;
+        }
+        *p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4));
+        spin_unlock_irqrestore(&slice_convert_lock, flags);
+
+#ifdef CONFIG_SPU_BASE
+        spu_flush_all_slbs(mm);
+#endif
+}
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+                           unsigned long len, unsigned int psize)
+{
+        struct slice_mask mask = slice_range_to_mask(start, len);
+
+        slice_convert(mm, mask, psize);
+}
+
 /*
  * is_hugepage_only_range() is used by generic code to verify wether
  * a normal mmap mapping (non hugetlbfs) is valid on a given area.
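
A final note on the mask arithmetic at the convert: label above: the new code
only converts slices that are neither already the right size ("good") nor
merely compatible ("compat"), and it only broadcasts slice_flush_segments when
pages larger than the base page size were installed.  The stand-alone sketch
below replays that arithmetic on a two-u16 slice_mask (16 low slices, 16 high
slices, as in kernels of that era); the layout and the example bit patterns
are assumptions made up for illustration, not kernel code.

/* Stand-alone model of the or_mask/andnot_mask algebra used above. */
#include <stdint.h>
#include <stdio.h>

struct slice_mask {
        uint16_t low_slices;
        uint16_t high_slices;
};

#define or_mask(dst, src) do {                          \
        (dst).low_slices |= (src).low_slices;           \
        (dst).high_slices |= (src).high_slices;         \
} while (0)

#define andnot_mask(dst, src) do {                      \
        (dst).low_slices &= ~(src).low_slices;          \
        (dst).high_slices &= ~(src).high_slices;        \
} while (0)

int main(void)
{
        /* hypothetical state: slices 0-3 already 64k, slice 4 holds 4k
         * (compat) mappings, slices 5-15 are free, and the new mapping
         * lands on slices 3-5 */
        struct slice_mask good_mask   = { 0x000F, 0 };
        struct slice_mask compat_mask = { 0x0010, 0 };
        struct slice_mask free_mask   = { 0xFFE0, 0 };
        struct slice_mask potential_mask;
        struct slice_mask mask        = { 0x0038, 0 };

        /* the search widens its candidate set step by step */
        potential_mask = free_mask;
        or_mask(potential_mask, good_mask);        /* good | free          */
        or_mask(potential_mask, compat_mask);      /* good | compat | free */
        printf("widest candidate set: 0x%04x\n", potential_mask.low_slices);

        /* at convert:, only what is neither good nor compat is converted */
        andnot_mask(mask, good_mask);
        andnot_mask(mask, compat_mask);
        if (mask.low_slices || mask.high_slices)
                printf("convert low slices 0x%04x to the new page size\n",
                       mask.low_slices);
        else
                printf("nothing to convert\n");
        return 0;
}

With these inputs only slice 5 survives the two andnot_mask steps (0x0020), so
only that one free slice would be converted and, if the requested size exceeds
the base page size, trigger the segment flush.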