diff options
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 11 | ||||
-rw-r--r-- | include/asm-generic/pgtable.h | 106 | ||||
-rw-r--r-- | init/Kconfig | 37 |
3 files changed, 152 insertions, 2 deletions
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5fe03aaca92e..5199db2923d3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -404,7 +404,8 @@ static inline int pte_same(pte_t a, pte_t b) | |||
404 | 404 | ||
405 | static inline int pte_present(pte_t a) | 405 | static inline int pte_present(pte_t a) |
406 | { | 406 | { |
407 | return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); | 407 | return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE | |
408 | _PAGE_NUMA); | ||
408 | } | 409 | } |
409 | 410 | ||
410 | #define pte_accessible pte_accessible | 411 | #define pte_accessible pte_accessible |
@@ -426,7 +427,8 @@ static inline int pmd_present(pmd_t pmd) | |||
426 | * the _PAGE_PSE flag will remain set at all times while the | 427 | * the _PAGE_PSE flag will remain set at all times while the |
427 | * _PAGE_PRESENT bit is clear). | 428 | * _PAGE_PRESENT bit is clear). |
428 | */ | 429 | */ |
429 | return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); | 430 | return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE | |
431 | _PAGE_NUMA); | ||
430 | } | 432 | } |
431 | 433 | ||
432 | static inline int pmd_none(pmd_t pmd) | 434 | static inline int pmd_none(pmd_t pmd) |
@@ -485,6 +487,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) | |||
485 | 487 | ||
486 | static inline int pmd_bad(pmd_t pmd) | 488 | static inline int pmd_bad(pmd_t pmd) |
487 | { | 489 | { |
490 | #ifdef CONFIG_NUMA_BALANCING | ||
491 | /* pmd_numa check */ | ||
492 | if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA) | ||
493 | return 0; | ||
494 | #endif | ||
488 | return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; | 495 | return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; |
489 | } | 496 | } |
490 | 497 | ||
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 48fc1dc1c74b..f27c83668d10 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -558,6 +558,112 @@ static inline int pmd_trans_unstable(pmd_t *pmd) | |||
558 | #endif | 558 | #endif |
559 | } | 559 | } |
560 | 560 | ||
561 | #ifdef CONFIG_NUMA_BALANCING | ||
562 | #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE | ||
563 | /* | ||
564 | * _PAGE_NUMA works identically to _PAGE_PROTNONE (it's actually the | |
565 | * same bit too). It's set only when _PAGE_PRESENT is not set and it's | |
566 | * never set if _PAGE_PRESENT is set. | |
567 | * | ||
568 | * pte/pmd_present() returns true if pte/pmd_numa returns true. Page | ||
569 | * fault triggers on those regions if pte/pmd_numa returns true | ||
570 | * (because _PAGE_PRESENT is not set). | ||
571 | */ | ||
572 | #ifndef pte_numa | ||
573 | static inline int pte_numa(pte_t pte) | ||
574 | { | ||
575 | return (pte_flags(pte) & | ||
576 | (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; | ||
577 | } | ||
578 | #endif | ||
579 | |||
580 | #ifndef pmd_numa | ||
581 | static inline int pmd_numa(pmd_t pmd) | ||
582 | { | ||
583 | return (pmd_flags(pmd) & | ||
584 | (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; | ||
585 | } | ||
586 | #endif | ||
587 | |||
588 | /* | ||
589 | * pte/pmd_mknonnuma set the _PAGE_ACCESSED bitflag automatically | |
590 | * because they're called by the NUMA hinting minor page fault. If we | |
591 | * didn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler | |
592 | * would be forced to set it later while filling the TLB after we | ||
593 | * return to userland. That would trigger a second write to memory | ||
594 | * that we optimize away by setting _PAGE_ACCESSED here. | ||
595 | */ | ||
596 | #ifndef pte_mknonnuma | ||
597 | static inline pte_t pte_mknonnuma(pte_t pte) | ||
598 | { | ||
599 | pte = pte_clear_flags(pte, _PAGE_NUMA); | ||
600 | return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); | ||
601 | } | ||
602 | #endif | ||
603 | |||
604 | #ifndef pmd_mknonnuma | ||
605 | static inline pmd_t pmd_mknonnuma(pmd_t pmd) | ||
606 | { | ||
607 | pmd = pmd_clear_flags(pmd, _PAGE_NUMA); | ||
608 | return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); | ||
609 | } | ||
610 | #endif | ||
611 | |||
612 | #ifndef pte_mknuma | ||
613 | static inline pte_t pte_mknuma(pte_t pte) | ||
614 | { | ||
615 | pte = pte_set_flags(pte, _PAGE_NUMA); | ||
616 | return pte_clear_flags(pte, _PAGE_PRESENT); | ||
617 | } | ||
618 | #endif | ||
619 | |||
620 | #ifndef pmd_mknuma | ||
621 | static inline pmd_t pmd_mknuma(pmd_t pmd) | ||
622 | { | ||
623 | pmd = pmd_set_flags(pmd, _PAGE_NUMA); | ||
624 | return pmd_clear_flags(pmd, _PAGE_PRESENT); | ||
625 | } | ||
626 | #endif | ||
627 | #else | ||
628 | extern int pte_numa(pte_t pte); | ||
629 | extern int pmd_numa(pmd_t pmd); | ||
630 | extern pte_t pte_mknonnuma(pte_t pte); | ||
631 | extern pmd_t pmd_mknonnuma(pmd_t pmd); | ||
632 | extern pte_t pte_mknuma(pte_t pte); | ||
633 | extern pmd_t pmd_mknuma(pmd_t pmd); | ||
634 | #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ | ||
635 | #else | ||
636 | static inline int pmd_numa(pmd_t pmd) | ||
637 | { | ||
638 | return 0; | ||
639 | } | ||
640 | |||
641 | static inline int pte_numa(pte_t pte) | ||
642 | { | ||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | static inline pte_t pte_mknonnuma(pte_t pte) | ||
647 | { | ||
648 | return pte; | ||
649 | } | ||
650 | |||
651 | static inline pmd_t pmd_mknonnuma(pmd_t pmd) | ||
652 | { | ||
653 | return pmd; | ||
654 | } | ||
655 | |||
656 | static inline pte_t pte_mknuma(pte_t pte) | ||
657 | { | ||
658 | return pte; | ||
659 | } | ||
660 | |||
661 | static inline pmd_t pmd_mknuma(pmd_t pmd) | ||
662 | { | ||
663 | return pmd; | ||
664 | } | ||
665 | #endif /* CONFIG_NUMA_BALANCING */ | ||
666 | |||
561 | #endif /* CONFIG_MMU */ | 667 | #endif /* CONFIG_MMU */ |
562 | 668 | ||
563 | #endif /* !__ASSEMBLY__ */ | 669 | #endif /* !__ASSEMBLY__ */ |
diff --git a/init/Kconfig b/init/Kconfig index 6fdd6e339326..9f00f004796a 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -696,6 +696,43 @@ config LOG_BUF_SHIFT | |||
696 | config HAVE_UNSTABLE_SCHED_CLOCK | 696 | config HAVE_UNSTABLE_SCHED_CLOCK |
697 | bool | 697 | bool |
698 | 698 | ||
699 | # | ||
700 | # For architectures that want to enable the support for NUMA-affine scheduler | ||
701 | # balancing logic: | ||
702 | # | ||
703 | config ARCH_SUPPORTS_NUMA_BALANCING | ||
704 | bool | ||
705 | |||
706 | # For architectures that (ab)use NUMA to represent different memory regions | ||
707 | # all cpu-local but of different latencies, such as SuperH. | ||
708 | # | ||
709 | config ARCH_WANT_NUMA_VARIABLE_LOCALITY | ||
710 | bool | ||
711 | |||
712 | # | ||
713 | # For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE | ||
714 | config ARCH_WANTS_PROT_NUMA_PROT_NONE | ||
715 | bool | ||
716 | |||
717 | config ARCH_USES_NUMA_PROT_NONE | ||
718 | bool | ||
719 | default y | ||
720 | depends on ARCH_WANTS_PROT_NUMA_PROT_NONE | ||
721 | depends on NUMA_BALANCING | ||
722 | |||
723 | config NUMA_BALANCING | ||
724 | bool "Memory placement aware NUMA scheduler" | ||
725 | default y | ||
726 | depends on ARCH_SUPPORTS_NUMA_BALANCING | ||
727 | depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY | ||
728 | depends on SMP && NUMA && MIGRATION | ||
729 | help | ||
730 | This option adds support for automatic NUMA aware memory/task placement. | ||
731 | The mechanism is quite primitive and is based on migrating memory when | ||
732 | it is referenced to the node the task is running on. | |
733 | |||
734 | This system will be inactive on UMA systems. | ||
735 | |||
699 | menuconfig CGROUPS | 736 | menuconfig CGROUPS |
700 | boolean "Control Group support" | 737 | boolean "Control Group support" |
701 | depends on EVENTFD | 738 | depends on EVENTFD |