aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/pgtable.h11
-rw-r--r--include/asm-generic/pgtable.h106
-rw-r--r--init/Kconfig37
3 files changed, 152 insertions, 2 deletions
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5fe03aaca92e..5199db2923d3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -404,7 +404,8 @@ static inline int pte_same(pte_t a, pte_t b)
404 404
405static inline int pte_present(pte_t a) 405static inline int pte_present(pte_t a)
406{ 406{
407 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); 407 return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
408 _PAGE_NUMA);
408} 409}
409 410
410#define pte_accessible pte_accessible 411#define pte_accessible pte_accessible
@@ -426,7 +427,8 @@ static inline int pmd_present(pmd_t pmd)
426 * the _PAGE_PSE flag will remain set at all times while the 427 * the _PAGE_PSE flag will remain set at all times while the
427 * _PAGE_PRESENT bit is clear). 428 * _PAGE_PRESENT bit is clear).
428 */ 429 */
429 return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); 430 return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
431 _PAGE_NUMA);
430} 432}
431 433
432static inline int pmd_none(pmd_t pmd) 434static inline int pmd_none(pmd_t pmd)
@@ -485,6 +487,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
485 487
486static inline int pmd_bad(pmd_t pmd) 488static inline int pmd_bad(pmd_t pmd)
487{ 489{
490#ifdef CONFIG_NUMA_BALANCING
491 /* pmd_numa check */
492 if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
493 return 0;
494#endif
488 return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; 495 return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
489} 496}
490 497
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 48fc1dc1c74b..f27c83668d10 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -558,6 +558,112 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
558#endif 558#endif
559} 559}
560 560
561#ifdef CONFIG_NUMA_BALANCING
562#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
563/*
564 * _PAGE_NUMA works identically to _PAGE_PROTNONE (it's actually the
565 * same bit too). It's set only when _PAGE_PRESENT is not set and it's
566 * never set if _PAGE_PRESENT is set.
567 *
568 * pte/pmd_present() returns true if pte/pmd_numa returns true. Page
569 * fault triggers on those regions if pte/pmd_numa returns true
570 * (because _PAGE_PRESENT is not set).
571 */
572#ifndef pte_numa
573static inline int pte_numa(pte_t pte)
574{
575 return (pte_flags(pte) &
576 (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
577}
578#endif
579
580#ifndef pmd_numa
581static inline int pmd_numa(pmd_t pmd)
582{
583 return (pmd_flags(pmd) &
584 (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
585}
586#endif
587
588/*
589 * pte/pmd_mknonnuma sets the _PAGE_ACCESSED bitflag automatically
590 * because they're called by the NUMA hinting minor page fault. If we
591 * didn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
592 * would be forced to set it later while filling the TLB after we
593 * return to userland. That would trigger a second write to memory
594 * that we optimize away by setting _PAGE_ACCESSED here.
595 */
596#ifndef pte_mknonnuma
597static inline pte_t pte_mknonnuma(pte_t pte)
598{
599 pte = pte_clear_flags(pte, _PAGE_NUMA);
600 return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED);
601}
602#endif
603
604#ifndef pmd_mknonnuma
605static inline pmd_t pmd_mknonnuma(pmd_t pmd)
606{
607 pmd = pmd_clear_flags(pmd, _PAGE_NUMA);
608 return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED);
609}
610#endif
611
612#ifndef pte_mknuma
613static inline pte_t pte_mknuma(pte_t pte)
614{
615 pte = pte_set_flags(pte, _PAGE_NUMA);
616 return pte_clear_flags(pte, _PAGE_PRESENT);
617}
618#endif
619
620#ifndef pmd_mknuma
621static inline pmd_t pmd_mknuma(pmd_t pmd)
622{
623 pmd = pmd_set_flags(pmd, _PAGE_NUMA);
624 return pmd_clear_flags(pmd, _PAGE_PRESENT);
625}
626#endif
627#else
628extern int pte_numa(pte_t pte);
629extern int pmd_numa(pmd_t pmd);
630extern pte_t pte_mknonnuma(pte_t pte);
631extern pmd_t pmd_mknonnuma(pmd_t pmd);
632extern pte_t pte_mknuma(pte_t pte);
633extern pmd_t pmd_mknuma(pmd_t pmd);
634#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
635#else
636static inline int pmd_numa(pmd_t pmd)
637{
638 return 0;
639}
640
641static inline int pte_numa(pte_t pte)
642{
643 return 0;
644}
645
646static inline pte_t pte_mknonnuma(pte_t pte)
647{
648 return pte;
649}
650
651static inline pmd_t pmd_mknonnuma(pmd_t pmd)
652{
653 return pmd;
654}
655
656static inline pte_t pte_mknuma(pte_t pte)
657{
658 return pte;
659}
660
661static inline pmd_t pmd_mknuma(pmd_t pmd)
662{
663 return pmd;
664}
665#endif /* CONFIG_NUMA_BALANCING */
666
561#endif /* CONFIG_MMU */ 667#endif /* CONFIG_MMU */
562 668
563#endif /* !__ASSEMBLY__ */ 669#endif /* !__ASSEMBLY__ */
diff --git a/init/Kconfig b/init/Kconfig
index 6fdd6e339326..9f00f004796a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -696,6 +696,43 @@ config LOG_BUF_SHIFT
696config HAVE_UNSTABLE_SCHED_CLOCK 696config HAVE_UNSTABLE_SCHED_CLOCK
697 bool 697 bool
698 698
699#
700# For architectures that want to enable the support for NUMA-affine scheduler
701# balancing logic:
702#
703config ARCH_SUPPORTS_NUMA_BALANCING
704 bool
705
706# For architectures that (ab)use NUMA to represent different memory regions
707# all cpu-local but of different latencies, such as SuperH.
708#
709config ARCH_WANT_NUMA_VARIABLE_LOCALITY
710 bool
711
712#
713# For architectures that are willing to define _PAGE_NUMA as _PAGE_PROTNONE
714config ARCH_WANTS_PROT_NUMA_PROT_NONE
715 bool
716
717config ARCH_USES_NUMA_PROT_NONE
718 bool
719 default y
720 depends on ARCH_WANTS_PROT_NUMA_PROT_NONE
721 depends on NUMA_BALANCING
722
723config NUMA_BALANCING
724 bool "Memory placement aware NUMA scheduler"
725 default y
726 depends on ARCH_SUPPORTS_NUMA_BALANCING
727 depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
728 depends on SMP && NUMA && MIGRATION
729 help
730 This option adds support for automatic NUMA aware memory/task placement.
731 The mechanism is quite primitive and is based on migrating memory to
732 the node the task is running on when that memory is referenced.
733
734 This system will be inactive on UMA systems.
735
699menuconfig CGROUPS 736menuconfig CGROUPS
700 boolean "Control Group support" 737 boolean "Control Group support"
701 depends on EVENTFD 738 depends on EVENTFD