Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/fair.c	65
1 file changed, 52 insertions, 13 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6831abb5dbef..0a349dd1fa60 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -780,10 +780,13 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 #ifdef CONFIG_NUMA_BALANCING
 /*
- * numa task sample period in ms: 5s
+ * numa task sample period in ms
  */
-unsigned int sysctl_numa_balancing_scan_period_min = 5000;
-unsigned int sysctl_numa_balancing_scan_period_max = 5000*16;
+unsigned int sysctl_numa_balancing_scan_period_min = 100;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+
+/* Portion of address space to scan in MB */
+unsigned int sysctl_numa_balancing_scan_size = 256;
 
 static void task_numa_placement(struct task_struct *p)
 {
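
The two period sysctls bound how often a task's scan pass may run, and the new scan-size sysctl caps how much address space each pass may touch. As a hedged illustration of the resulting rate arithmetic, the standalone C sketch below plugs in the defaults from the hunk above; the variable names mirror the sysctls, but the program itself is illustrative, not kernel code.

#include <stdio.h>

/* Defaults from the hunk above (illustrative copies, not the kernel symbols). */
static const unsigned int scan_period_min_ms = 100;
static const unsigned int scan_period_max_ms = 100 * 16;
static const unsigned int scan_size_mb = 256;

int main(void)
{
	/* One pass covers at most scan_size_mb; passes are spaced between
	 * scan_period_min_ms and scan_period_max_ms apart. */
	unsigned int fastest = scan_size_mb * 1000 / scan_period_min_ms;
	unsigned int slowest = scan_size_mb * 1000 / scan_period_max_ms;

	printf("scan rate: at most %u MB/s, at least %u MB/s\n",
	       fastest, slowest);
	return 0;
}

With these defaults a task's scanner covers at most 256MB every 100ms (2560 MB/s) and, fully backed off to scan_period_max, 256MB every 1600ms (160 MB/s).
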
@@ -808,6 +811,12 @@ void task_numa_fault(int node, int pages)
 	task_numa_placement(p);
 }
 
+static void reset_ptenuma_scan(struct task_struct *p)
+{
+	ACCESS_ONCE(p->mm->numa_scan_seq)++;
+	p->mm->numa_scan_offset = 0;
+}
+
 /*
  * The expensive part of numa migration is done from task_work context.
  * Triggered from task_tick_numa().
@@ -817,6 +826,9 @@ void task_numa_work(struct callback_head *work)
 	unsigned long migrate, next_scan, now = jiffies;
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
+	struct vm_area_struct *vma;
+	unsigned long offset, end;
+	long length;
 
 	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
 
@@ -846,18 +858,45 @@ void task_numa_work(struct callback_head *work)
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
 		return;
 
-	ACCESS_ONCE(mm->numa_scan_seq)++;
-	{
-		struct vm_area_struct *vma;
+	offset = mm->numa_scan_offset;
+	length = sysctl_numa_balancing_scan_size;
+	length <<= 20;
 
-		down_read(&mm->mmap_sem);
-		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			if (!vma_migratable(vma))
-				continue;
-			change_prot_numa(vma, vma->vm_start, vma->vm_end);
-		}
-		up_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, offset);
+	if (!vma) {
+		reset_ptenuma_scan(p);
+		offset = 0;
+		vma = mm->mmap;
+	}
+	for (; vma && length > 0; vma = vma->vm_next) {
+		if (!vma_migratable(vma))
+			continue;
+
+		/* Skip small VMAs. They are not likely to be of relevance */
+		if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR)
+			continue;
+
+		offset = max(offset, vma->vm_start);
+		end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end);
+		length -= end - offset;
+
+		change_prot_numa(vma, offset, end);
+
+		offset = end;
 	}
+
+	/*
+	 * It is possible to reach the end of the VMA list but the last few
+	 * VMAs are not guaranteed to be vma_migratable. If they are not, we
+	 * would find the !migratable VMA on the next scan but not reset the
+	 * scanner to the start, so check it now.
+	 */
+	if (vma)
+		mm->numa_scan_offset = offset;
+	else
+		reset_ptenuma_scan(p);
+	up_read(&mm->mmap_sem);
 }
 
 /*
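
For readers tracing the new control flow, here is a self-contained userspace model of the windowing walk above. struct vma, the sample mappings, and the macro definitions are illustrative stand-ins for vm_area_struct, the mm->mmap list, and the kernel's helpers; the 2MB HPAGE_SIZE is an x86-64 assumption, and the 300MB budget is picked so the output shows two windows.

#include <stdio.h>

#define PAGE_SHIFT	12
#define HPAGE_SIZE	(1UL << 21)	/* 2MB huge pages: an x86-64 assumption */
#define HPAGE_PMD_NR	(HPAGE_SIZE >> PAGE_SHIFT)

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define min(a, b)	((a) < (b) ? (a) : (b))
#define max(a, b)	((a) > (b) ? (a) : (b))

/* Toy stand-in for vm_area_struct: only the fields the loop reads. */
struct vma {
	unsigned long vm_start, vm_end;
	struct vma *vm_next;
};

int main(void)
{
	/* Three fake mappings; the middle one is a single page and is
	 * skipped by the small-VMA check, just as in the patched loop. */
	struct vma c = { 0x40000000UL, 0x80000000UL, NULL };
	struct vma b = { 0x30000000UL, 0x30001000UL, &c };
	struct vma a = { 0x10000000UL, 0x20000000UL, &b };

	unsigned long offset = 0, end;
	long length = 300L << 20;	/* pretend scan_size is 300MB */
	struct vma *vma;

	for (vma = &a; vma && length > 0; vma = vma->vm_next) {
		if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR)
			continue;

		offset = max(offset, vma->vm_start);
		end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end);
		length -= end - offset;

		/* The kernel would call change_prot_numa(vma, offset, end) here. */
		printf("would scan [%#lx, %#lx)\n", offset, end);
		offset = end;
	}
	return 0;
}

The middle mapping fails the HPAGE_PMD_NR size check and is skipped, so the budget is spent on the first and third mappings. Note that offset always ends up at the end of the last window scanned, which is exactly the value task_numa_work stores back into mm->numa_scan_offset so the next pass resumes where this one stopped.
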