diff options
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/fair.c | 65 |
1 files changed, 52 insertions, 13 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6831abb5dbef..0a349dd1fa60 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -780,10 +780,13 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
780 | 780 | ||
781 | #ifdef CONFIG_NUMA_BALANCING | 781 | #ifdef CONFIG_NUMA_BALANCING |
782 | /* | 782 | /* |
783 | * numa task sample period in ms: 5s | 783 | * numa task sample period in ms |
784 | */ | 784 | */ |
785 | unsigned int sysctl_numa_balancing_scan_period_min = 5000; | 785 | unsigned int sysctl_numa_balancing_scan_period_min = 100; |
786 | unsigned int sysctl_numa_balancing_scan_period_max = 5000*16; | 786 | unsigned int sysctl_numa_balancing_scan_period_max = 100*16; |
787 | |||
788 | /* Portion of address space to scan in MB */ | ||
789 | unsigned int sysctl_numa_balancing_scan_size = 256; | ||
787 | 790 | ||
788 | static void task_numa_placement(struct task_struct *p) | 791 | static void task_numa_placement(struct task_struct *p) |
789 | { | 792 | { |
@@ -808,6 +811,12 @@ void task_numa_fault(int node, int pages) | |||
808 | task_numa_placement(p); | 811 | task_numa_placement(p); |
809 | } | 812 | } |
810 | 813 | ||
814 | static void reset_ptenuma_scan(struct task_struct *p) | ||
815 | { | ||
816 | ACCESS_ONCE(p->mm->numa_scan_seq)++; | ||
817 | p->mm->numa_scan_offset = 0; | ||
818 | } | ||
819 | |||
811 | /* | 820 | /* |
812 | * The expensive part of numa migration is done from task_work context. | 821 | * The expensive part of numa migration is done from task_work context. |
813 | * Triggered from task_tick_numa(). | 822 | * Triggered from task_tick_numa(). |
@@ -817,6 +826,9 @@ void task_numa_work(struct callback_head *work) | |||
817 | unsigned long migrate, next_scan, now = jiffies; | 826 | unsigned long migrate, next_scan, now = jiffies; |
818 | struct task_struct *p = current; | 827 | struct task_struct *p = current; |
819 | struct mm_struct *mm = p->mm; | 828 | struct mm_struct *mm = p->mm; |
829 | struct vm_area_struct *vma; | ||
830 | unsigned long offset, end; | ||
831 | long length; | ||
820 | 832 | ||
821 | WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work)); | 833 | WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work)); |
822 | 834 | ||
@@ -846,18 +858,45 @@ void task_numa_work(struct callback_head *work) | |||
846 | if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate) | 858 | if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate) |
847 | return; | 859 | return; |
848 | 860 | ||
849 | ACCESS_ONCE(mm->numa_scan_seq)++; | 861 | offset = mm->numa_scan_offset; |
850 | { | 862 | length = sysctl_numa_balancing_scan_size; |
851 | struct vm_area_struct *vma; | 863 | length <<= 20; |
852 | 864 | ||
853 | down_read(&mm->mmap_sem); | 865 | down_read(&mm->mmap_sem); |
854 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 866 | vma = find_vma(mm, offset); |
855 | if (!vma_migratable(vma)) | 867 | if (!vma) { |
856 | continue; | 868 | reset_ptenuma_scan(p); |
857 | change_prot_numa(vma, vma->vm_start, vma->vm_end); | 869 | offset = 0; |
858 | } | 870 | vma = mm->mmap; |
859 | up_read(&mm->mmap_sem); | 871 | } |
872 | for (; vma && length > 0; vma = vma->vm_next) { | ||
873 | if (!vma_migratable(vma)) | ||
874 | continue; | ||
875 | |||
876 | /* Skip small VMAs. They are not likely to be of relevance */ | ||
877 | if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR) | ||
878 | continue; | ||
879 | |||
880 | offset = max(offset, vma->vm_start); | ||
881 | end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end); | ||
882 | length -= end - offset; | ||
883 | |||
884 | change_prot_numa(vma, offset, end); | ||
885 | |||
886 | offset = end; | ||
860 | } | 887 | } |
888 | |||
889 | /* | ||
890 | * It is possible to reach the end of the VMA list but the last few VMAs are | ||
891 | * not guaranteed to be vma_migratable. If they are not, we would find the | ||
892 | * !migratable VMA on the next scan but not reset the scanner to the start | ||
893 | * so check it now. | ||
894 | */ | ||
895 | if (vma) | ||
896 | mm->numa_scan_offset = offset; | ||
897 | else | ||
898 | reset_ptenuma_scan(p); | ||
899 | up_read(&mm->mmap_sem); | ||
861 | } | 900 | } |
862 | 901 | ||
863 | /* | 902 | /* |