diff options
author | Mel Gorman <mgorman@suse.de> | 2012-11-14 13:34:32 -0500 |
---|---|---|
committer | Mel Gorman <mgorman@suse.de> | 2012-12-11 09:42:46 -0500 |
commit | 9f40604cdab935e80db57b309c48659de349d4e6 (patch) | |
tree | 72f77bdb7d8ab07e4db4323642db7c04eca8e9e9 | |
parent | 6e5fb223e89dbe5cb5c563f8d4a4a0a7d62455a8 (diff) |
sched, numa, mm: Count WS scanning against present PTEs, not virtual memory ranges
By accounting against the present PTEs, scanning speed reflects the
actual present (mapped) memory.
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r-- | kernel/sched/fair.c | 36 |
1 files changed, 21 insertions, 15 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0a349dd1fa60..f6e1f25ed2bd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -827,8 +827,8 @@ void task_numa_work(struct callback_head *work) | |||
827 | struct task_struct *p = current; | 827 | struct task_struct *p = current; |
828 | struct mm_struct *mm = p->mm; | 828 | struct mm_struct *mm = p->mm; |
829 | struct vm_area_struct *vma; | 829 | struct vm_area_struct *vma; |
830 | unsigned long offset, end; | 830 | unsigned long start, end; |
831 | long length; | 831 | long pages; |
832 | 832 | ||
833 | WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work)); | 833 | WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work)); |
834 | 834 | ||
@@ -858,18 +858,20 @@ void task_numa_work(struct callback_head *work) | |||
858 | if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate) | 858 | if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate) |
859 | return; | 859 | return; |
860 | 860 | ||
861 | offset = mm->numa_scan_offset; | 861 | start = mm->numa_scan_offset; |
862 | length = sysctl_numa_balancing_scan_size; | 862 | pages = sysctl_numa_balancing_scan_size; |
863 | length <<= 20; | 863 | pages <<= 20 - PAGE_SHIFT; /* MB in pages */ |
864 | if (!pages) | ||
865 | return; | ||
864 | 866 | ||
865 | down_read(&mm->mmap_sem); | 867 | down_read(&mm->mmap_sem); |
866 | vma = find_vma(mm, offset); | 868 | vma = find_vma(mm, start); |
867 | if (!vma) { | 869 | if (!vma) { |
868 | reset_ptenuma_scan(p); | 870 | reset_ptenuma_scan(p); |
869 | offset = 0; | 871 | start = 0; |
870 | vma = mm->mmap; | 872 | vma = mm->mmap; |
871 | } | 873 | } |
872 | for (; vma && length > 0; vma = vma->vm_next) { | 874 | for (; vma; vma = vma->vm_next) { |
873 | if (!vma_migratable(vma)) | 875 | if (!vma_migratable(vma)) |
874 | continue; | 876 | continue; |
875 | 877 | ||
@@ -877,15 +879,19 @@ void task_numa_work(struct callback_head *work) | |||
877 | if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR) | 879 | if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR) |
878 | continue; | 880 | continue; |
879 | 881 | ||
880 | offset = max(offset, vma->vm_start); | 882 | do { |
881 | end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end); | 883 | start = max(start, vma->vm_start); |
882 | length -= end - offset; | 884 | end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE); |
883 | 885 | end = min(end, vma->vm_end); | |
884 | change_prot_numa(vma, offset, end); | 886 | pages -= change_prot_numa(vma, start, end); |
885 | 887 | ||
886 | offset = end; | 888 | start = end; |
889 | if (pages <= 0) | ||
890 | goto out; | ||
891 | } while (end != vma->vm_end); | ||
887 | } | 892 | } |
888 | 893 | ||
894 | out: | ||
889 | /* | 895 | /* |
890 | * It is possible to reach the end of the VMA list but the last few VMAs are | 896 | * It is possible to reach the end of the VMA list but the last few VMAs are |
890 | * not guaranteed to be vma_migratable. If they are not, we would find the | 897 | * not guaranteed to be vma_migratable. If they are not, we would find the |
@@ -893,7 +899,7 @@ void task_numa_work(struct callback_head *work) | |||
893 | * so check it now. | 899 | * so check it now. |
894 | */ | 900 | */ |
895 | if (vma) | 901 | if (vma) |
896 | mm->numa_scan_offset = offset; | 902 | mm->numa_scan_offset = start; |
897 | else | 903 | else |
898 | reset_ptenuma_scan(p); | 904 | reset_ptenuma_scan(p); |
899 | up_read(&mm->mmap_sem); | 905 | up_read(&mm->mmap_sem); |