author     Mel Gorman <mgorman@suse.de>  2012-11-19 05:59:15 -0500
committer  Mel Gorman <mgorman@suse.de>  2012-12-11 09:42:51 -0500
commit     e14808b49f55e0e1135da5e4a154a540dd9f3662
tree       d66708455dcc1b6e2e15937d732ab12c121e623a
parent     a8f6077213d285ca08dbf6d4a67470787388138b
mm: numa: Rate limit setting of pte_numa if node is saturated
If there are a large number of NUMA hinting faults and all of them
result in migrations, it may indicate that memory is just bouncing
uselessly around; the cost of NUMA balancing is likely exceeding any
benefit from locality. Rate-limit the PTE updates if the node is
already migration rate-limited. As noted in the comments, this
distorts the NUMA faulting statistics.
Signed-off-by: Mel Gorman <mgorman@suse.de>
 include/linux/migrate.h |  6 ++++++
 kernel/sched/fair.c     |  9 +++++++++
 mm/migrate.c            | 20 ++++++++++++++++++++
 3 files changed, 35 insertions(+), 0 deletions(-)
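Before the hunks, a minimal userspace model of the decision this patch adds may help: PTE updates are skipped only while the node's migration throttle window is still recent (within a grace period) and that window's page budget has already been spent. This is a sketch under the assumption of 4K pages; the model_* names and the struct fields are illustrative stand-ins, not the kernel's pg_data_t API.

#include <stdbool.h>

/* Illustrative defaults matching the patch, assuming 4K pages. */
#define MODEL_RATELIMIT_PAGES       (128u << (20 - 12))  /* 128MB in pages */
#define MODEL_PTEUPDATE_INTERVAL_MS 1000u

struct model_node {
        unsigned long migrate_next_window_ms; /* when the throttle window closes */
        unsigned int  migrate_nr_pages;       /* pages migrated in this window */
};

/* Wrap-safe "a is after b", the same idiom as the kernel's time_after(). */
static bool model_time_after(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;
}

/*
 * Mirror of the new migrate_ratelimited(): the node only counts as
 * saturated while the window (plus a grace period) is recent AND the
 * window's migration budget has already been spent.
 */
static bool model_migrate_ratelimited(const struct model_node *n,
                                      unsigned long now_ms)
{
        if (model_time_after(now_ms, n->migrate_next_window_ms +
                                     MODEL_PTEUPDATE_INTERVAL_MS))
                return false;  /* no recent fault activity: update PTEs */

        if (n->migrate_nr_pages < MODEL_RATELIMIT_PAGES)
                return false;  /* budget left: node is not saturated */

        return true;           /* saturated: skip the pte_numa update pass */
}

The real check, shown in the mm/migrate.c hunk below, reads the same two conditions from pgdat.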
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f0d0313eea6f..91556889adac 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -77,11 +77,17 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
        return -EAGAIN; /* can't migrate now */
 }
+static inline bool migrate_ratelimited(int node)
+{
+       return false;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* _LINUX_MIGRATE_H */
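Two notes on this hunk. It adds a second, duplicate extern declaration of migrate_misplaced_page(); redundant declarations are legal C, and the duplicate appears in the commit exactly as shown. More importantly, the #else stub is the usual kernel config pattern, sketched generically below with a hypothetical CONFIG_MY_FEATURE and feature_throttled() helper (not kernel API): when the feature is compiled out, the static inline returns a constant false, callers need no #ifdef of their own, and the compiler drops the branch. That is why the kernel/sched/fair.c hunk that follows can call migrate_ratelimited() unconditionally.

/* The same pattern in miniature; names here are hypothetical. */
#include <stdbool.h>

#ifdef CONFIG_MY_FEATURE
extern bool feature_throttled(int node);
#else
static inline bool feature_throttled(int node)
{
        (void)node;
        return false;  /* feature compiled out: branch folds away at callers */
}
#endif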
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7727b0161579..37e895a941ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -27,6 +27,7 @@
 #include <linux/profile.h>
 #include <linux/interrupt.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate.h>
 #include <linux/task_work.h>
 
 #include <trace/events/sched.h>
@@ -861,6 +862,14 @@ void task_numa_work(struct callback_head *work)
        if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
                return;
 
+       /*
+        * Do not set pte_numa if the current running node is rate-limited.
+        * This loses statistics on the fault but if we are unwilling to
+        * migrate to this node, it is less likely we can do useful work
+        */
+       if (migrate_ratelimited(numa_node_id()))
+               return;
+
        start = mm->numa_scan_offset;
        pages = sysctl_numa_balancing_scan_size;
        pages <<= 20 - PAGE_SHIFT; /* MB in pages */
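One conversion in the surrounding context is worth spelling out: sysctl_numa_balancing_scan_size is expressed in megabytes, and pages <<= 20 - PAGE_SHIFT turns it into a page count (1MB is 2^20 bytes, a page is 2^PAGE_SHIFT bytes). A quick userspace check of that arithmetic, assuming 4K pages (PAGE_SHIFT == 12) and an example scan size of 256MB:

#include <stdio.h>

int main(void)
{
        const unsigned int page_shift = 12; /* 4K pages assumed */
        unsigned long pages = 256;          /* example scan size in MB */

        /* 1MB = 1 << (20 - PAGE_SHIFT) pages, i.e. 256 pages at 4K */
        pages <<= 20 - page_shift;

        printf("%lu pages\n", pages);       /* prints 65536 */
        return 0;
}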
diff --git a/mm/migrate.c b/mm/migrate.c
index 4b8267f1842f..32a1afca6009 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1464,10 +1464,30 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
  * page migration rate limiting control.
  * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
  * window of time. Default here says do not migrate more than 1280M per second.
+ * If a node is rate-limited then PTE NUMA updates are also rate-limited. However
+ * as it is faults that reset the window, pte updates will happen unconditionally
+ * if there has not been a fault since @pteupdate_interval_millisecs after the
+ * throttle window closed.
  */
 static unsigned int migrate_interval_millisecs __read_mostly = 100;
+static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
 static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
 
+/* Returns true if NUMA migration is currently rate limited */
+bool migrate_ratelimited(int node)
+{
+       pg_data_t *pgdat = NODE_DATA(node);
+
+       if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
+                               msecs_to_jiffies(pteupdate_interval_millisecs)))
+               return false;
+
+       if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
+               return false;
+
+       return true;
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
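The 1280M-per-second figure in the comment above follows from the two defaults: ratelimit_pages allows 128MB of migrations per 100ms migrate_interval_millisecs window, and there are ten such windows per second. A quick check of the arithmetic, again assuming 4K pages:

#include <stdio.h>

int main(void)
{
        const unsigned int page_shift = 12;   /* 4K pages assumed */
        const unsigned int interval_ms = 100; /* migrate_interval_millisecs */
        const unsigned long limit_pages = 128ul << (20 - page_shift);

        /* pages per window -> MB per window -> MB per second */
        unsigned long mb_per_window = limit_pages >> (20 - page_shift);
        unsigned long mb_per_sec = mb_per_window * (1000 / interval_ms);

        printf("%lu MB/s\n", mb_per_sec);     /* prints 1280 */
        return 0;
}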