author		Mel Gorman <mgorman@suse.de>	2012-11-19 05:59:15 -0500
committer	Mel Gorman <mgorman@suse.de>	2012-12-11 09:42:51 -0500
commit		e14808b49f55e0e1135da5e4a154a540dd9f3662 (patch)
tree		d66708455dcc1b6e2e15937d732ab12c121e623a
parent		a8f6077213d285ca08dbf6d4a67470787388138b (diff)
mm: numa: Rate limit setting of pte_numa if node is saturated
If there are a large number of NUMA hinting faults and all of them are
resulting in migrations, it may indicate that memory is just bouncing
uselessly around. The cost of NUMA balancing is then likely exceeding any
benefit from locality. Rate limit the PTE updates if the node is
migration rate-limited. As noted in the comments, this distorts the NUMA
faulting statistics.

Signed-off-by: Mel Gorman <mgorman@suse.de>
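[Editor's note] To make the throttle this patch hooks into concrete, below is a minimal userspace model of the per-node migration window: a fixed-length window with a page budget, where exhausting the budget is what marks the node as saturated. All names here (node_throttle, migrate_allowed, window_ms, budget_pages) are illustrative stand-ins that only loosely mirror the kernel's numabalancing_migrate_next_window and numabalancing_migrate_nr_pages fields; this is a sketch, not kernel code.

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Illustrative per-node throttle state; a userspace model only,
	 * loosely mirroring the pg_data_t fields used in the patch.
	 */
	struct node_throttle {
		unsigned long next_window;	/* time (ms) at which the window resets */
		unsigned long nr_pages;		/* pages charged in the current window */
	};

	static const unsigned long window_ms = 100;	 /* migrate_interval_millisecs */
	static const unsigned long budget_pages = 32768; /* 128MB of assumed 4K pages */

	/* Charge a migration of @pages at time @now_ms; false means "throttled" */
	static bool migrate_allowed(struct node_throttle *nt, unsigned long now_ms,
				    unsigned long pages)
	{
		if (now_ms >= nt->next_window) {	/* window expired: reset it */
			nt->next_window = now_ms + window_ms;
			nt->nr_pages = 0;
		}
		if (nt->nr_pages + pages > budget_pages)
			return false;			/* node saturated this window */
		nt->nr_pages += pages;
		return true;
	}

	int main(void)
	{
		struct node_throttle nt = { 0, 0 };

		printf("%d\n", migrate_allowed(&nt, 0, 32768)); /* 1: fits the budget */
		printf("%d\n", migrate_allowed(&nt, 50, 1));    /* 0: budget exhausted */
		printf("%d\n", migrate_allowed(&nt, 150, 1));   /* 1: fresh window */
		return 0;
	}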
-rw-r--r--	include/linux/migrate.h	6
-rw-r--r--	kernel/sched/fair.c	9
-rw-r--r--	mm/migrate.c	20
3 files changed, 35 insertions(+), 0 deletions(-)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f0d0313eea6f..91556889adac 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -77,11 +77,17 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
 #else
 static inline int migrate_misplaced_page(struct page *page, int node)
 {
 	return -EAGAIN; /* can't migrate now */
 }
+static inline bool migrate_ratelimited(int node)
+{
+	return false;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* _LINUX_MIGRATE_H */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7727b0161579..37e895a941ab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -27,6 +27,7 @@
 #include <linux/profile.h>
 #include <linux/interrupt.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate.h>
 #include <linux/task_work.h>
 
 #include <trace/events/sched.h>
@@ -861,6 +862,14 @@ void task_numa_work(struct callback_head *work)
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
 		return;
 
+	/*
+	 * Do not set pte_numa if the current running node is rate-limited.
+	 * This loses statistics on the fault but if we are unwilling to
+	 * migrate to this node, it is less likely we can do useful work
+	 */
+	if (migrate_ratelimited(numa_node_id()))
+		return;
+
 	start = mm->numa_scan_offset;
 	pages = sysctl_numa_balancing_scan_size;
 	pages <<= 20 - PAGE_SHIFT; /* MB in pages */
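[Editor's note] A quick sanity check on the pages <<= 20 - PAGE_SHIFT conversion above: with the common 4 KiB page size (PAGE_SHIFT == 12), the shift multiplies by 2^8, i.e. 256 pages per MB. A standalone sketch, with the page size and a 256 MB scan size hardcoded as assumptions:

	#include <stdio.h>

	#define PAGE_SHIFT 12	/* assumed 4 KiB pages; arch-dependent in the kernel */

	int main(void)
	{
		/* 256 MB assumed as the scan size; the real value comes from a sysctl */
		unsigned long scan_size_mb = 256;
		unsigned long pages = scan_size_mb << (20 - PAGE_SHIFT);

		/* 256 MB * 256 pages/MB = 65536 pages per scan pass */
		printf("%lu MB -> %lu pages\n", scan_size_mb, pages);
		return 0;
	}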
diff --git a/mm/migrate.c b/mm/migrate.c
index 4b8267f1842f..32a1afca6009 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1464,10 +1464,30 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
  * page migration rate limiting control.
  * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
  * window of time. Default here says do not migrate more than 1280M per second.
+ * If a node is rate-limited then PTE NUMA updates are also rate-limited. However
+ * as it is faults that reset the window, pte updates will happen unconditionally
+ * if there has not been a fault since @pteupdate_interval_millisecs after the
+ * throttle window closed.
  */
 static unsigned int migrate_interval_millisecs __read_mostly = 100;
+static unsigned int pteupdate_interval_millisecs __read_mostly = 1000;
 static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
 
+/* Returns true if NUMA migration is currently rate limited */
+bool migrate_ratelimited(int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+
+	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window +
+				msecs_to_jiffies(pteupdate_interval_millisecs)))
+		return false;
+
+	if (pgdat->numabalancing_migrate_nr_pages < ratelimit_pages)
+		return false;
+
+	return true;
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
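[Editor's note] The new migrate_ratelimited() has one subtlety the comment block calls out: the throttle outlives the migration window itself by pteupdate_interval_millisecs, and since it is faults that advance the window, a quiet node stops being throttled once that grace period passes. Below is a minimal userspace model of just that decision. Names (node_state, pte_updates_throttled) are illustrative, 4 KiB pages are assumed, and jiffies/time_after() are replaced by plain millisecond arithmetic; this is a sketch of the logic, not the kernel implementation.

	#include <stdbool.h>
	#include <stdio.h>

	/* Illustrative state mirroring the patch's pg_data_t fields */
	struct node_state {
		unsigned long migrate_next_window;	/* ms; numabalancing_migrate_next_window */
		unsigned long migrate_nr_pages;		/* numabalancing_migrate_nr_pages */
	};

	static const unsigned long pteupdate_interval_ms = 1000;
	static const unsigned long budget_pages = 32768;	/* 128MB of assumed 4K pages */

	/* Model of migrate_ratelimited(): true means "skip the pte_numa update" */
	static bool pte_updates_throttled(const struct node_state *ns,
					  unsigned long now_ms)
	{
		/* Quiet long enough after the window closed: stop throttling */
		if (now_ms > ns->migrate_next_window + pteupdate_interval_ms)
			return false;
		/* Window budget not yet exhausted: no reason to throttle */
		if (ns->migrate_nr_pages < budget_pages)
			return false;
		return true;
	}

	int main(void)
	{
		/* A node whose window closed at t=100ms with its budget spent */
		struct node_state ns = { .migrate_next_window = 100,
					 .migrate_nr_pages = 32768 };

		printf("%d\n", pte_updates_throttled(&ns, 150));  /* 1: saturated */
		printf("%d\n", pte_updates_throttled(&ns, 1200)); /* 0: grace passed */
		return 0;
	}

Run against that state, the model throttles at t=150ms but not at t=1200ms, matching the "fault since @pteupdate_interval_millisecs after the throttle window closed" wording in the comment.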