summary | refs | log | tree | commit | diff | stats
path: root/mm/workingset.c
diff options
context:
space:
mode:
author: Johannes Weiner <hannes@cmpxchg.org> 2017-05-03 17:55:03 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-03 18:52:11 -0400
commit: 2a2e48854d704214dac7546e87ae0e4daa0e61a0 (patch)
tree: a3b999e7be0b2495c9602348eda500a9908ae822 /mm/workingset.c
parent: 20ac28933c49433e0f064314de3618129b54a22e (diff)
mm: vmscan: fix IO/refault regression in cache workingset transition
Since commit 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list") we noticed bigger IO spikes during changes in cache access patterns.

The patch in question shrunk the inactive list size to leave more room for the current workingset in the presence of streaming IO. However, workingset transitions that previously happened on the inactive list are now pushed out of memory and incur more refaults to complete.

This patch disables active list protection when refaults are being observed. This accelerates workingset transitions, and allows more of the new set to establish itself from memory, without eating into the ability to protect the established workingset during stable periods.

The workloads that were measurably affected for us were hit pretty badly by it, with refault/majfault rates doubling and tripling during cache transitions, and the machines sustaining half-hour periods of 100% IO utilization, where they'd previously have sub-minute peaks at 60-90%.

Stateful services that handle user data tend to be more conservative with kernel upgrades. As a result we hit most page cache issues with some delay, as was the case here. The severity seemed to warrant a stable tag.

Fixes: 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list")
Link: http://lkml.kernel.org/r/20170404220052.27593-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <stable@vger.kernel.org> [4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/workingset.c')
-rw-r--r-- mm/workingset.c 7
1 files changed, 6 insertions, 1 deletions
diff --git a/mm/workingset.c b/mm/workingset.c
index eda05c71fa49..51c6f61d4cea 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -269,7 +269,6 @@ bool workingset_refault(void *shadow)
269 lruvec = mem_cgroup_lruvec(pgdat, memcg); 269 lruvec = mem_cgroup_lruvec(pgdat, memcg);
270 refault = atomic_long_read(&lruvec->inactive_age); 270 refault = atomic_long_read(&lruvec->inactive_age);
271 active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES); 271 active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
272 rcu_read_unlock();
273 272
274 /* 273 /*
275 * The unsigned subtraction here gives an accurate distance 274 * The unsigned subtraction here gives an accurate distance
@@ -290,11 +289,15 @@ bool workingset_refault(void *shadow)
290 refault_distance = (refault - eviction) & EVICTION_MASK; 289 refault_distance = (refault - eviction) & EVICTION_MASK;
291 290
292 inc_node_state(pgdat, WORKINGSET_REFAULT); 291 inc_node_state(pgdat, WORKINGSET_REFAULT);
292 mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_REFAULT);
293 293
294 if (refault_distance <= active_file) { 294 if (refault_distance <= active_file) {
295 inc_node_state(pgdat, WORKINGSET_ACTIVATE); 295 inc_node_state(pgdat, WORKINGSET_ACTIVATE);
296 mem_cgroup_inc_stat(memcg, MEMCG_WORKINGSET_ACTIVATE);
297 rcu_read_unlock();
296 return true; 298 return true;
297 } 299 }
300 rcu_read_unlock();
298 return false; 301 return false;
299} 302}
300 303
@@ -472,6 +475,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
472 if (WARN_ON_ONCE(node->exceptional)) 475 if (WARN_ON_ONCE(node->exceptional))
473 goto out_invalid; 476 goto out_invalid;
474 inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); 477 inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
478 mem_cgroup_inc_page_stat(virt_to_page(node),
479 MEMCG_WORKINGSET_NODERECLAIM);
475 __radix_tree_delete_node(&mapping->page_tree, node, 480 __radix_tree_delete_node(&mapping->page_tree, node,
476 workingset_update_node, mapping); 481 workingset_update_node, mapping);
477 482