aboutsummaryrefslogtreecommitdiffstats
path: root/mm/list_lru.c
diff options
context:
space:
mode:
author Johannes Weiner <hannes@cmpxchg.org> 2014-04-03 17:47:56 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-04-03 19:21:01 -0400
commit 449dd6984d0e47643c04c807f609dd56d48d5bcc (patch)
tree 69f4a0a90290b048e63effc617c8ec907e8d6696 /mm/list_lru.c
parent 139e561660fe11e0fc35e142a800df3dd7d03e9d (diff)
mm: keep page cache radix tree nodes in check
Previously, page cache radix tree nodes were freed after reclaim emptied out their page pointers. But now reclaim stores shadow entries in their place, which are only reclaimed when the inodes themselves are reclaimed. This is problematic for bigger files that are still in use after they have a significant amount of their cache reclaimed, without any of those pages actually refaulting. The shadow entries will just sit there and waste memory. In the worst case, the shadow entries will accumulate until the machine runs out of memory.

To get this under control, the VM will track radix tree nodes exclusively containing shadow entries on a per-NUMA node list. Per-NUMA rather than global because we expect the radix tree nodes themselves to be allocated node-locally and we want to reduce cross-node references of otherwise independent cache workloads. A simple shrinker will then reclaim these nodes on memory pressure.

A few things need to be stored in the radix tree node to implement the shadow node LRU and allow tree deletions coming from the list:

1. There is no index available that would describe the reverse path from the node up to the tree root, which is needed to perform a deletion. To solve this, encode in each node its offset inside the parent. This can be stored in the unused upper bits of the same member that stores the node's height at no extra space cost.

2. The number of shadow entries needs to be counted in addition to the regular entries, to quickly detect when the node is ready to go to the shadow node LRU list. The current entry count is an unsigned int but the maximum number of entries is 64, so a shadow counter can easily be stored in the unused upper bits.

3. Tree modification needs tree lock and tree root, which are located in the address space, so store an address_space backpointer in the node. The parent pointer of the node is in a union with the 2-word rcu_head, so the backpointer comes at no extra cost as well.

4. The node needs to be linked to an LRU list, which requires a list head inside the node. This does increase the size of the node, but it does not change the number of objects that fit into a slab page.

[akpm@linux-foundation.org: export the right function]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/list_lru.c')
-rw-r--r-- mm/list_lru.c 16
1 files changed, 14 insertions, 2 deletions
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 72f9decb0104..f1a0db194173 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -87,11 +87,20 @@ restart:
87 87
88 ret = isolate(item, &nlru->lock, cb_arg); 88 ret = isolate(item, &nlru->lock, cb_arg);
89 switch (ret) { 89 switch (ret) {
90 case LRU_REMOVED_RETRY:
91 assert_spin_locked(&nlru->lock);
90 case LRU_REMOVED: 92 case LRU_REMOVED:
91 if (--nlru->nr_items == 0) 93 if (--nlru->nr_items == 0)
92 node_clear(nid, lru->active_nodes); 94 node_clear(nid, lru->active_nodes);
93 WARN_ON_ONCE(nlru->nr_items < 0); 95 WARN_ON_ONCE(nlru->nr_items < 0);
94 isolated++; 96 isolated++;
97 /*
98 * If the lru lock has been dropped, our list
99 * traversal is now invalid and so we have to
100 * restart from scratch.
101 */
102 if (ret == LRU_REMOVED_RETRY)
103 goto restart;
95 break; 104 break;
96 case LRU_ROTATE: 105 case LRU_ROTATE:
97 list_move_tail(item, &nlru->list); 106 list_move_tail(item, &nlru->list);
@@ -103,6 +112,7 @@ restart:
103 * The lru lock has been dropped, our list traversal is 112 * The lru lock has been dropped, our list traversal is
104 * now invalid and so we have to restart from scratch. 113 * now invalid and so we have to restart from scratch.
105 */ 114 */
115 assert_spin_locked(&nlru->lock);
106 goto restart; 116 goto restart;
107 default: 117 default:
108 BUG(); 118 BUG();
@@ -114,7 +124,7 @@ restart:
114} 124}
115EXPORT_SYMBOL_GPL(list_lru_walk_node); 125EXPORT_SYMBOL_GPL(list_lru_walk_node);
116 126
117int list_lru_init(struct list_lru *lru) 127int list_lru_init_key(struct list_lru *lru, struct lock_class_key *key)
118{ 128{
119 int i; 129 int i;
120 size_t size = sizeof(*lru->node) * nr_node_ids; 130 size_t size = sizeof(*lru->node) * nr_node_ids;
@@ -126,12 +136,14 @@ int list_lru_init(struct list_lru *lru)
126 nodes_clear(lru->active_nodes); 136 nodes_clear(lru->active_nodes);
127 for (i = 0; i < nr_node_ids; i++) { 137 for (i = 0; i < nr_node_ids; i++) {
128 spin_lock_init(&lru->node[i].lock); 138 spin_lock_init(&lru->node[i].lock);
139 if (key)
140 lockdep_set_class(&lru->node[i].lock, key);
129 INIT_LIST_HEAD(&lru->node[i].list); 141 INIT_LIST_HEAD(&lru->node[i].list);
130 lru->node[i].nr_items = 0; 142 lru->node[i].nr_items = 0;
131 } 143 }
132 return 0; 144 return 0;
133} 145}
134EXPORT_SYMBOL_GPL(list_lru_init); 146EXPORT_SYMBOL_GPL(list_lru_init_key);
135 147
136void list_lru_destroy(struct list_lru *lru) 148void list_lru_destroy(struct list_lru *lru)
137{ 149{