aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorJohannes Weiner <hannes@cmpxchg.org>2014-04-03 17:47:56 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-03 19:21:01 -0400
commit449dd6984d0e47643c04c807f609dd56d48d5bcc (patch)
tree69f4a0a90290b048e63effc617c8ec907e8d6696 /lib
parent139e561660fe11e0fc35e142a800df3dd7d03e9d (diff)
mm: keep page cache radix tree nodes in check
Previously, page cache radix tree nodes were freed after reclaim emptied out their page pointers. But now reclaim stores shadow entries in their place, which are only reclaimed when the inodes themselves are reclaimed. This is problematic for bigger files that are still in use after they have a significant amount of their cache reclaimed, without any of those pages actually refaulting. The shadow entries will just sit there and waste memory. In the worst case, the shadow entries will accumulate until the machine runs out of memory. To get this under control, the VM will track radix tree nodes exclusively containing shadow entries on a per-NUMA node list. Per-NUMA rather than global because we expect the radix tree nodes themselves to be allocated node-locally and we want to reduce cross-node references of otherwise independent cache workloads. A simple shrinker will then reclaim these nodes on memory pressure. A few things need to be stored in the radix tree node to implement the shadow node LRU and allow tree deletions coming from the list: 1. There is no index available that would describe the reverse path from the node up to the tree root, which is needed to perform a deletion. To solve this, encode in each node its offset inside the parent. This can be stored in the unused upper bits of the same member that stores the node's height at no extra space cost. 2. The number of shadow entries needs to be counted in addition to the regular entries, to quickly detect when the node is ready to go to the shadow node LRU list. The current entry count is an unsigned int but the maximum number of entries is 64, so a shadow counter can easily be stored in the unused upper bits. 3. Tree modification needs tree lock and tree root, which are located in the address space, so store an address_space backpointer in the node. The parent pointer of the node is in a union with the 2-word rcu_head, so the backpointer comes at no extra cost as well. 4. The node needs to be linked to an LRU list, which requires a list head inside the node. This does increase the size of the node, but it does not change the number of objects that fit into a slab page. [akpm@linux-foundation.org: export the right function] Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Rik van Riel <riel@redhat.com> Reviewed-by: Minchan Kim <minchan@kernel.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Bob Liu <bob.liu@oracle.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jan Kara <jack@suse.cz> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Luigi Semenzato <semenzato@google.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Metin Doslu <metin@citusdata.com> Cc: Michel Lespinasse <walken@google.com> Cc: Ozgun Erdogan <ozgun@citusdata.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <klamm@yandex-team.ru> Cc: Ryan Mallon <rmallon@gmail.com> Cc: Tejun Heo <tj@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/radix-tree.c36
1 files changed, 22 insertions, 14 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d60be40c111b..9599aa72d7a0 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -342,7 +342,8 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
342 342
343 /* Increase the height. */ 343 /* Increase the height. */
344 newheight = root->height+1; 344 newheight = root->height+1;
345 node->height = newheight; 345 BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK);
346 node->path = newheight;
346 node->count = 1; 347 node->count = 1;
347 node->parent = NULL; 348 node->parent = NULL;
348 slot = root->rnode; 349 slot = root->rnode;
@@ -400,11 +401,12 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
400 /* Have to add a child node. */ 401 /* Have to add a child node. */
401 if (!(slot = radix_tree_node_alloc(root))) 402 if (!(slot = radix_tree_node_alloc(root)))
402 return -ENOMEM; 403 return -ENOMEM;
403 slot->height = height; 404 slot->path = height;
404 slot->parent = node; 405 slot->parent = node;
405 if (node) { 406 if (node) {
406 rcu_assign_pointer(node->slots[offset], slot); 407 rcu_assign_pointer(node->slots[offset], slot);
407 node->count++; 408 node->count++;
409 slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT;
408 } else 410 } else
409 rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); 411 rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
410 } 412 }
@@ -498,7 +500,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
498 } 500 }
499 node = indirect_to_ptr(node); 501 node = indirect_to_ptr(node);
500 502
501 height = node->height; 503 height = node->path & RADIX_TREE_HEIGHT_MASK;
502 if (index > radix_tree_maxindex(height)) 504 if (index > radix_tree_maxindex(height))
503 return NULL; 505 return NULL;
504 506
@@ -704,7 +706,7 @@ int radix_tree_tag_get(struct radix_tree_root *root,
704 return (index == 0); 706 return (index == 0);
705 node = indirect_to_ptr(node); 707 node = indirect_to_ptr(node);
706 708
707 height = node->height; 709 height = node->path & RADIX_TREE_HEIGHT_MASK;
708 if (index > radix_tree_maxindex(height)) 710 if (index > radix_tree_maxindex(height))
709 return 0; 711 return 0;
710 712
@@ -741,7 +743,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
741{ 743{
742 unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; 744 unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
743 struct radix_tree_node *rnode, *node; 745 struct radix_tree_node *rnode, *node;
744 unsigned long index, offset; 746 unsigned long index, offset, height;
745 747
746 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) 748 if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
747 return NULL; 749 return NULL;
@@ -772,7 +774,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
772 return NULL; 774 return NULL;
773 775
774restart: 776restart:
775 shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT; 777 height = rnode->path & RADIX_TREE_HEIGHT_MASK;
778 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
776 offset = index >> shift; 779 offset = index >> shift;
777 780
778 /* Index outside of the tree */ 781 /* Index outside of the tree */
@@ -1142,7 +1145,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item,
1142 unsigned int shift, height; 1145 unsigned int shift, height;
1143 unsigned long i; 1146 unsigned long i;
1144 1147
1145 height = slot->height; 1148 height = slot->path & RADIX_TREE_HEIGHT_MASK;
1146 shift = (height-1) * RADIX_TREE_MAP_SHIFT; 1149 shift = (height-1) * RADIX_TREE_MAP_SHIFT;
1147 1150
1148 for ( ; height > 1; height--) { 1151 for ( ; height > 1; height--) {
@@ -1205,7 +1208,8 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1205 } 1208 }
1206 1209
1207 node = indirect_to_ptr(node); 1210 node = indirect_to_ptr(node);
1208 max_index = radix_tree_maxindex(node->height); 1211 max_index = radix_tree_maxindex(node->path &
1212 RADIX_TREE_HEIGHT_MASK);
1209 if (cur_index > max_index) { 1213 if (cur_index > max_index) {
1210 rcu_read_unlock(); 1214 rcu_read_unlock();
1211 break; 1215 break;
@@ -1301,7 +1305,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1301 * 1305 *
1302 * Returns %true if @node was freed, %false otherwise. 1306 * Returns %true if @node was freed, %false otherwise.
1303 */ 1307 */
1304bool __radix_tree_delete_node(struct radix_tree_root *root, unsigned long index, 1308bool __radix_tree_delete_node(struct radix_tree_root *root,
1305 struct radix_tree_node *node) 1309 struct radix_tree_node *node)
1306{ 1310{
1307 bool deleted = false; 1311 bool deleted = false;
@@ -1320,9 +1324,10 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, unsigned long index,
1320 1324
1321 parent = node->parent; 1325 parent = node->parent;
1322 if (parent) { 1326 if (parent) {
1323 index >>= RADIX_TREE_MAP_SHIFT; 1327 unsigned int offset;
1324 1328
1325 parent->slots[index & RADIX_TREE_MAP_MASK] = NULL; 1329 offset = node->path >> RADIX_TREE_HEIGHT_SHIFT;
1330 parent->slots[offset] = NULL;
1326 parent->count--; 1331 parent->count--;
1327 } else { 1332 } else {
1328 root_tag_clear_all(root); 1333 root_tag_clear_all(root);
@@ -1386,7 +1391,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1386 node->slots[offset] = NULL; 1391 node->slots[offset] = NULL;
1387 node->count--; 1392 node->count--;
1388 1393
1389 __radix_tree_delete_node(root, index, node); 1394 __radix_tree_delete_node(root, node);
1390 1395
1391 return entry; 1396 return entry;
1392} 1397}
@@ -1419,9 +1424,12 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
1419EXPORT_SYMBOL(radix_tree_tagged); 1424EXPORT_SYMBOL(radix_tree_tagged);
1420 1425
1421static void 1426static void
1422radix_tree_node_ctor(void *node) 1427radix_tree_node_ctor(void *arg)
1423{ 1428{
1424 memset(node, 0, sizeof(struct radix_tree_node)); 1429 struct radix_tree_node *node = arg;
1430
1431 memset(node, 0, sizeof(*node));
1432 INIT_LIST_HEAD(&node->private_list);
1425} 1433}
1426 1434
1427static __init unsigned long __maxindex(unsigned int height) 1435static __init unsigned long __maxindex(unsigned int height)