about | summary | refs | log | tree | commit | diff | stats
path: root/include/linux
diff options
context:
space:
mode:
author: Johannes Weiner <hannes@cmpxchg.org> 2014-04-03 17:47:56 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-04-03 19:21:01 -0400
commit: 449dd6984d0e47643c04c807f609dd56d48d5bcc (patch)
tree: 69f4a0a90290b048e63effc617c8ec907e8d6696 /include/linux
parent: 139e561660fe11e0fc35e142a800df3dd7d03e9d (diff)
mm: keep page cache radix tree nodes in check
Previously, page cache radix tree nodes were freed after reclaim emptied out their page pointers. But now reclaim stores shadow entries in their place, which are only reclaimed when the inodes themselves are reclaimed. This is problematic for bigger files that are still in use after they have a significant amount of their cache reclaimed, without any of those pages actually refaulting. The shadow entries will just sit there and waste memory. In the worst case, the shadow entries will accumulate until the machine runs out of memory.

To get this under control, the VM will track radix tree nodes exclusively containing shadow entries on a per-NUMA node list. Per-NUMA rather than global because we expect the radix tree nodes themselves to be allocated node-locally and we want to reduce cross-node references of otherwise independent cache workloads. A simple shrinker will then reclaim these nodes on memory pressure.

A few things need to be stored in the radix tree node to implement the shadow node LRU and allow tree deletions coming from the list:

1. There is no index available that would describe the reverse path from the node up to the tree root, which is needed to perform a deletion. To solve this, encode in each node its offset inside the parent. This can be stored in the unused upper bits of the same member that stores the node's height at no extra space cost.

2. The number of shadow entries needs to be counted in addition to the regular entries, to quickly detect when the node is ready to go to the shadow node LRU list. The current entry count is an unsigned int but the maximum number of entries is 64, so a shadow counter can easily be stored in the unused upper bits.

3. Tree modification needs tree lock and tree root, which are located in the address space, so store an address_space backpointer in the node. The parent pointer of the node is in a union with the 2-word rcu_head, so the backpointer comes at no extra cost as well.

4. The node needs to be linked to an LRU list, which requires a list head inside the node. This does increase the size of the node, but it does not change the number of objects that fit into a slab page.

[akpm@linux-foundation.org: export the right function]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- include/linux/list_lru.h 8
-rw-r--r-- include/linux/mmzone.h 1
-rw-r--r-- include/linux/radix-tree.h 32
-rw-r--r-- include/linux/swap.h 31
4 files changed, 63 insertions, 9 deletions
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 3ce541753c88..f3434533fbf8 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -13,6 +13,8 @@
13/* list_lru_walk_cb has to always return one of those */ 13/* list_lru_walk_cb has to always return one of those */
14enum lru_status { 14enum lru_status {
15 LRU_REMOVED, /* item removed from list */ 15 LRU_REMOVED, /* item removed from list */
16 LRU_REMOVED_RETRY, /* item removed, but lock has been
17 dropped and reacquired */
16 LRU_ROTATE, /* item referenced, give another pass */ 18 LRU_ROTATE, /* item referenced, give another pass */
17 LRU_SKIP, /* item cannot be locked, skip */ 19 LRU_SKIP, /* item cannot be locked, skip */
18 LRU_RETRY, /* item not freeable. May drop the lock 20 LRU_RETRY, /* item not freeable. May drop the lock
@@ -32,7 +34,11 @@ struct list_lru {
32}; 34};
33 35
34void list_lru_destroy(struct list_lru *lru); 36void list_lru_destroy(struct list_lru *lru);
35int list_lru_init(struct list_lru *lru); 37int list_lru_init_key(struct list_lru *lru, struct lock_class_key *key);
38static inline int list_lru_init(struct list_lru *lru)
39{
40 return list_lru_init_key(lru, NULL);
41}
36 42
37/** 43/**
38 * list_lru_add: add an element to the lru list's tail 44 * list_lru_add: add an element to the lru list's tail
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f25db1d74a21..fac5509c18f0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -144,6 +144,7 @@ enum zone_stat_item {
144#endif 144#endif
145 WORKINGSET_REFAULT, 145 WORKINGSET_REFAULT,
146 WORKINGSET_ACTIVATE, 146 WORKINGSET_ACTIVATE,
147 WORKINGSET_NODERECLAIM,
147 NR_ANON_TRANSPARENT_HUGEPAGES, 148 NR_ANON_TRANSPARENT_HUGEPAGES,
148 NR_FREE_CMA_PAGES, 149 NR_FREE_CMA_PAGES,
149 NR_VM_ZONE_STAT_ITEMS }; 150 NR_VM_ZONE_STAT_ITEMS };
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 13636c40bc42..33170dbd9db4 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -72,21 +72,37 @@ static inline int radix_tree_is_indirect_ptr(void *ptr)
72#define RADIX_TREE_TAG_LONGS \ 72#define RADIX_TREE_TAG_LONGS \
73 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) 73 ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
74 74
75#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
76#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
77 RADIX_TREE_MAP_SHIFT))
78
79/* Height component in node->path */
80#define RADIX_TREE_HEIGHT_SHIFT (RADIX_TREE_MAX_PATH + 1)
81#define RADIX_TREE_HEIGHT_MASK ((1UL << RADIX_TREE_HEIGHT_SHIFT) - 1)
82
83/* Internally used bits of node->count */
84#define RADIX_TREE_COUNT_SHIFT (RADIX_TREE_MAP_SHIFT + 1)
85#define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1)
86
75struct radix_tree_node { 87struct radix_tree_node {
76 unsigned int height; /* Height from the bottom */ 88 unsigned int path; /* Offset in parent & height from the bottom */
77 unsigned int count; 89 unsigned int count;
78 union { 90 union {
79 struct radix_tree_node *parent; /* Used when ascending tree */ 91 struct {
80 struct rcu_head rcu_head; /* Used when freeing node */ 92 /* Used when ascending tree */
93 struct radix_tree_node *parent;
94 /* For tree user */
95 void *private_data;
96 };
97 /* Used when freeing node */
98 struct rcu_head rcu_head;
81 }; 99 };
100 /* For tree user */
101 struct list_head private_list;
82 void __rcu *slots[RADIX_TREE_MAP_SIZE]; 102 void __rcu *slots[RADIX_TREE_MAP_SIZE];
83 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 103 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
84}; 104};
85 105
86#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
87#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
88 RADIX_TREE_MAP_SHIFT))
89
90/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ 106/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
91struct radix_tree_root { 107struct radix_tree_root {
92 unsigned int height; 108 unsigned int height;
@@ -251,7 +267,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
251 struct radix_tree_node **nodep, void ***slotp); 267 struct radix_tree_node **nodep, void ***slotp);
252void *radix_tree_lookup(struct radix_tree_root *, unsigned long); 268void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
253void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); 269void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
254bool __radix_tree_delete_node(struct radix_tree_root *root, unsigned long index, 270bool __radix_tree_delete_node(struct radix_tree_root *root,
255 struct radix_tree_node *node); 271 struct radix_tree_node *node);
256void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); 272void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
257void *radix_tree_delete(struct radix_tree_root *, unsigned long); 273void *radix_tree_delete(struct radix_tree_root *, unsigned long);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b83cf61403ed..350711560753 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -264,6 +264,37 @@ struct swap_list_t {
264void *workingset_eviction(struct address_space *mapping, struct page *page); 264void *workingset_eviction(struct address_space *mapping, struct page *page);
265bool workingset_refault(void *shadow); 265bool workingset_refault(void *shadow);
266void workingset_activation(struct page *page); 266void workingset_activation(struct page *page);
267extern struct list_lru workingset_shadow_nodes;
268
269static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
270{
271 return node->count & RADIX_TREE_COUNT_MASK;
272}
273
274static inline void workingset_node_pages_inc(struct radix_tree_node *node)
275{
276 node->count++;
277}
278
279static inline void workingset_node_pages_dec(struct radix_tree_node *node)
280{
281 node->count--;
282}
283
284static inline unsigned int workingset_node_shadows(struct radix_tree_node *node)
285{
286 return node->count >> RADIX_TREE_COUNT_SHIFT;
287}
288
289static inline void workingset_node_shadows_inc(struct radix_tree_node *node)
290{
291 node->count += 1U << RADIX_TREE_COUNT_SHIFT;
292}
293
294static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
295{
296 node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
297}
267 298
268/* linux/mm/page_alloc.c */ 299/* linux/mm/page_alloc.c */
269extern unsigned long totalram_pages; 300extern unsigned long totalram_pages;