summary | refs | log | tree | commit | diff | stats
path: root/mm/workingset.c
diff options
context:
space:
mode:
author: Mel Gorman <mgorman@techsingularity.net> 2016-07-28 18:46:08 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-07-28 19:07:41 -0400
commit: 1e6b10857f91685c60c341703ece4ae9bb775cf3 (patch)
tree: 32c7b4148a99ffd3af7e1208ad56a07bad1cf3c3 /mm/workingset.c
parent: ef8f2327996b5c20f11420f64e439e87c7a01604 (diff)
mm, workingset: make working set detection node-aware
Working set and refault detection is still zone-based, fix it.

Link: http://lkml.kernel.org/r/1467970510-21195-16-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/workingset.c')
-rw-r--r-- mm/workingset.c | 43
1 file changed, 20 insertions, 23 deletions
diff --git a/mm/workingset.c b/mm/workingset.c
index 2af14bb5a349..69551cfae97b 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -16,7 +16,7 @@
16/* 16/*
17 * Double CLOCK lists 17 * Double CLOCK lists
18 * 18 *
19 * Per zone, two clock lists are maintained for file pages: the 19 * Per node, two clock lists are maintained for file pages: the
20 * inactive and the active list. Freshly faulted pages start out at 20 * inactive and the active list. Freshly faulted pages start out at
21 * the head of the inactive list and page reclaim scans pages from the 21 * the head of the inactive list and page reclaim scans pages from the
22 * tail. Pages that are accessed multiple times on the inactive list 22 * tail. Pages that are accessed multiple times on the inactive list
@@ -141,11 +141,11 @@
141 * 141 *
142 * Implementation 142 * Implementation
143 * 143 *
144 * For each zone's file LRU lists, a counter for inactive evictions 144 * For each node's file LRU lists, a counter for inactive evictions
145 * and activations is maintained (zone->inactive_age). 145 * and activations is maintained (node->inactive_age).
146 * 146 *
147 * On eviction, a snapshot of this counter (along with some bits to 147 * On eviction, a snapshot of this counter (along with some bits to
148 * identify the zone) is stored in the now empty page cache radix tree 148 * identify the node) is stored in the now empty page cache radix tree
149 * slot of the evicted page. This is called a shadow entry. 149 * slot of the evicted page. This is called a shadow entry.
150 * 150 *
151 * On cache misses for which there are shadow entries, an eligible 151 * On cache misses for which there are shadow entries, an eligible
@@ -153,7 +153,7 @@
153 */ 153 */
154 154
155#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ 155#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \
156 ZONES_SHIFT + NODES_SHIFT + \ 156 NODES_SHIFT + \
157 MEM_CGROUP_ID_SHIFT) 157 MEM_CGROUP_ID_SHIFT)
158#define EVICTION_MASK (~0UL >> EVICTION_SHIFT) 158#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)
159 159
@@ -167,33 +167,30 @@
167 */ 167 */
168static unsigned int bucket_order __read_mostly; 168static unsigned int bucket_order __read_mostly;
169 169
170static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction) 170static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
171{ 171{
172 eviction >>= bucket_order; 172 eviction >>= bucket_order;
173 eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; 173 eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
174 eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone); 174 eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
175 eviction = (eviction << ZONES_SHIFT) | zone_idx(zone);
176 eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); 175 eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
177 176
178 return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); 177 return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
179} 178}
180 179
181static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep, 180static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
182 unsigned long *evictionp) 181 unsigned long *evictionp)
183{ 182{
184 unsigned long entry = (unsigned long)shadow; 183 unsigned long entry = (unsigned long)shadow;
185 int memcgid, nid, zid; 184 int memcgid, nid;
186 185
187 entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; 186 entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
188 zid = entry & ((1UL << ZONES_SHIFT) - 1);
189 entry >>= ZONES_SHIFT;
190 nid = entry & ((1UL << NODES_SHIFT) - 1); 187 nid = entry & ((1UL << NODES_SHIFT) - 1);
191 entry >>= NODES_SHIFT; 188 entry >>= NODES_SHIFT;
192 memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); 189 memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
193 entry >>= MEM_CGROUP_ID_SHIFT; 190 entry >>= MEM_CGROUP_ID_SHIFT;
194 191
195 *memcgidp = memcgid; 192 *memcgidp = memcgid;
196 *zonep = NODE_DATA(nid)->node_zones + zid; 193 *pgdat = NODE_DATA(nid);
197 *evictionp = entry << bucket_order; 194 *evictionp = entry << bucket_order;
198} 195}
199 196
@@ -208,7 +205,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep,
208void *workingset_eviction(struct address_space *mapping, struct page *page) 205void *workingset_eviction(struct address_space *mapping, struct page *page)
209{ 206{
210 struct mem_cgroup *memcg = page_memcg(page); 207 struct mem_cgroup *memcg = page_memcg(page);
211 struct zone *zone = page_zone(page); 208 struct pglist_data *pgdat = page_pgdat(page);
212 int memcgid = mem_cgroup_id(memcg); 209 int memcgid = mem_cgroup_id(memcg);
213 unsigned long eviction; 210 unsigned long eviction;
214 struct lruvec *lruvec; 211 struct lruvec *lruvec;
@@ -218,9 +215,9 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
218 VM_BUG_ON_PAGE(page_count(page), page); 215 VM_BUG_ON_PAGE(page_count(page), page);
219 VM_BUG_ON_PAGE(!PageLocked(page), page); 216 VM_BUG_ON_PAGE(!PageLocked(page), page);
220 217
221 lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg); 218 lruvec = mem_cgroup_lruvec(pgdat, memcg);
222 eviction = atomic_long_inc_return(&lruvec->inactive_age); 219 eviction = atomic_long_inc_return(&lruvec->inactive_age);
223 return pack_shadow(memcgid, zone, eviction); 220 return pack_shadow(memcgid, pgdat, eviction);
224} 221}
225 222
226/** 223/**
@@ -228,7 +225,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
228 * @shadow: shadow entry of the evicted page 225 * @shadow: shadow entry of the evicted page
229 * 226 *
230 * Calculates and evaluates the refault distance of the previously 227 * Calculates and evaluates the refault distance of the previously
231 * evicted page in the context of the zone it was allocated in. 228 * evicted page in the context of the node it was allocated in.
232 * 229 *
233 * Returns %true if the page should be activated, %false otherwise. 230 * Returns %true if the page should be activated, %false otherwise.
234 */ 231 */
@@ -240,10 +237,10 @@ bool workingset_refault(void *shadow)
240 unsigned long eviction; 237 unsigned long eviction;
241 struct lruvec *lruvec; 238 struct lruvec *lruvec;
242 unsigned long refault; 239 unsigned long refault;
243 struct zone *zone; 240 struct pglist_data *pgdat;
244 int memcgid; 241 int memcgid;
245 242
246 unpack_shadow(shadow, &memcgid, &zone, &eviction); 243 unpack_shadow(shadow, &memcgid, &pgdat, &eviction);
247 244
248 rcu_read_lock(); 245 rcu_read_lock();
249 /* 246 /*
@@ -267,7 +264,7 @@ bool workingset_refault(void *shadow)
267 rcu_read_unlock(); 264 rcu_read_unlock();
268 return false; 265 return false;
269 } 266 }
270 lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg); 267 lruvec = mem_cgroup_lruvec(pgdat, memcg);
271 refault = atomic_long_read(&lruvec->inactive_age); 268 refault = atomic_long_read(&lruvec->inactive_age);
272 active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); 269 active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
273 rcu_read_unlock(); 270 rcu_read_unlock();
@@ -290,10 +287,10 @@ bool workingset_refault(void *shadow)
290 */ 287 */
291 refault_distance = (refault - eviction) & EVICTION_MASK; 288 refault_distance = (refault - eviction) & EVICTION_MASK;
292 289
293 inc_zone_state(zone, WORKINGSET_REFAULT); 290 inc_node_state(pgdat, WORKINGSET_REFAULT);
294 291
295 if (refault_distance <= active_file) { 292 if (refault_distance <= active_file) {
296 inc_zone_state(zone, WORKINGSET_ACTIVATE); 293 inc_node_state(pgdat, WORKINGSET_ACTIVATE);
297 return true; 294 return true;
298 } 295 }
299 return false; 296 return false;
@@ -436,7 +433,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
436 } 433 }
437 } 434 }
438 BUG_ON(node->count); 435 BUG_ON(node->count);
439 inc_zone_state(page_zone(virt_to_page(node)), WORKINGSET_NODERECLAIM); 436 inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
440 if (!__radix_tree_delete_node(&mapping->page_tree, node)) 437 if (!__radix_tree_delete_node(&mapping->page_tree, node))
441 BUG(); 438 BUG();
442 439