diff options
author | Mel Gorman <mgorman@techsingularity.net> | 2016-07-28 18:46:08 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 19:07:41 -0400 |
commit | 1e6b10857f91685c60c341703ece4ae9bb775cf3 (patch) | |
tree | 32c7b4148a99ffd3af7e1208ad56a07bad1cf3c3 /mm/workingset.c | |
parent | ef8f2327996b5c20f11420f64e439e87c7a01604 (diff) |
mm, workingset: make working set detection node-aware
Working set and refault detection is still zone-based; convert it to operate per node.
Link: http://lkml.kernel.org/r/1467970510-21195-16-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/workingset.c')
-rw-r--r-- | mm/workingset.c | 43 |
1 files changed, 20 insertions, 23 deletions
diff --git a/mm/workingset.c b/mm/workingset.c
index 2af14bb5a349..69551cfae97b 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c | |||
@@ -16,7 +16,7 @@ | |||
16 | /* | 16 | /* |
17 | * Double CLOCK lists | 17 | * Double CLOCK lists |
18 | * | 18 | * |
19 | * Per zone, two clock lists are maintained for file pages: the | 19 | * Per node, two clock lists are maintained for file pages: the |
20 | * inactive and the active list. Freshly faulted pages start out at | 20 | * inactive and the active list. Freshly faulted pages start out at |
21 | * the head of the inactive list and page reclaim scans pages from the | 21 | * the head of the inactive list and page reclaim scans pages from the |
22 | * tail. Pages that are accessed multiple times on the inactive list | 22 | * tail. Pages that are accessed multiple times on the inactive list |
@@ -141,11 +141,11 @@ | |||
141 | * | 141 | * |
142 | * Implementation | 142 | * Implementation |
143 | * | 143 | * |
144 | * For each zone's file LRU lists, a counter for inactive evictions | 144 | * For each node's file LRU lists, a counter for inactive evictions |
145 | * and activations is maintained (zone->inactive_age). | 145 | * and activations is maintained (node->inactive_age). |
146 | * | 146 | * |
147 | * On eviction, a snapshot of this counter (along with some bits to | 147 | * On eviction, a snapshot of this counter (along with some bits to |
148 | * identify the zone) is stored in the now empty page cache radix tree | 148 | * identify the node) is stored in the now empty page cache radix tree |
149 | * slot of the evicted page. This is called a shadow entry. | 149 | * slot of the evicted page. This is called a shadow entry. |
150 | * | 150 | * |
151 | * On cache misses for which there are shadow entries, an eligible | 151 | * On cache misses for which there are shadow entries, an eligible |
@@ -153,7 +153,7 @@ | |||
153 | */ | 153 | */ |
154 | 154 | ||
155 | #define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ | 155 | #define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ |
156 | ZONES_SHIFT + NODES_SHIFT + \ | 156 | NODES_SHIFT + \ |
157 | MEM_CGROUP_ID_SHIFT) | 157 | MEM_CGROUP_ID_SHIFT) |
158 | #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) | 158 | #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) |
159 | 159 | ||
@@ -167,33 +167,30 @@ | |||
167 | */ | 167 | */ |
168 | static unsigned int bucket_order __read_mostly; | 168 | static unsigned int bucket_order __read_mostly; |
169 | 169 | ||
170 | static void *pack_shadow(int memcgid, struct zone *zone, unsigned long eviction) | 170 | static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction) |
171 | { | 171 | { |
172 | eviction >>= bucket_order; | 172 | eviction >>= bucket_order; |
173 | eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; | 173 | eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; |
174 | eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone); | 174 | eviction = (eviction << NODES_SHIFT) | pgdat->node_id; |
175 | eviction = (eviction << ZONES_SHIFT) | zone_idx(zone); | ||
176 | eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); | 175 | eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); |
177 | 176 | ||
178 | return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); | 177 | return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); |
179 | } | 178 | } |
180 | 179 | ||
181 | static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep, | 180 | static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, |
182 | unsigned long *evictionp) | 181 | unsigned long *evictionp) |
183 | { | 182 | { |
184 | unsigned long entry = (unsigned long)shadow; | 183 | unsigned long entry = (unsigned long)shadow; |
185 | int memcgid, nid, zid; | 184 | int memcgid, nid; |
186 | 185 | ||
187 | entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; | 186 | entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; |
188 | zid = entry & ((1UL << ZONES_SHIFT) - 1); | ||
189 | entry >>= ZONES_SHIFT; | ||
190 | nid = entry & ((1UL << NODES_SHIFT) - 1); | 187 | nid = entry & ((1UL << NODES_SHIFT) - 1); |
191 | entry >>= NODES_SHIFT; | 188 | entry >>= NODES_SHIFT; |
192 | memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); | 189 | memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); |
193 | entry >>= MEM_CGROUP_ID_SHIFT; | 190 | entry >>= MEM_CGROUP_ID_SHIFT; |
194 | 191 | ||
195 | *memcgidp = memcgid; | 192 | *memcgidp = memcgid; |
196 | *zonep = NODE_DATA(nid)->node_zones + zid; | 193 | *pgdat = NODE_DATA(nid); |
197 | *evictionp = entry << bucket_order; | 194 | *evictionp = entry << bucket_order; |
198 | } | 195 | } |
199 | 196 | ||
@@ -208,7 +205,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, struct zone **zonep, | |||
208 | void *workingset_eviction(struct address_space *mapping, struct page *page) | 205 | void *workingset_eviction(struct address_space *mapping, struct page *page) |
209 | { | 206 | { |
210 | struct mem_cgroup *memcg = page_memcg(page); | 207 | struct mem_cgroup *memcg = page_memcg(page); |
211 | struct zone *zone = page_zone(page); | 208 | struct pglist_data *pgdat = page_pgdat(page); |
212 | int memcgid = mem_cgroup_id(memcg); | 209 | int memcgid = mem_cgroup_id(memcg); |
213 | unsigned long eviction; | 210 | unsigned long eviction; |
214 | struct lruvec *lruvec; | 211 | struct lruvec *lruvec; |
@@ -218,9 +215,9 @@ void *workingset_eviction(struct address_space *mapping, struct page *page) | |||
218 | VM_BUG_ON_PAGE(page_count(page), page); | 215 | VM_BUG_ON_PAGE(page_count(page), page); |
219 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 216 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
220 | 217 | ||
221 | lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg); | 218 | lruvec = mem_cgroup_lruvec(pgdat, memcg); |
222 | eviction = atomic_long_inc_return(&lruvec->inactive_age); | 219 | eviction = atomic_long_inc_return(&lruvec->inactive_age); |
223 | return pack_shadow(memcgid, zone, eviction); | 220 | return pack_shadow(memcgid, pgdat, eviction); |
224 | } | 221 | } |
225 | 222 | ||
226 | /** | 223 | /** |
@@ -228,7 +225,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page) | |||
228 | * @shadow: shadow entry of the evicted page | 225 | * @shadow: shadow entry of the evicted page |
229 | * | 226 | * |
230 | * Calculates and evaluates the refault distance of the previously | 227 | * Calculates and evaluates the refault distance of the previously |
231 | * evicted page in the context of the zone it was allocated in. | 228 | * evicted page in the context of the node it was allocated in. |
232 | * | 229 | * |
233 | * Returns %true if the page should be activated, %false otherwise. | 230 | * Returns %true if the page should be activated, %false otherwise. |
234 | */ | 231 | */ |
@@ -240,10 +237,10 @@ bool workingset_refault(void *shadow) | |||
240 | unsigned long eviction; | 237 | unsigned long eviction; |
241 | struct lruvec *lruvec; | 238 | struct lruvec *lruvec; |
242 | unsigned long refault; | 239 | unsigned long refault; |
243 | struct zone *zone; | 240 | struct pglist_data *pgdat; |
244 | int memcgid; | 241 | int memcgid; |
245 | 242 | ||
246 | unpack_shadow(shadow, &memcgid, &zone, &eviction); | 243 | unpack_shadow(shadow, &memcgid, &pgdat, &eviction); |
247 | 244 | ||
248 | rcu_read_lock(); | 245 | rcu_read_lock(); |
249 | /* | 246 | /* |
@@ -267,7 +264,7 @@ bool workingset_refault(void *shadow) | |||
267 | rcu_read_unlock(); | 264 | rcu_read_unlock(); |
268 | return false; | 265 | return false; |
269 | } | 266 | } |
270 | lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg); | 267 | lruvec = mem_cgroup_lruvec(pgdat, memcg); |
271 | refault = atomic_long_read(&lruvec->inactive_age); | 268 | refault = atomic_long_read(&lruvec->inactive_age); |
272 | active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); | 269 | active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE); |
273 | rcu_read_unlock(); | 270 | rcu_read_unlock(); |
@@ -290,10 +287,10 @@ bool workingset_refault(void *shadow) | |||
290 | */ | 287 | */ |
291 | refault_distance = (refault - eviction) & EVICTION_MASK; | 288 | refault_distance = (refault - eviction) & EVICTION_MASK; |
292 | 289 | ||
293 | inc_zone_state(zone, WORKINGSET_REFAULT); | 290 | inc_node_state(pgdat, WORKINGSET_REFAULT); |
294 | 291 | ||
295 | if (refault_distance <= active_file) { | 292 | if (refault_distance <= active_file) { |
296 | inc_zone_state(zone, WORKINGSET_ACTIVATE); | 293 | inc_node_state(pgdat, WORKINGSET_ACTIVATE); |
297 | return true; | 294 | return true; |
298 | } | 295 | } |
299 | return false; | 296 | return false; |
@@ -436,7 +433,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |||
436 | } | 433 | } |
437 | } | 434 | } |
438 | BUG_ON(node->count); | 435 | BUG_ON(node->count); |
439 | inc_zone_state(page_zone(virt_to_page(node)), WORKINGSET_NODERECLAIM); | 436 | inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM); |
440 | if (!__radix_tree_delete_node(&mapping->page_tree, node)) | 437 | if (!__radix_tree_delete_node(&mapping->page_tree, node)) |
441 | BUG(); | 438 | BUG(); |
442 | 439 | ||