author    Ingo Molnar <mingo@elte.hu>    2008-10-28 11:54:49 -0400
committer Ingo Molnar <mingo@elte.hu>    2008-10-28 11:54:49 -0400
commit    d1a76187a5be4f89c6cb19d800cb5fb7aac735c5 (patch)
tree      2fac3ffbfffc7560eeef8364b541d0d7a0057920 /mm/memcontrol.c
parent    c7e78cff6b7518212247fb20b1dc6411540dc9af (diff)
parent    0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
Merge commit 'v2.6.28-rc2' into core/locking
Conflicts:
arch/um/include/asm/system.h
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c   481
1 file changed, 231 insertions, 250 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0f1f7a7374ba..866dcc7eeb0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -32,11 +32,12 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
+#include <linux/page_cgroup.h>
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
-static struct kmem_cache *page_cgroup_cache __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES 5
 
 /*
@@ -65,11 +66,10 @@ struct mem_cgroup_stat {
 /*
  * For accounting under irq disable, no need for increment preempt count.
  */
-static void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat *stat,
+static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat,
                 enum mem_cgroup_stat_index idx, int val)
 {
-        int cpu = smp_processor_id();
-        stat->cpustat[cpu].count[idx] += val;
+        stat->count[idx] += val;
 }
 
 static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
@@ -85,22 +85,13 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
 /*
  * per-zone information in memory controller.
  */
-
-enum mem_cgroup_zstat_index {
-        MEM_CGROUP_ZSTAT_ACTIVE,
-        MEM_CGROUP_ZSTAT_INACTIVE,
-
-        NR_MEM_CGROUP_ZSTAT,
-};
-
 struct mem_cgroup_per_zone {
         /*
          * spin_lock to protect the per cgroup LRU
          */
         spinlock_t lru_lock;
-        struct list_head active_list;
-        struct list_head inactive_list;
-        unsigned long count[NR_MEM_CGROUP_ZSTAT];
+        struct list_head lists[NR_LRU_LISTS];
+        unsigned long count[NR_LRU_LISTS];
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)])
@@ -144,69 +135,52 @@ struct mem_cgroup {
 };
 static struct mem_cgroup init_mem_cgroup;
 
-/*
- * We use the lower bit of the page->page_cgroup pointer as a bit spin
- * lock. We need to ensure that page->page_cgroup is at least two
- * byte aligned (based on comments from Nick Piggin). But since
- * bit_spin_lock doesn't actually set that lock bit in a non-debug
- * uniprocessor kernel, we should avoid setting it here too.
- */
-#define PAGE_CGROUP_LOCK_BIT 0x0
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
-#else
-#define PAGE_CGROUP_LOCK 0x0
-#endif
-
-/*
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- */
-struct page_cgroup {
-        struct list_head lru;           /* per cgroup LRU list */
-        struct page *page;
-        struct mem_cgroup *mem_cgroup;
-        int flags;
-};
-#define PAGE_CGROUP_FLAG_CACHE  (0x1)   /* charged as cache */
-#define PAGE_CGROUP_FLAG_ACTIVE (0x2)   /* page is active in this cgroup */
-
-static int page_cgroup_nid(struct page_cgroup *pc)
-{
-        return page_to_nid(pc->page);
-}
-
-static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
-{
-        return page_zonenum(pc->page);
-}
-
 enum charge_type {
         MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
         MEM_CGROUP_CHARGE_TYPE_MAPPED,
+        MEM_CGROUP_CHARGE_TYPE_SHMEM,   /* used by page migration of shmem */
         MEM_CGROUP_CHARGE_TYPE_FORCE,   /* used by force_empty */
+        NR_CHARGE_TYPE,
+};
+
+/* only for here (for easy reading.) */
+#define PCGF_CACHE      (1UL << PCG_CACHE)
+#define PCGF_USED       (1UL << PCG_USED)
+#define PCGF_ACTIVE     (1UL << PCG_ACTIVE)
+#define PCGF_LOCK       (1UL << PCG_LOCK)
+#define PCGF_FILE       (1UL << PCG_FILE)
+static const unsigned long
+pcg_default_flags[NR_CHARGE_TYPE] = {
+        PCGF_CACHE | PCGF_FILE | PCGF_USED | PCGF_LOCK, /* File Cache */
+        PCGF_ACTIVE | PCGF_USED | PCGF_LOCK, /* Anon */
+        PCGF_ACTIVE | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
+        0, /* FORCE */
 };
 
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
-                                        bool charge)
+static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+                                        struct page_cgroup *pc,
+                                        bool charge)
 {
         int val = (charge)? 1 : -1;
         struct mem_cgroup_stat *stat = &mem->stat;
+        struct mem_cgroup_stat_cpu *cpustat;
 
         VM_BUG_ON(!irqs_disabled());
-        if (flags & PAGE_CGROUP_FLAG_CACHE)
-                __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
+
+        cpustat = &stat->cpustat[smp_processor_id()];
+        if (PageCgroupCache(pc))
+                __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val);
         else
-                __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+                __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val);
 
         if (charge)
-                __mem_cgroup_stat_add_safe(stat,
+                __mem_cgroup_stat_add_safe(cpustat,
                                 MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
         else
-                __mem_cgroup_stat_add_safe(stat,
+                __mem_cgroup_stat_add_safe(cpustat,
                                 MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
 }
 
@@ -227,7 +201,7 @@ page_cgroup_zoneinfo(struct page_cgroup *pc)
 }
 
 static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
-                                        enum mem_cgroup_zstat_index idx)
+                                        enum lru_list idx)
 {
         int nid, zid;
         struct mem_cgroup_per_zone *mz;
@@ -250,89 +224,89 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 {
+        /*
+         * mm_update_next_owner() may clear mm->owner to NULL
+         * if it races with swapoff, page migration, etc.
+         * So this can be called with p == NULL.
+         */
+        if (unlikely(!p))
+                return NULL;
+
         return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
                                 struct mem_cgroup, css);
 }
 
-static inline int page_cgroup_locked(struct page *page)
-{
-        return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
-{
-        VM_BUG_ON(!page_cgroup_locked(page));
-        page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
-}
-
-struct page_cgroup *page_get_page_cgroup(struct page *page)
-{
-        return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
-}
-
-static void lock_page_cgroup(struct page *page)
-{
-        bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static int try_lock_page_cgroup(struct page *page)
-{
-        return bit_spin_trylock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
-static void unlock_page_cgroup(struct page *page)
-{
-        bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
-}
-
 static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
                         struct page_cgroup *pc)
 {
-        int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+        int lru = LRU_BASE;
+
+        if (PageCgroupUnevictable(pc))
+                lru = LRU_UNEVICTABLE;
+        else {
+                if (PageCgroupActive(pc))
+                        lru += LRU_ACTIVE;
+                if (PageCgroupFile(pc))
+                        lru += LRU_FILE;
+        }
 
-        if (from)
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-        else
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+        MEM_CGROUP_ZSTAT(mz, lru) -= 1;
 
-        mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
+        mem_cgroup_charge_statistics(pc->mem_cgroup, pc, false);
         list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
                                 struct page_cgroup *pc)
 {
-        int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+        int lru = LRU_BASE;
 
-        if (!to) {
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-                list_add(&pc->lru, &mz->inactive_list);
-        } else {
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-                list_add(&pc->lru, &mz->active_list);
+        if (PageCgroupUnevictable(pc))
+                lru = LRU_UNEVICTABLE;
+        else {
+                if (PageCgroupActive(pc))
+                        lru += LRU_ACTIVE;
+                if (PageCgroupFile(pc))
+                        lru += LRU_FILE;
         }
-        mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
+
+        MEM_CGROUP_ZSTAT(mz, lru) += 1;
+        list_add(&pc->lru, &mz->lists[lru]);
+
+        mem_cgroup_charge_statistics(pc->mem_cgroup, pc, true);
 }
 
-static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, enum lru_list lru)
 {
-        int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
         struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
+        int active = PageCgroupActive(pc);
+        int file = PageCgroupFile(pc);
+        int unevictable = PageCgroupUnevictable(pc);
+        enum lru_list from = unevictable ? LRU_UNEVICTABLE :
+                                (LRU_FILE * !!file + !!active);
 
-        if (from)
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
-        else
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
+        if (lru == from)
+                return;
 
-        if (active) {
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
-                pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
-                list_move(&pc->lru, &mz->active_list);
+        MEM_CGROUP_ZSTAT(mz, from) -= 1;
+        /*
+         * However this is done under mz->lru_lock, another flags, which
+         * are not related to LRU, will be modified from out-of-lock.
+         * We have to use atomic set/clear flags.
+         */
+        if (is_unevictable_lru(lru)) {
+                ClearPageCgroupActive(pc);
+                SetPageCgroupUnevictable(pc);
         } else {
-                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
-                pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
-                list_move(&pc->lru, &mz->inactive_list);
+                if (is_active_lru(lru))
+                        SetPageCgroupActive(pc);
+                else
+                        ClearPageCgroupActive(pc);
+                ClearPageCgroupUnevictable(pc);
         }
+
+        MEM_CGROUP_ZSTAT(mz, lru) += 1;
+        list_move(&pc->lru, &mz->lists[lru]);
 }
 
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
@@ -348,7 +322,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 /*
  * This routine assumes that the appropriate zone's lru lock is already held
  */
-void mem_cgroup_move_lists(struct page *page, bool active)
+void mem_cgroup_move_lists(struct page *page, enum lru_list lru)
 {
         struct page_cgroup *pc;
         struct mem_cgroup_per_zone *mz;
@@ -364,17 +338,16 @@ void mem_cgroup_move_lists(struct page *page, bool active)
          * safely get to page_cgroup without it, so just try_lock it:
          * mem_cgroup_isolate_pages allows for page left on wrong list.
          */
-        if (!try_lock_page_cgroup(page))
+        pc = lookup_page_cgroup(page);
+        if (!trylock_page_cgroup(pc))
                 return;
-
-        pc = page_get_page_cgroup(page);
-        if (pc) {
+        if (pc && PageCgroupUsed(pc)) {
                 mz = page_cgroup_zoneinfo(pc);
                 spin_lock_irqsave(&mz->lru_lock, flags);
-                __mem_cgroup_move_lists(pc, active);
+                __mem_cgroup_move_lists(pc, lru);
                 spin_unlock_irqrestore(&mz->lru_lock, flags);
         }
-        unlock_page_cgroup(page);
+        unlock_page_cgroup(pc);
 }
 
 /*
@@ -395,21 +368,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 }
 
 /*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
-        unsigned long active, inactive;
-        /* active and inactive are the number of pages. 'long' is ok.*/
-        active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
-        inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
-        return (long) (active / (inactive + 1));
-}
-
-/*
  * prev_priority control...this will be used in memory reclaim path.
  */
 int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -436,28 +394,17 @@ void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
  * (see include/linux/mmzone.h)
  */
 
-long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
-                                        struct zone *zone, int priority)
+long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
+                                        int priority, enum lru_list lru)
 {
-        long nr_active;
+        long nr_pages;
         int nid = zone->zone_pgdat->node_id;
         int zid = zone_idx(zone);
         struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
 
-        nr_active = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE);
-        return (nr_active >> priority);
-}
-
-long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
-                                        struct zone *zone, int priority)
-{
-        long nr_inactive;
-        int nid = zone->zone_pgdat->node_id;
-        int zid = zone_idx(zone);
-        struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
+        nr_pages = MEM_CGROUP_ZSTAT(mz, lru);
 
-        nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
-        return (nr_inactive >> priority);
+        return (nr_pages >> priority);
 }
 
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -465,7 +412,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                         unsigned long *scanned, int order,
                                         int mode, struct zone *z,
                                         struct mem_cgroup *mem_cont,
-                                        int active)
+                                        int active, int file)
 {
         unsigned long nr_taken = 0;
         struct page *page;
@@ -476,38 +423,38 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
         int nid = z->zone_pgdat->node_id;
         int zid = zone_idx(z);
         struct mem_cgroup_per_zone *mz;
+        int lru = LRU_FILE * !!file + !!active;
 
         BUG_ON(!mem_cont);
         mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-        if (active)
-                src = &mz->active_list;
-        else
-                src = &mz->inactive_list;
-
+        src = &mz->lists[lru];
 
         spin_lock(&mz->lru_lock);
         scan = 0;
         list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
                 if (scan >= nr_to_scan)
                         break;
+                if (unlikely(!PageCgroupUsed(pc)))
+                        continue;
                 page = pc->page;
 
                 if (unlikely(!PageLRU(page)))
                         continue;
 
-                if (PageActive(page) && !active) {
-                        __mem_cgroup_move_lists(pc, true);
-                        continue;
-                }
-                if (!PageActive(page) && active) {
-                        __mem_cgroup_move_lists(pc, false);
+                /*
+                 * TODO: play better with lumpy reclaim, grabbing anything.
+                 */
+                if (PageUnevictable(page) ||
+                    (PageActive(page) && !active) ||
+                    (!PageActive(page) && active)) {
+                        __mem_cgroup_move_lists(pc, page_lru(page));
                         continue;
                 }
 
                 scan++;
                 list_move(&pc->lru, &pc_list);
 
-                if (__isolate_lru_page(page, mode) == 0) {
+                if (__isolate_lru_page(page, mode, file) == 0) {
                         list_move(&page->lru, dst);
                         nr_taken++;
                 }
@@ -532,23 +479,29 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 {
         struct mem_cgroup *mem;
         struct page_cgroup *pc;
-        unsigned long flags;
         unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
         struct mem_cgroup_per_zone *mz;
+        unsigned long flags;
 
-        pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
-        if (unlikely(pc == NULL))
-                goto err;
-
+        pc = lookup_page_cgroup(page);
+        /* can happen at boot */
+        if (unlikely(!pc))
+                return 0;
+        prefetchw(pc);
         /*
          * We always charge the cgroup the mm_struct belongs to.
          * The mm_struct's mem_cgroup changes on task migration if the
          * thread group leader migrates. It's possible that mm is not
          * set, if so charge the init_mm (happens for pagecache usage).
          */
+
         if (likely(!memcg)) {
                 rcu_read_lock();
                 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+                if (unlikely(!mem)) {
+                        rcu_read_unlock();
+                        return 0;
+                }
                 /*
                  * For every charge from the cgroup, increment reference count
                  */
@@ -559,7 +512,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                 css_get(&memcg->css);
         }
 
-        while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+        while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
                 if (!(gfp_mask & __GFP_WAIT))
                         goto out;
 
@@ -582,39 +535,33 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                 }
         }
 
-        pc->mem_cgroup = mem;
-        pc->page = page;
-        /*
-         * If a page is accounted as a page cache, insert to inactive list.
-         * If anon, insert to active list.
-         */
-        if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
-                pc->flags = PAGE_CGROUP_FLAG_CACHE;
-        else
-                pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
-        lock_page_cgroup(page);
-        if (unlikely(page_get_page_cgroup(page))) {
-                unlock_page_cgroup(page);
+        lock_page_cgroup(pc);
+        if (unlikely(PageCgroupUsed(pc))) {
+                unlock_page_cgroup(pc);
                 res_counter_uncharge(&mem->res, PAGE_SIZE);
                 css_put(&mem->css);
-                kmem_cache_free(page_cgroup_cache, pc);
+
                 goto done;
         }
-        page_assign_page_cgroup(page, pc);
+        pc->mem_cgroup = mem;
+        /*
+         * If a page is accounted as a page cache, insert to inactive list.
+         * If anon, insert to active list.
+         */
+        pc->flags = pcg_default_flags[ctype];
 
         mz = page_cgroup_zoneinfo(pc);
+
         spin_lock_irqsave(&mz->lru_lock, flags);
         __mem_cgroup_add_list(mz, pc);
         spin_unlock_irqrestore(&mz->lru_lock, flags);
+        unlock_page_cgroup(pc);
 
-        unlock_page_cgroup(page);
 done:
         return 0;
 out:
         css_put(&mem->css);
-        kmem_cache_free(page_cgroup_cache, pc);
-err:
         return -ENOMEM;
 }
 
@@ -622,7 +569,8 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
         if (mem_cgroup_subsys.disabled)
                 return 0;
-
+        if (PageCompound(page))
+                return 0;
         /*
          * If already mapped, we don't have to account.
          * If page cache, page->mapping has address_space.
@@ -643,7 +591,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 {
         if (mem_cgroup_subsys.disabled)
                 return 0;
-
+        if (PageCompound(page))
+                return 0;
         /*
          * Corner case handling. This is called from add_to_page_cache()
          * in usual. But some FS (shmem) precharges this page before calling it
@@ -656,22 +605,27 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
         if (!(gfp_mask & __GFP_WAIT)) {
                 struct page_cgroup *pc;
 
-                lock_page_cgroup(page);
-                pc = page_get_page_cgroup(page);
-                if (pc) {
-                        VM_BUG_ON(pc->page != page);
-                        VM_BUG_ON(!pc->mem_cgroup);
-                        unlock_page_cgroup(page);
+
+                pc = lookup_page_cgroup(page);
+                if (!pc)
+                        return 0;
+                lock_page_cgroup(pc);
+                if (PageCgroupUsed(pc)) {
+                        unlock_page_cgroup(pc);
                         return 0;
                 }
-                unlock_page_cgroup(page);
+                unlock_page_cgroup(pc);
         }
 
         if (unlikely(!mm))
                 mm = &init_mm;
 
-        return mem_cgroup_charge_common(page, mm, gfp_mask,
+        if (page_is_file_cache(page))
+                return mem_cgroup_charge_common(page, mm, gfp_mask,
                                 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+        else
+                return mem_cgroup_charge_common(page, mm, gfp_mask,
+                                MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
 /*
@@ -691,44 +645,46 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
         /*
          * Check if our page_cgroup is valid
          */
-        lock_page_cgroup(page);
-        pc = page_get_page_cgroup(page);
-        if (unlikely(!pc))
-                goto unlock;
-
-        VM_BUG_ON(pc->page != page);
+        pc = lookup_page_cgroup(page);
+        if (unlikely(!pc || !PageCgroupUsed(pc)))
+                return;
 
-        if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
-            && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
-                || page_mapped(page)))
-                goto unlock;
+        lock_page_cgroup(pc);
+        if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
+             || !PageCgroupUsed(pc)) {
+                /* This happens at race in zap_pte_range() and do_swap_page()*/
+                unlock_page_cgroup(pc);
+                return;
+        }
+        ClearPageCgroupUsed(pc);
+        mem = pc->mem_cgroup;
 
         mz = page_cgroup_zoneinfo(pc);
         spin_lock_irqsave(&mz->lru_lock, flags);
         __mem_cgroup_remove_list(mz, pc);
         spin_unlock_irqrestore(&mz->lru_lock, flags);
+        unlock_page_cgroup(pc);
 
-        page_assign_page_cgroup(page, NULL);
-        unlock_page_cgroup(page);
-
-        mem = pc->mem_cgroup;
         res_counter_uncharge(&mem->res, PAGE_SIZE);
         css_put(&mem->css);
 
-        kmem_cache_free(page_cgroup_cache, pc);
         return;
-unlock:
-        unlock_page_cgroup(page);
 }
 
 void mem_cgroup_uncharge_page(struct page *page)
 {
+        /* early check. */
+        if (page_mapped(page))
+                return;
+        if (page->mapping && !PageAnon(page))
+                return;
         __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
         VM_BUG_ON(page_mapped(page));
+        VM_BUG_ON(page->mapping);
         __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
@@ -745,15 +701,19 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
         if (mem_cgroup_subsys.disabled)
                 return 0;
 
-        lock_page_cgroup(page);
-        pc = page_get_page_cgroup(page);
-        if (pc) {
+        pc = lookup_page_cgroup(page);
+        lock_page_cgroup(pc);
+        if (PageCgroupUsed(pc)) {
                 mem = pc->mem_cgroup;
                 css_get(&mem->css);
-                if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
-                        ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+                if (PageCgroupCache(pc)) {
+                        if (page_is_file_cache(page))
+                                ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+                        else
+                                ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+                }
         }
-        unlock_page_cgroup(page);
+        unlock_page_cgroup(pc);
         if (mem) {
                 ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
                                                 ctype, mem);
@@ -778,7 +738,7 @@ void mem_cgroup_end_migration(struct page *newpage)
          */
         if (!newpage->mapping)
                 __mem_cgroup_uncharge_common(newpage,
-                                MEM_CGROUP_CHARGE_TYPE_FORCE);
+                                        MEM_CGROUP_CHARGE_TYPE_FORCE);
         else if (PageAnon(newpage))
                 mem_cgroup_uncharge_page(newpage);
 }
@@ -801,11 +761,16 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 
         rcu_read_lock();
         mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+        if (unlikely(!mem)) {
+                rcu_read_unlock();
+                return 0;
+        }
         css_get(&mem->css);
         rcu_read_unlock();
 
         do {
                 progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+                progress += res_counter_check_under_limit(&mem->res);
         } while (!progress && --retry);
 
         css_put(&mem->css);
@@ -845,7 +810,7 @@ int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
 #define FORCE_UNCHARGE_BATCH    (128)
 static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                             struct mem_cgroup_per_zone *mz,
-                            int active)
+                            enum lru_list lru)
 {
         struct page_cgroup *pc;
         struct page *page;
@@ -853,15 +818,14 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
         unsigned long flags;
         struct list_head *list;
 
-        if (active)
-                list = &mz->active_list;
-        else
-                list = &mz->inactive_list;
+        list = &mz->lists[lru];
 
         spin_lock_irqsave(&mz->lru_lock, flags);
         while (!list_empty(list)) {
                 pc = list_entry(list->prev, struct page_cgroup, lru);
                 page = pc->page;
+                if (!PageCgroupUsed(pc))
+                        break;
                 get_page(page);
                 spin_unlock_irqrestore(&mz->lru_lock, flags);
                 /*
@@ -876,8 +840,10 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                                 count = FORCE_UNCHARGE_BATCH;
                         cond_resched();
                 }
-        } else
-                cond_resched();
+        } else {
+                spin_lock_irqsave(&mz->lru_lock, flags);
+                break;
+        }
                 spin_lock_irqsave(&mz->lru_lock, flags);
         }
         spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -901,15 +867,17 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
         while (mem->res.usage > 0) {
                 if (atomic_read(&mem->css.cgroup->count) > 0)
                         goto out;
+                /* This is for making all *used* pages to be on LRU. */
+                lru_add_drain_all();
                 for_each_node_state(node, N_POSSIBLE)
                         for (zid = 0; zid < MAX_NR_ZONES; zid++) {
                                 struct mem_cgroup_per_zone *mz;
+                                enum lru_list l;
                                 mz = mem_cgroup_zoneinfo(mem, node, zid);
-                                /* drop all page_cgroup in active_list */
-                                mem_cgroup_force_empty_list(mem, mz, 1);
-                                /* drop all page_cgroup in inactive_list */
-                                mem_cgroup_force_empty_list(mem, mz, 0);
+                                for_each_lru(l)
+                                        mem_cgroup_force_empty_list(mem, mz, l);
                         }
+                cond_resched();
         }
         ret = 0;
 out:
@@ -994,14 +962,27 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
         }
         /* showing # of active pages */
         {
-                unsigned long active, inactive;
-
-                inactive = mem_cgroup_get_all_zonestat(mem_cont,
-                                                MEM_CGROUP_ZSTAT_INACTIVE);
-                active = mem_cgroup_get_all_zonestat(mem_cont,
-                                                MEM_CGROUP_ZSTAT_ACTIVE);
-                cb->fill(cb, "active", (active) * PAGE_SIZE);
-                cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
+                unsigned long active_anon, inactive_anon;
+                unsigned long active_file, inactive_file;
+                unsigned long unevictable;
+
+                inactive_anon = mem_cgroup_get_all_zonestat(mem_cont,
+                                                LRU_INACTIVE_ANON);
+                active_anon = mem_cgroup_get_all_zonestat(mem_cont,
+                                                LRU_ACTIVE_ANON);
+                inactive_file = mem_cgroup_get_all_zonestat(mem_cont,
+                                                LRU_INACTIVE_FILE);
+                active_file = mem_cgroup_get_all_zonestat(mem_cont,
+                                                LRU_ACTIVE_FILE);
+                unevictable = mem_cgroup_get_all_zonestat(mem_cont,
+                                                LRU_UNEVICTABLE);
+
+                cb->fill(cb, "active_anon", (active_anon) * PAGE_SIZE);
+                cb->fill(cb, "inactive_anon", (inactive_anon) * PAGE_SIZE);
+                cb->fill(cb, "active_file", (active_file) * PAGE_SIZE);
+                cb->fill(cb, "inactive_file", (inactive_file) * PAGE_SIZE);
+                cb->fill(cb, "unevictable", unevictable * PAGE_SIZE);
+
         }
         return 0;
 }
@@ -1044,6 +1025,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 {
         struct mem_cgroup_per_node *pn;
         struct mem_cgroup_per_zone *mz;
+        enum lru_list l;
         int zone, tmp = node;
         /*
          * This routine is called against possible nodes.
@@ -1064,9 +1046,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 
         for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                 mz = &pn->zoneinfo[zone];
-                INIT_LIST_HEAD(&mz->active_list);
-                INIT_LIST_HEAD(&mz->inactive_list);
                 spin_lock_init(&mz->lru_lock);
+                for_each_lru(l)
+                        INIT_LIST_HEAD(&mz->lists[l]);
         }
         return 0;
 }
@@ -1107,7 +1089,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 
         if (unlikely((cont->parent) == NULL)) {
                 mem = &init_mem_cgroup;
-                page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
         } else {
                 mem = mem_cgroup_alloc();
                 if (!mem)